Strings, Keys and Lex Rules - amark/gun GitHub Wiki

Everything is a string

Every string is an array

tldr;

    = x - exactly: true if the target === x
    < y - descend from y: true if the target is less than or equal to op['<'], i.e. it selects all keys that sort at or below y
    > z - ascend from z: true if the target is greater than or equal to op['>'], i.e. it selects all keys that sort at or above z
    * p - prefix: true if the target starts with exactly the prefix p

Compares the target to the value of op[x], using the operator x.
ops used are <= / >= / ===

'Za' < 'aa'   //true
'za' > 'aa'   //true
'aaa' > 'aa'  //true
'aa' > 'aa'   //false
'aa' >= 'aa'  //true

How does JS compare them

let s = 's';

// Relational operators compare strings code unit by code unit, so every
// uppercase letter sorts before every lowercase one and only the first
// differing character decides the result.
// Expected output, in order:
//   'Z'  -> false, true
//   'z'  -> true,  false
//   'az' -> false, true
for (const other of ['Z', 'z', 'az']) {
    console.log(s < other);
    console.log(s > other);
}

console.log(s === 's'); // true

// localeCompare uses locale-aware ordering instead of raw code units.
// Expected output, in order: -1, -1, 1, -1, 0
// (the exact values may vary with the runtime's locale data).
for (const other of ['Z', 'z', 'az', 'S', 's']) {
    console.log(s.localeCompare(other));
}

let cat = 'cat in UN ğūN';

// search() returns the index of the first match, or -1 when there is none.
for (const needle of ['at', 'og']) {
    console.log(cat.search(needle)); // 1, then -1
}

// includes() is case-sensitive: the text only contains an upper-case 'UN'.
console.log(cat.includes('un')); // false

// Lower-casing fixes the case mismatch ('un' -> true), but the accented
// letters are still not plain 'g'/'u', so 'gun' is not found (-> false).
for (const needle of ['un', 'gun']) {
    console.log(cat.toLowerCase().includes(needle)); // true, then false
}

// Normalizing first does not help either.
console.log(cat.normalize().toLowerCase().includes('gun')); // false
// Don't rely on conversion of special characters.

Gun allows lexical matching based on primitive string comparisons. This is pretty powerful but comes with some fine print. Overall, remember that it is only a "prefix" match: strings are compared one character at a time and the comparison is decided at the first character that differs, so anything after that point in the target is never looked at. The exception is when your query is an exact prefix of the target: then any extra character pushes the target to be greater than your query.

I have attempted to explain the lex function below. The docs should guide one through the logic, but they probably still need some tuning.

/**
 * Compare a string to lex ops.
 * niped from  https://github.com/amark/gun/pull/1204
 *
 * All comparisons are plain JS string comparisons (===, >=, <=). They run one
 * character at a time and are decided at the first character that differs;
 * when one string is a prefix of the other, the shorter one sorts first
 * ('abc' < 'abca'), while 'c' > 'abca' because 'c' > 'a' decides it at once.
 *
 * @param {string} t - target string (the key being tested).
 *          A non-string target never matches.
 * @param {string|Object} op - a string (exact match only) or an object of ops.
 *          A missing (null/undefined) op imposes no constraint.
 * @param {string} [op.'='] - alias 'exactly': target === value.
 *          NOTE if an exact match is requested all other ops are ignored.
 * @param {string} [op.'<'] - alias 'descend': matches targets less than or
 *          equal to the value, i.e. everything that sorts at or below it.
 *      @example 'cat' <= 'dd' is true; 'foo' <= 'dd' is false
 * @param {string} [op.'>'] - alias 'ascend': matches targets greater than or
 *          equal to the value, i.e. everything that sorts at or above it.
 *      @example 'cat' >= 'dd' is false; 'foo' >= 'dd' is true
 * @param {string} [op.'*'] - only matches keys that start with this prefix.
 *      NOTE if many ops are specified all must match, ie < AND > AND *
 *      NOTE when a symbol and its alias are both given, the symbol wins.
 *      @example {'>':'bb2010/00', "<":'cc2020/12', '*': 'bc'}
 *          // Every key starting with 'bc' sorts between 'bb...' and 'cc...',
 *          // so the date parts of these bounds never constrain 'bc' keys:
 *          // 'bc2011/05', 'bc1999/01' and 'bc2030/01' all match.
 *          // (this is the crux of most bugs prob)
 *          // To bound the dates, the bounds must share the key prefix:
 *          //   {'>':'bc2010/00', '<':'bc2020/12'}
 *          // matches 'bc2011/05' but not 'bc2021/01'. It also rejects
 *          // 'bc2020/12/31', because a longer key that extends the upper
 *          // bound is greater than it. Padding the upper bound with high
 *          // characters (e.g. '<':'bc2020/19') avoids that, with the lesser
 *          // problem of also matching keys past the intended end.
 * @return {boolean} true when the target satisfies the query.
 *
 *
 * @example {
 * //# with letters (keys listed with their char codes, Z=90 a=97 b=98 z=122)
 *
 *     'ZZ'  / 90 90
 *     'aa'  / 97 97
 *     'ab'  / 97 98
 *     'aab' / 97 97 98  // longer than 'aa' and greater than it (prefix),
 *                       // but still less than 'ab' ('a' < 'b' decides)
 *     'aaz' / 97 97 122
 *     'zzz' / 122 122 122
 *
 *     lex = 'aa' or {'=':'aa'} // aa only
 *     lex = {'<': 'ab'}   // ZZ, aa, aab, aaz, ab
 *     lex = {'>': 'aaz'}  // aaz, ab, zzz
 *     lex = {'*': 'aa'}   // aa, aab, aaz
 *
 *
 * //# with nums (digits are compared as characters, not as numbers)
 *
 *     '23'
 *     '226'
 *     '24'
 *     '231'
 *
 *     lex = '23' or {'=':'23'} // 23 only
 *     lex = {'<': '23'}  // 23, 226
 *     lex = {'>': '23'}  // 23, 24, 231
 *     lex = {'*': '23'}  // 23, 231
 *     lex = {'>': '23' , '<': '231'}  // 23, 231 // above 23 and below 231
 *     // keep the bounds as strings: a number such as 231 would make JS
 *     // compare numerically and let '24' through as well.
 * }
 */
let lex = (t, op) => {//target, operators
    // Only string keys can match anything.
    if ('string' !== typeof t) return false;
    // A plain string query is an exact match and nothing else.
    if ('string' === typeof op) return t === op;

    const ops = op || {};
    // Return the first operand that is actually defined among a symbol and
    // its aliases, so that empty strings are honoured as real values.
    const pick = (...names) => {
        for (const name of names) {
            if (ops[name] !== undefined) return ops[name];
        }
        return undefined;
    };

    // = x - exactly === x (short-circuits every other op)
    const exact = pick('=', 'exactly');
    if (exact !== undefined) return t === exact;

    // > z - ascend from z: lower bound, inclusive
    const lower = pick('>', 'ascend');
    // < y - descend from y: upper bound, inclusive
    const upper = pick('<', 'descend');
    // * p - starts with exactly the prefix p
    const prefix = pick('*');

    return (
        (lower === undefined || t >= lower) &&
        (upper === undefined || t <= upper) &&
        (prefix === undefined || t.slice(0, prefix.length) === prefix)
    );
};



/**
 * Test a key against a lexical query, accepting word aliases for each op.
 *
 * Comparisons are plain JS string comparisons: they run one character at a
 * time and are decided at the first differing character; when one string is
 * a prefix of the other the shorter one sorts first ('abc' < 'abca'), while
 * 'c' > 'abca'.
 *
 * @param {string} t - the target key / string. Non-strings never match.
 * @param {string|Object} op - the lexical operators; a plain string means an
 *          exact match. A missing (null/undefined) op imposes no constraint.
 * @param {string} [op.exact] - alias '=': match this key exactly.
 *          NOTE if an exact match is requested all other ops are ignored.
 * @param {string} [op.ascend] - alias '>', 'below': inclusive lower bound;
 *          true if the target is greater than or equal to the value.
 * @param {string} [op.descend] - alias '<', 'above': inclusive upper bound;
 *          true if the target is less than or equal to the value,
 *          ie 'zzaa' <= 'zzbb', and 'aaaaa' <= 'zzbb' as well.
 * @param {string} [op.prefix] - alias '*': does the key start with our prefix.
 *          When several spellings of one op are given, the first one that is
 *          defined wins, in the order listed in the code (symbol first).
 * @param {*} [compare] - currently unused; kept so existing callers that pass
 *          a third argument keep working.
 * @return {boolean} true when every supplied op matches (< AND > AND *).
 *
 * TODO: the following ideas are not implemented yet:
 *   - op.includes / op['?'] / op.search: check whether the string contains a
 *     substring tag, e.g. t.search(search) > -1.
 *   - op.normalize (default false): normalise the target first; would accept
 *       "NFC"  Canonical Decomposition, followed by Canonical Composition
 *              (default if true)
 *       "NFD"  Canonical Decomposition
 *       "NFKC" Compatibility Decomposition, followed by Canonical Composition
 *       "NFKD" Compatibility Decomposition
 *   - op.lower: lower-case the target before comparing.
 */
let lex2 = (t, op, compare) => {//target, operators
    if (typeof t !== 'string') return false;//y u no give key/string
    if (typeof op === 'string') return t === op;// a string op is an exact match

    const ops = op || {};
    // First operand that is actually defined among a symbol and its aliases.
    // An explicit undefined check keeps '' usable as a real operand, which a
    // `||` chain would silently skip.
    const firstDefined = (...names) => {
        for (const name of names) {
            if (ops[name] !== undefined) return ops[name];
        }
        return undefined;
    };

    // = x - exactly === x; early out, other ops are ignored
    const exact = firstDefined('=', 'exact');
    if (exact !== undefined) return t === exact;

    // > z - ascend from z: inclusive lower bound
    const lower = firstDefined('>', 'ascend', 'below');
    // < y - descend from y: inclusive upper bound
    const upper = firstDefined('<', 'descend', 'above');
    // * p - starts with exactly the prefix p
    const prefix = firstDefined('*', 'prefix');

    // Open question from the original notes: would testing the prefix first
    // fail faster? The result is the same either way.
    return (
        (lower === undefined || t >= lower) &&
        (upper === undefined || t <= upper) &&
        (prefix === undefined || t.slice(0, prefix.length) === prefix)
    );
};

We need to test that these functions work the same

// prefix@date#tags

// Sample keys for exercising the lex functions: every three-letter
// combination of a/b/c in sorted order, followed by a few shorter keys that
// are prefixes of the longer ones.
let ta = [
    'aaa', 'aab', 'aac', 'aba', 'abb', 'abc', 'aca', 'acb', 'acc',
    'baa', 'bab', 'bac', 'bba', 'bbb', 'bbc', 'bca', 'bcb', 'bcc',
    'caa', 'cab', 'cac', 'cba', 'cbb', 'cbc', 'cca', 'ccb', 'ccc',
    'aa', 'bb', 'cc',
    'a', 'b', 'c',
];

// Cross-check: lex and lex2 must agree on every sample key for the same
// query. Matching keys are appended to `app` for visual inspection
// (NOTE(review): `app` is not defined in this snippet; presumably a DOM
// element available as a global on the page - confirm).
ta.forEach(t => {
    let op = {'<': 'bb', '>': 'ab'};
    let m = lex(t, op);
    // lex2 takes the target first, exactly like lex; calling it with only
    // the query makes it treat the query object as a (non-string) target.
    console.assert(m === lex2(t, op), 'lex and lex2 disagree for key', t);
    if (m) app.innerHTML += t + '<br>';
});
/*result
aba
abb
abc
aca
acb
acc
baa
bab
bac
bb
b
*
 Looks good to me .
 */

And finally, when we want to look up a block of dates by date.getTime():

/**
 * new Date range
 *
 * Get a lex query that approximately selects messages from a time range, for
 * keys of the form `prefix + date.getTime()`.
 * May go over the edges but not under.
 * |  |  |  |  |  [ s |  |  |  |e  ]
 * requested s-e returned []
 * The larger the difference, the bigger the error: the two timestamps are
 * compared digit by digit; the shared leading digits are kept, the first
 * differing digit is kept on each side, and every digit after it becomes
 * '0' for the lower bound and '9' for the upper bound.
 *
 * Example: start 1661929200000, end 1664060278856
 *   shared digits '166', first difference at index 3
 *   lower bound 1661000000000, upper bound 1664999999999 (about 46.3 days)
 *   -> { '>': 'foo1661000000000', '<': 'foo1664999999999' }
 *   -> { '*': 'foo166' } when onlyPrefix is true
 *
 * When the two timestamps do not have the same number of digits no digit can
 * be shared, so the query falls back to every non-negative timestamp with at
 * most as many digits as the longer one (over, never under).
 *
 * start is expected to be earlier than or equal to end; the bounds are not
 * swapped.
 *
 * Design notes kept from the original:
 *   2022/08/12/24:59:59.59 -> 0000/00/00:00:00.0000 to 9999/19/39/29/59:59.9999
 *   vs
 *   1664054514645  ->  0000000000000 - 9999999999999
 *   id:/prefix/123456570/#tag#tag2 - we want this to work with time.
 *
 * @param {string} prefix - static prefix for all strings
 * @param {*} start - start time, anything `new Date()` accepts
 * @param {*} end - end time, anything `new Date()` accepts
 * @param {boolean} [onlyPrefix=false] - don't bother getting the extra digit
 *          of precision: returns a single prefix match (one less decimal of
 *          precision, so more records, but possibly faster comparisons)
 * @return {{"*": string}|{"<": string, ">": string}} a lex query
 * @throws {RangeError} when start or end is not a valid date on or after the
 *          Unix epoch (decimal digits of negative timestamps do not sort)
 */
let buildLexDateRange = (prefix, start, end, onlyPrefix = false) => {
    const startMs = new Date(start).getTime();
    const endMs = new Date(end).getTime();
    // `!(x >= 0)` is true for NaN (invalid date) as well as for negatives.
    if (!(startMs >= 0) || !(endMs >= 0)) {
        throw new RangeError('buildLexDateRange: start and end must be valid dates on or after the Unix epoch');
    }

    // precision
    const s = String(startMs);
    const e = String(endMs);
    console.log(' ', s,' \n',e);

    let ge;
    let le;
    let p = prefix;
    // index of the first digit that differs, -1 while none has been found
    let stopMatch = -1;

    if (s.length !== e.length) {
        // Digits only line up when both numbers have the same length.
        stopMatch = 0;
        ge = '0';
        le = '9'.repeat(Math.max(s.length, e.length));
    } else {
        //walk till it wont match
        let i = 0;
        while (i < s.length && s[i] === e[i]) {
            i++;
        }
        const shared = s.slice(0, i);
        p += shared;
        if (i === s.length) {
            // identical timestamps: the range is that single instant
            ge = s;
            le = e;
        } else {
            stopMatch = i;
            const rest = s.length - i - 1;
            ge = shared + s[i] + '0'.repeat(rest);
            le = shared + e[i] + '9'.repeat(rest);
        }
    }

    // Debug output: the effective range as dates and its width in days.
    const d1 = new Date(Number.parseInt(ge, 10));
    const d2 = new Date(Number.parseInt(le, 10));

    console.log('stopMatck: ', stopMatch, " p: ", p,
        '\n', d1.toLocaleString(),
        '\n',d1.getTime(),
        '\n', d2.getTime(),
        '\n',d2.toLocaleString(),
        '\n delta dates is days: ',
        (d2.getTime()-d1.getTime())/1000/60/60/24 // days
    );

    if (onlyPrefix) {
        // A single prefix test; omits '<' and '>' for less precision but an
        // earlier out.
        return {
            '*': p,
        };
    }
    // Both bounds; adding '*': p as well would just be more work.
    return {
        '<': prefix + le,
        '>': prefix + ge,
    };
};
// The original call passed '2022/09/00', a non-standard date string: the run
// that produced the sample numbers above resolved it to Aug 31 2022, local
// midnight, but other engines may reject it as an invalid date. The ISO form
// below asks for that same instant portably.
buildLexDateRange('foo', '2022-08-31T00:00:00', Date.now());
⚠️ **GitHub.com Fallback** ⚠️