0.0.4 • Published 2 years ago

nv-lex-ws v0.0.4

Weekly downloads
-
License
ISC
Repository
-
Last release
2 years ago

nv-lex-ws

  • nv-lex-ws
  • a simple tool to find all whitespace in a string

install

  • npm install nv-lex-ws

usage

const {parse,split,DFLT_CFG} = require("nv-lex-ws");



    > DFLT_CFG()
    {
      qpairs: [
        [ "'", "'" ],       [ "'''", "'''" ],
        [ '"', '"' ],       [ '"""', '"""' ],
        [ '`', '`' ],       [ '%%', '%%' ],
        [ '###', '###' ],   [ '//', '\n' ],
        [ '//', '\u2028' ], [ '//', '\u2029' ],
        [ '/*', '*/' ],     [ '<', '>' ],
        [ '<!--', '-->' ],  [ '{-', '-}' ],
        [ '(*', '*)' ],     [ '<%--', '--%>' ],
        [ '%(', '%)' ],     [ '#|', '|#' ],
        [ '#=', '=#' ],     [ '#if', '#endif' ],
        [ '--[[', '--]]' ],
        //--------------------------------------------->the three pairs below are only used in chinese text
        [ '“', '”' ],
        [ '‘', '’' ],       [ '·', '·' ]
      ],                                         // combos that can be treated AS quotes; define/change it when necessary
                                                 // the default includes quotes used in python AND js, AND
                                                 //       most comment-quotes in modern languages,    AND
                                                 //       three pairs used in chinese text
                                                 //           (because i need to parse many chinese resources,
                                                 //           so i added them)

      NL: [ '\n', '\r', '\u2028', '\u2029', '\r\n' ],
                                                 // new-line keywords; define/change it when necessary
      NONL: [                                    
        '\t', '\x0B', '\f', ' ',
        ' ',  ' ',    ' ',  ' ',
        ' ',  ' ',    ' ',  ' ',
        ' ',  ' ',    ' ',  ' ',
        ' ',  ' ',    ' ',  ' ',
        ''
      ],                                       //non-newline ,define/change it when necessary,

      indent_nonl: false,                      // USED for languages such as python, which use INDENT for blocks
      max_size: 10000,                         // it's the internal parse-tree max-size; increase it if your src-chunk is large
                                               // keep your chunk small and performance will be good

      terminator: '‌',                    //internal using ,its a invisible character, normally USELESS

      esc: '\\'                                // IF you want to use another character (must be ONE char) as the escape,
                                               // change this; in MOST modern languages there is NO need to change it
    }
    >

example

    var chunk = `
    a"bcd" %%ef%% """gh"""i
        let a = 999;
    /*
    jk
    lm
    */  #if nopq #endif  <!--rrrsssttt-->
        function tst(a,b) {
            r = a+b;
            return(r)
        }
    ##`

split

    > split(chunk)
    {
      rslt: [
        '\n',             'a',                '"bcd"',
        ' ',              '%%ef%%',           ' ',
        '"""gh"""',       'i',                '\n',
        '    ',           'let',              ' ',
        'a',              ' ',                '=',
        ' ',              '999;',             '\n',
        '/*\njk\nlm\n*/', '  ',               '#if nopq #endif',
        '  ',             '<!--rrrsssttt-->', '\n',
        '    ',           'function',         ' ',
        'tst(a,b)',       ' ',                '{',
        '\n',             '        ',         'r',
        ' ',              '=',                ' ',
        'a+b;',           '\n',               '        ',
        'return(r)',      '\n',               '    ',
        '}',              '\n',               ''
      ],
      lefted: [ [], '##' ]
    }
    >

parse

    > parse(chunk)
    {
      toks: [
        NlTok { type: 103, count: 1 },
        RawTok { type: 10, raw: 'a' },
        QuotedTok { type: 100, lq: '"', rq: '"', q: '"', str: 'bcd' },
        NoNlTok { type: 102, count: 1 },
        QuotedTok { type: 100, lq: '%%', rq: '%%', q: '%%', str: 'ef' },
        NoNlTok { type: 102, count: 1 },
        QuotedTok { type: 100, lq: '"""', rq: '"""', q: '"""', str: 'gh' },
        RawTok { type: 10, raw: 'i' },
        NlTok { type: 103, count: 1 },
        NoNlTok { type: 102, count: 4 },
        RawTok { type: 10, raw: 'let' },
        NoNlTok { type: 102, count: 1 },
        RawTok { type: 10, raw: 'a' },
        NoNlTok { type: 102, count: 1 },
        RawTok { type: 10, raw: '=' },
        NoNlTok { type: 102, count: 1 },
        RawTok { type: 10, raw: '999;' },
        NlTok { type: 103, count: 1 },
        QuotedTok {
          type: 100,
          lq: '/*',
          rq: '*/',
          q: '/*',
          str: '\njk\nlm\n'
        },
        NoNlTok { type: 102, count: 2 },
        QuotedTok {
          type: 100,
          lq: '#if',
          rq: '#endif',
          q: '#if',
          str: ' nopq '
        },
        NoNlTok { type: 102, count: 2 },
        QuotedTok {
          type: 100,
          lq: '<!--',
          rq: '-->',
          q: '<!--',
          str: 'rrrsssttt'
        },
        NlTok { type: 103, count: 1 },
        NoNlTok { type: 102, count: 4 },
        RawTok { type: 10, raw: 'function' },
        NoNlTok { type: 102, count: 1 },
        RawTok { type: 10, raw: 'tst(a,b)' },
        NoNlTok { type: 102, count: 1 },
        RawTok { type: 10, raw: '{' },
        NlTok { type: 103, count: 1 },
        NoNlTok { type: 102, count: 8 },
        RawTok { type: 10, raw: 'r' },
        NoNlTok { type: 102, count: 1 },
        RawTok { type: 10, raw: '=' },
        NoNlTok { type: 102, count: 1 },
        RawTok { type: 10, raw: 'a+b;' },
        NlTok { type: 103, count: 1 },
        NoNlTok { type: 102, count: 8 },
        RawTok { type: 10, raw: 'return(r)' },
        NlTok { type: 103, count: 1 },
        NoNlTok { type: 102, count: 4 },
        RawTok { type: 10, raw: '}' },
        NlTok { type: 103, count: 1 },
        RawTok { type: 10, raw: '' }
      ],
      lefted: [ [], '##' ]
    }
    >

API

  • DFLT_CFG
  • split(chunk,CFG_DICT)
  • parse(chunk,CFG_DICT)

LICENSE

  • ISC