0.0.4 • Published 2 years ago
nv-lex-ws v0.0.4
nv-lex-ws
- nv-lex-ws
- a simple tool to find all whitespace (WHITE-SPACE) in a string
install
- npm install nv-lex-ws
usage
const {parse,split,DFLT_CFG} = require("nv-lex-ws");
> DFLT_CFG()
{
qpairs: [
[ "'", "'" ], [ "'''", "'''" ],
[ '"', '"' ], [ '"""', '"""' ],
[ '`', '`' ], [ '%%', '%%' ],
[ '###', '###' ], [ '//', '\n' ],
[ '//', '\u2028' ], [ '//', '\u2029' ],
[ '/*', '*/' ], [ '<', '>' ],
[ '<!--', '-->' ], [ '{-', '-}' ],
[ '(*', '*)' ], [ '<%--', '--%>' ],
[ '%(', '%)' ], [ '#|', '|#' ],
[ '#=', '=#' ], [ '#if', '#endif' ],
[ '--[[', '--]]' ],
//--------------------------------------------->the following three pairs are only used in Chinese text
[ '“', '”' ],
[ '‘', '’' ], [ '·', '·' ]
], // delimiter pairs that are treated AS quotes; define/change them when necessary
// the defaults include the quotes used in Python AND JS,
// most comment delimiters in modern languages, AND
// three pairs used in Chinese text
// (I need to parse many Chinese resources,
// so I added them)
NL: [ '\n', '\r', '\u2028', '\u2029', '\r\n' ],
// newline sequences; define/change them when necessary
NONL: [
'\t', '\x0B', '\f', ' ',
'\u00A0', '\u1680', '\u2000', '\u2001',
'\u2002', '\u2003', '\u2004', '\u2005',
'\u2006', '\u2007', '\u2008', '\u2009',
'\u200A', '\u202F', '\u205F', '\u3000',
'\uFEFF'
], // non-newline whitespace characters; define/change them when necessary
indent_nonl: false, // USED for languages such as Python, which use INDENT to delimit blocks
max_size: 10000, // the internal parse-tree max size; increase it if your source chunk is large
// keep your chunks small and performance will be good
terminator: '', //internal using ,its a invisible character, normally USELESS
esc: '\\' // IF you want to use another character (must be ONE char) as the escape,
// change this; in MOST modern languages there is NO need to change it
}
>
example
var chunk = `
a"bcd" %%ef%% """gh"""i
let a = 999;
/*
jk
lm
*/ #if nopq #endif <!--rrrsssttt-->
function tst(a,b) {
r = a+b;
return(r)
}
##`
split
> split(chunk)
{
rslt: [
'\n', 'a', '"bcd"',
' ', '%%ef%%', ' ',
'"""gh"""', 'i', '\n',
' ', 'let', ' ',
'a', ' ', '=',
' ', '999;', '\n',
'/*\njk\nlm\n*/', ' ', '#if nopq #endif',
' ', '<!--rrrsssttt-->', '\n',
' ', 'function', ' ',
'tst(a,b)', ' ', '{',
'\n', ' ', 'r',
' ', '=', ' ',
'a+b;', '\n', ' ',
'return(r)', '\n', ' ',
'}', '\n', ''
],
lefted: [ [], '##' ]
}
>
parse
> parse(chunk)
{
toks: [
NlTok { type: 103, count: 1 },
RawTok { type: 10, raw: 'a' },
QuotedTok { type: 100, lq: '"', rq: '"', q: '"', str: 'bcd' },
NoNlTok { type: 102, count: 1 },
QuotedTok { type: 100, lq: '%%', rq: '%%', q: '%%', str: 'ef' },
NoNlTok { type: 102, count: 1 },
QuotedTok { type: 100, lq: '"""', rq: '"""', q: '"""', str: 'gh' },
RawTok { type: 10, raw: 'i' },
NlTok { type: 103, count: 1 },
NoNlTok { type: 102, count: 4 },
RawTok { type: 10, raw: 'let' },
NoNlTok { type: 102, count: 1 },
RawTok { type: 10, raw: 'a' },
NoNlTok { type: 102, count: 1 },
RawTok { type: 10, raw: '=' },
NoNlTok { type: 102, count: 1 },
RawTok { type: 10, raw: '999;' },
NlTok { type: 103, count: 1 },
QuotedTok {
type: 100,
lq: '/*',
rq: '*/',
q: '/*',
str: '\njk\nlm\n'
},
NoNlTok { type: 102, count: 2 },
QuotedTok {
type: 100,
lq: '#if',
rq: '#endif',
q: '#if',
str: ' nopq '
},
NoNlTok { type: 102, count: 2 },
QuotedTok {
type: 100,
lq: '<!--',
rq: '-->',
q: '<!--',
str: 'rrrsssttt'
},
NlTok { type: 103, count: 1 },
NoNlTok { type: 102, count: 4 },
RawTok { type: 10, raw: 'function' },
NoNlTok { type: 102, count: 1 },
RawTok { type: 10, raw: 'tst(a,b)' },
NoNlTok { type: 102, count: 1 },
RawTok { type: 10, raw: '{' },
NlTok { type: 103, count: 1 },
NoNlTok { type: 102, count: 8 },
RawTok { type: 10, raw: 'r' },
NoNlTok { type: 102, count: 1 },
RawTok { type: 10, raw: '=' },
NoNlTok { type: 102, count: 1 },
RawTok { type: 10, raw: 'a+b;' },
NlTok { type: 103, count: 1 },
NoNlTok { type: 102, count: 8 },
RawTok { type: 10, raw: 'return(r)' },
NlTok { type: 103, count: 1 },
NoNlTok { type: 102, count: 4 },
RawTok { type: 10, raw: '}' },
NlTok { type: 103, count: 1 },
RawTok { type: 10, raw: '' }
],
lefted: [ [], '##' ]
}
>
API
- DFLT_CFG()
- split(chunk,CFG_DICT)
- parse(chunk,CFG_DICT)
LICENSE
- ISC