1.0.1 • Published 7 years ago
regex-tokenizer v1.0.1
Regex-tokenizer
Tokenize a string according to an array of `{ regex, tag }` objects, and return an array of the tagged tokens in the order they appear in the string.
Tokens are generated in the order of the array. Once a string portion has been tagged, subsequent RegExps will ignore it.
var tokenize = require('regex-tokenizer');

// Each rule pairs a RegExp with the tag attached to the text it matches.
// Tags are arbitrary values: a string, a number and an object are used here.
var regexTags = [
  { regex: /AAA/, tag: "a" },
  { regex: /BBB/, tag: 123 },
  { regex: /CCC/, tag: {name: "c", description: "The letter c"} }
];

var string = "AAA BBB CCC";
var tokens = tokenize(string, regexTags);
console.log(tokens);

/*
 * Expected output -- the spaces between the matches are not covered by
 * any rule, so they come back tagged with -1:
 *
 * [ { text: 'AAA', tag: 'a'},
 *   { text: ' ', tag: -1 },
 *   { text: 'BBB', tag: 123},
 *   { text: ' ', tag: -1},
 *   { text: 'CCC', tag: {name: "c", description: "The letter c"}} ]
 */
Tags may be any type. Unmatched portions of the string are tagged with -1.
`example.js` contains a more complex example that uses regex-tokenizer to decompose the script of a play.