1.1.5 • Published 3 years ago

@tinkoff/tokenizer v1.1.5

Weekly downloads
1
License
Apache-2.0
Repository
github
Last release
3 years ago

tokenizer

A library for finding URLs, mentions, hashtags, and more in text.

installation

npm i @tinkoff/tokenizer

API

type Token = { type: string; value: string };

// To recognize custom tokens, pass an optional parameter that maps each
// token name to its own RegExp source (as a string only).
// ⚠️ IMPORTANT: do not use capturing groups; use non-capturing groups "(?:...)" instead.
type Params = { [token: string]: string }; // e.g. { hashtag: '#[a-z]+' }

function tokenizer(str: string, tokensParam?: Params): Token[];

examples

import { tokenizer, print } from '@tinkoff/tokenizer';

const textToParse = `
text containing some links like example.com or 
more strict links with protocol https://google.com;
maybe some unicode written urls like сайт.рф К)
or even a link with hash vk.com/#antient/route
also you can parse custom tokens like {$MAC} and {$CHEESE} 
it's easy to mention @someone and get it parsed as well
at last but not least #hashtags should also work fine.
`;


const result = tokenizer(textToParse, {
    tag: '\\{\\$MAC\\}|\\{\\$CHEESE\\}',
    hashtag: '#[a-z]+',
    user: '[@][a-z]+'
})

console.log(print(result));

outputs:

"
text containing some links like " (text)  "example.com" (domain)  " or 
more strict links with protocol " (text)  "https://google.com" (domain)  ";
maybe some unicode written urls like " (text)  "сайт.рф" (domain)  " К)
or even a link with hash " (text)  "vk.com/#antient/route" (domain)  "
also you can parse custom tokens like " (text)  "{$MAC}" (tag)  " and " (text)  "{$CHEESE}" (tag)  " 
it's easy to mention " (text)  "@someone" (user)  " and get it parsed as well
at last but not least " (text)  "#hashtags" (hashtag)  " should also work fine.
" (text)
1.1.5

3 years ago

1.1.4

3 years ago

1.1.3

4 years ago

1.1.2

4 years ago

1.1.1

4 years ago

1.1.0

4 years ago

1.0.4

4 years ago

1.0.3

4 years ago

1.0.2

4 years ago

1.0.1

4 years ago

1.0.0

4 years ago