1.4.0 • Published 2 years ago
nlcst-parse-english v1.4.0
nlcst-parse-english
Parse English text, tag each word with its part of speech (POS), and output NLCST.
Install
Install with npm:
npm install nlcst-parse-english
Part-of-speech(POS)
Each `WordNode` has `{ data: { pos: Annotation } }`.
This library uses parse-english + FinNLP/en-pos.
Annotation | Name | Example |
---|---|---|
NN | Noun | dog man |
NNS | Plural noun | dogs men |
NNP | Proper noun | London Alex |
NNPS | Plural proper noun | Smiths |
VB | Base form verb | be |
VBP | Present form verb | throw |
VBZ | Present form (3rd person) | throws |
VBG | Gerund form verb | throwing |
VBD | Past tense verb | threw |
VBN | Past participle verb | thrown |
MD | Modal verb | can shall will may must ought |
JJ | Adjective | big fast |
JJR | Comparative adjective | bigger |
JJS | Superlative adjective | biggest |
RB | Adverb | not quickly closely |
RBR | Comparative adverb | less-closely faster |
RBS | Superlative adverb | fastest |
DT | Determiner | the a some both |
PDT | Predeterminer | all quite |
PRP | Personal Pronoun | I you he she |
PRP$ | Possessive Pronoun | my your his her |
POS | Possessive ending | 's |
IN | Preposition | of by in |
RP | Particle | up off |
TO | to | to |
WDT | Wh-determiner | which that whatever whichever |
WP | Wh-pronoun | who whoever whom what |
WP$ | Wh-possessive | whose |
WRB | Wh-adverb | how where |
EX | Expletive there | there |
CC | Coordinating conjunction | & and nor or |
CD | Cardinal Numbers | 1 7 77 one |
LS | List item marker | 1 B C One |
UH | Interjection | ah oh oops |
FW | Foreign Words | viva mon toujours |
, | Comma | , |
: | Mid-sent punct | : ; ... |
. | Sent-final punct. | . ! ? |
( | Left parenthesis | ( { [ |
) | Right parenthesis | ) } ] |
# | Pound sign | # |
$ | Currency symbols | $ € £ ¥ |
SYM | Other symbols | + * / < > |
EM | Emojis & emoticons | :) ❤ |
Usage
const parser = new EnglishParser();
const CST = parser.parse("Mr. Henry Brown: A hapless but friendly City of London worker.");
assert.deepEqual(CST, {
"type": "RootNode",
"children": [
{
"type": "ParagraphNode",
"children": [
{
"type": "SentenceNode",
"children": [
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "Mr",
"position": {
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"end": {
"line": 1,
"column": 3,
"offset": 2
}
}
},
{
"type": "PunctuationNode",
"value": ".",
"position": {
"start": {
"line": 1,
"column": 3,
"offset": 2
},
"end": {
"line": 1,
"column": 4,
"offset": 3
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"end": {
"line": 1,
"column": 4,
"offset": 3
}
},
"data": {
"pos": "NNP"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 4,
"offset": 3
},
"end": {
"line": 1,
"column": 5,
"offset": 4
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "Henry",
"position": {
"start": {
"line": 1,
"column": 5,
"offset": 4
},
"end": {
"line": 1,
"column": 10,
"offset": 9
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 5,
"offset": 4
},
"end": {
"line": 1,
"column": 10,
"offset": 9
}
},
"data": {
"pos": "NNP"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 10,
"offset": 9
},
"end": {
"line": 1,
"column": 11,
"offset": 10
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "Brown",
"position": {
"start": {
"line": 1,
"column": 11,
"offset": 10
},
"end": {
"line": 1,
"column": 16,
"offset": 15
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 11,
"offset": 10
},
"end": {
"line": 1,
"column": 16,
"offset": 15
}
},
"data": {
"pos": "NNP"
}
},
{
"type": "PunctuationNode",
"value": ":",
"position": {
"start": {
"line": 1,
"column": 16,
"offset": 15
},
"end": {
"line": 1,
"column": 17,
"offset": 16
}
},
"data": {
"pos": ":"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 17,
"offset": 16
},
"end": {
"line": 1,
"column": 18,
"offset": 17
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "A",
"position": {
"start": {
"line": 1,
"column": 18,
"offset": 17
},
"end": {
"line": 1,
"column": 19,
"offset": 18
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 18,
"offset": 17
},
"end": {
"line": 1,
"column": 19,
"offset": 18
}
},
"data": {
"pos": "DT"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 19,
"offset": 18
},
"end": {
"line": 1,
"column": 20,
"offset": 19
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "hapless",
"position": {
"start": {
"line": 1,
"column": 20,
"offset": 19
},
"end": {
"line": 1,
"column": 27,
"offset": 26
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 20,
"offset": 19
},
"end": {
"line": 1,
"column": 27,
"offset": 26
}
},
"data": {
"pos": "JJ"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 27,
"offset": 26
},
"end": {
"line": 1,
"column": 28,
"offset": 27
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "but",
"position": {
"start": {
"line": 1,
"column": 28,
"offset": 27
},
"end": {
"line": 1,
"column": 31,
"offset": 30
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 28,
"offset": 27
},
"end": {
"line": 1,
"column": 31,
"offset": 30
}
},
"data": {
"pos": "CC"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 31,
"offset": 30
},
"end": {
"line": 1,
"column": 32,
"offset": 31
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "friendly",
"position": {
"start": {
"line": 1,
"column": 32,
"offset": 31
},
"end": {
"line": 1,
"column": 40,
"offset": 39
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 32,
"offset": 31
},
"end": {
"line": 1,
"column": 40,
"offset": 39
}
},
"data": {
"pos": "JJ"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 40,
"offset": 39
},
"end": {
"line": 1,
"column": 41,
"offset": 40
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "City",
"position": {
"start": {
"line": 1,
"column": 41,
"offset": 40
},
"end": {
"line": 1,
"column": 45,
"offset": 44
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 41,
"offset": 40
},
"end": {
"line": 1,
"column": 45,
"offset": 44
}
},
"data": {
"pos": "NNP"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 45,
"offset": 44
},
"end": {
"line": 1,
"column": 46,
"offset": 45
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "of",
"position": {
"start": {
"line": 1,
"column": 46,
"offset": 45
},
"end": {
"line": 1,
"column": 48,
"offset": 47
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 46,
"offset": 45
},
"end": {
"line": 1,
"column": 48,
"offset": 47
}
},
"data": {
"pos": "IN"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 48,
"offset": 47
},
"end": {
"line": 1,
"column": 49,
"offset": 48
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "London",
"position": {
"start": {
"line": 1,
"column": 49,
"offset": 48
},
"end": {
"line": 1,
"column": 55,
"offset": 54
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 49,
"offset": 48
},
"end": {
"line": 1,
"column": 55,
"offset": 54
}
},
"data": {
"pos": "NNP"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 55,
"offset": 54
},
"end": {
"line": 1,
"column": 56,
"offset": 55
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "worker",
"position": {
"start": {
"line": 1,
"column": 56,
"offset": 55
},
"end": {
"line": 1,
"column": 62,
"offset": 61
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 56,
"offset": 55
},
"end": {
"line": 1,
"column": 62,
"offset": 61
}
},
"data": {
"pos": "NN"
}
},
{
"type": "PunctuationNode",
"value": ".",
"position": {
"start": {
"line": 1,
"column": 62,
"offset": 61
},
"end": {
"line": 1,
"column": 63,
"offset": 62
}
},
"data": {
"pos": "."
}
}
],
"position": {
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"end": {
"line": 1,
"column": 63,
"offset": 62
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"end": {
"line": 1,
"column": 63,
"offset": 62
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"end": {
"line": 1,
"column": 63,
"offset": 62
}
}
});
Changelog
See Releases page.
Running tests
Install devDependencies and run `npm test`:
npm i -d && npm test
Contributing
Pull requests and stars are always welcome.
For bugs and feature requests, please create an issue.
- Fork it!
- Create your feature branch:
git checkout -b my-new-feature
- Commit your changes:
git commit -am 'Add some feature'
- Push to the branch:
git push origin my-new-feature
- Submit a pull request :D
Author
License
MIT © azu