1.0.2 • Published 1 year ago

nv-nlp-parse-stanza v1.0.2

Weekly downloads
-
License
ISC
Repository
-
Last release
1 year ago

nv-nlp-parse-stanza

  • nv-nlp-parse-stanza
  • utility for parsing the result returned from a Stanza server (stanfordnlp)
  • nvlang has a Chinese-DSL mode; this package is for testing the generation of JS code from Chinese sentences in nvlang, and is normally USELESS
  • in the Chinese-DSL mode, the user inputs a Chinese sentence; after processing it, Stanza returns the constituency (its s-expression)
  • nvlang then uses the shapes to find a matching function-shape

install

  • npm install nv-nlp-parse-stanza

usage

     const {parse} = require("nv-nlp-parse-stanza");

example

        var d =   {
            text: '所以我们自然就想到了 Python 中的异步框架',
            sentiment: null,
            words: [
              {
                id: 1,
                text: '所以',
                upos: 'ADV',
                xpos: 'RB',
                start_char: 0,
                end_char: 2
              },
              {
                id: 2,
                text: '我们',
                upos: 'PRON',
                xpos: 'PRP',
                feats: 'Number=Plur|Person=1',
                start_char: 2,
                end_char: 4
              },
              {
                id: 3,
                text: '自然',
                upos: 'ADV',
                xpos: 'RB',
                start_char: 4,
                end_char: 6
              },
              {
                id: 4,
                text: '就',
                upos: 'ADV',
                xpos: 'RB',
                start_char: 6,
                end_char: 7
              },
              {
                id: 5,
                text: '想到',
                upos: 'VERB',
                xpos: 'VV',
                start_char: 7,
                end_char: 9
              },
              {
                id: 6,
                text: '了',
                upos: 'AUX',
                xpos: 'AS',
                feats: 'Aspect=Perf',
                start_char: 9,
                end_char: 10
              },
              {
                id: 7,
                text: 'Python',
                upos: 'X',
                xpos: 'FW',
                start_char: 11,
                end_char: 17
              },
              {
                id: 8,
                text: '中',
                upos: 'ADP',
                xpos: 'IN',
                start_char: 18,
                end_char: 19
              },
              {
                id: 9,
                text: '的',
                upos: 'PART',
                xpos: 'DEC',
                feats: 'Case=Gen',
                start_char: 19,
                end_char: 20
              },
              {
                id: 10,
                text: '异步',
                upos: 'ADJ',
                xpos: 'JJ',
                start_char: 20,
                end_char: 22
              },
              {
                id: 11,
                text: '框架',
                upos: 'NOUN',
                xpos: 'NN',
                start_char: 22,
                end_char: 24
              }
            ],
            entities: [],
            constituency: {
              cnt: 36,
              structure: [
                'ROOT',
                [
                  [
                    'IP',
                    [
                      [
                        'ADVP',
                        [
                          [ 'RB', [ [ '所以', [] ] ] ]
                        ]
                      ],
                      [
                        'NP',
                        [
                          [ 'PRP', [ [ '我们', [] ] ] ]
                        ]
                      ],
                      [
                        'VP',
                        [
                          [
                            'ADVP',
                            [
                              [ 'RB', [ [ '自然', [] ] ] ]
                            ]
                          ],
                          [
                            'ADVP',
                            [
                              [ 'RB', [ [ '就', [] ] ] ]
                            ]
                          ],
                          [
                            'VP',
                            [
                              [ 'VV', [ [ '想到', [] ] ] ],
                              [ 'AS', [ [ '了', [] ] ] ],
                              [
                                'NP',
                                [
                                  [
                                    'DNP',
                                    [
                                      [
                                        'LCP',
                                        [
                                          [
                                            'NP',
                                            [
                                              [ 'FW', [ [ 'Python', [] ] ] ]
                                            ]
                                          ],
                                          [ 'IN', [ [ '中', [] ] ] ]
                                        ]
                                      ],
                                      [ 'DEC', [ [ '的', [] ] ] ]
                                    ]
                                  ],
                                  [
                                    'ADJP',
                                    [
                                      [ 'JJ', [ [ '异步', [] ] ] ]
                                    ]
                                  ],
                                  [
                                    'NP',
                                    [
                                      [ 'NN', [ [ '框架', [] ] ] ]
                                    ]
                                  ]
                                ]
                              ]
                            ]
                          ]
                        ]
                      ]
                    ]
                  ]
                ]
              ]
            }
          }



        r = parse(d)
        /*
        {
          text: '所以我们自然就想到了 Python 中的异步框架',
          sentiment: null,
          words: [
            { text: '所以', upos: 'ADV', xpos: 'RB', feats: null },
            {
              text: '我们',
              upos: 'PRON',
              xpos: 'PRP',
              feats: 'Number=Plur|Person=1'
            },
            { text: '自然', upos: 'ADV', xpos: 'RB', feats: null },
            { text: '就', upos: 'ADV', xpos: 'RB', feats: null },
            { text: '想到', upos: 'VERB', xpos: 'VV', feats: null },
            { text: '了', upos: 'AUX', xpos: 'AS', feats: 'Aspect=Perf' },
            { text: 'Python', upos: 'X', xpos: 'FW', feats: null },
            { text: '中', upos: 'ADP', xpos: 'IN', feats: null },
            { text: '的', upos: 'PART', xpos: 'DEC', feats: 'Case=Gen' },
            { text: '异步', upos: 'ADJ', xpos: 'JJ', feats: null },
            { text: '框架', upos: 'NOUN', xpos: 'NN', feats: null }
          ],
          entities: [],
          constituency: {
            brief: { texts: [Array], tblocs: [Array], shapes: [Array] },
            detail: { texts: [Array], tblocs: [Array], shapes: [Array] }
          }
        }
        >
        */

        /*
        > r.constituency.brief
        {
          texts: [
            '所以我们自然就想到了Python中的异步框架',
            '所以',
            '我们',
            '自然就想到了Python中的异步框架',
            '自然',
            '就',
            '想到了Python中的异步框架',
            '想到',
            '了',
            'Python中的异步框架',
            'Python中的',
            'Python中',
            'Python',
            '中',
            '的',
            '异步',
            '框架'
          ],
          tblocs: [
            '(((所以))((我们))(((自然))((就))((想到)(了)(((((Python))(中))(的))((异步))((框架))))))',
            '((所以))',
            '((我们))',
            '(((自然))((就))((想到)(了)(((((Python))(中))(的))((异步))((框架)))))',
            '((自然))',
            '((就))',
            '((想到)(了)(((((Python))(中))(的))((异步))((框架))))',
            '(想到)',
            '(了)',
            '(((((Python))(中))(的))((异步))((框架)))',
            '((((Python))(中))(的))',
            '(((Python))(中))',
            '((Python))',
            '(中)',
            '(的)',
            '((异步))',
            '((框架))'
          ],
          shapes: [
            'IP(ADVP(RB())NP(PRP())VP(ADVP(RB())ADVP(RB())VP(VV()AS()NP(DNP(LCP(NP(FW())IN())DEC())ADJP(JJ())NP(NN())))))',
            'ADVP(RB())',
            'NP(PRP())',
            'VP(ADVP(RB())ADVP(RB())VP(VV()AS()NP(DNP(LCP(NP(FW())IN())DEC())ADJP(JJ())NP(NN()))))',
            'ADVP(RB())',
            'ADVP(RB())',
            'VP(VV()AS()NP(DNP(LCP(NP(FW())IN())DEC())ADJP(JJ())NP(NN())))',
            'VV()',
            'AS()',
            'NP(DNP(LCP(NP(FW())IN())DEC())ADJP(JJ())NP(NN()))',
            'DNP(LCP(NP(FW())IN())DEC())',
            'LCP(NP(FW())IN())',
            'NP(FW())',
            'IN()',
            'DEC()',
            'ADJP(JJ())',
            'NP(NN())'
          ]
        }
        >
        */

METHODS

API

    {
        parse_words,
        parse_constituency,
        parse
    }

LICENSE

  • ISC