0.0.4 • Published 10 years ago

ngrams-search v0.0.4

Weekly downloads
8
License
MIT
Repository
github
Last release
10 years ago

ngrams

Node.js module for searching by n-gram similarity of characters. An imitation of the Python NGram module.

npm package

basic usage

var NGrams = require('ngrams-search');
var n = new NGrams()    //default N=3 (size of ngram) w=1 (warp, use greater than 1 to increase the similarity of shorter string pairs)
n.add("spam");          //add single items
n.add(["span", "eg"]);  //or an array of items
console.log(n.search("spa"));   // second argument is optional - threshold - return only items with similarity greater than threshold. default is 0
/*
will output an array of items with similarity greater than threshold ordered by similarity
//[{
    item: "spam",
    similarity: 0.375
}, {
    item: "span",
    similarity: 0.375
}]
*/

n.getMaxNgram("spam");    
/*
returns the item with the maximum ngram similarity or undefined if none
{
    item: "spam",
    similarity: 1.0
}
*/

more usage examples

var n = new NGrams(2);  //create ngrams of size 2
n.pad("word");           //returns " word " padding is of size N-1
n.split("ab");
/*
returns the ngrams of the item "ab" after padding
[
  [' ', 'a'],
  ['a', 'b'],
  ['b', ' ']
]
*/

n.getSharedNgrams("abe", "abc");
/*
returns all the ngrams that both items share:
[
  [' ', 'a'],
  ['a', 'b']
]
*/
n.getCountSharedNgrams("abe", "abc");    // returns 2
n.getStatsSharedNgrams("abe", "abc");
/*
returns
{ 
  all: 8,         //count of all ngrams in both items
  same: 2,        //ngrams shared by both items
  distinct: 6,    //count of distinct ngrams in total
  diff: 4         //count of unique ngrams - which do not appear in both items
}
*/
n.compare("abe","abc");         //third argument is warp - optional, default is 1 
/*
returns 0.3333333333333333
formula is: ((distinct ^ warp)-(diff ^ warp))/(distinct^warp)
*/

for more use cases look at test.js