1.0.13 • Published 4 years ago
word2vec-wordlist v1.0.13
word2vec-wordlist
Dataset of word2vec embeddings for the top 15,000 english words according to norvig frequency.
Useful for practicing ideas with word2vec without requiring the massive database.
- contains 300-dimensional vectors from the GoogleNews dataset 
- words that had the same stemming via stemmer were considered identical 
Installation
npm i word2vec-wordlist Usage
var word2Vec_wordList = require('word2vec-wordlist');
word2Vec_wordList.decompress(onDecompressed);
//takes about 20 seconds to decompress!
function onDecompressed(){ 
    console.log(word2Vec_wordList.getWord("cheese")); //this result is instant
    // Float32Array(300) [
    // -0.08591480553150177,  -0.07043947279453278,   0.03762108087539673,
    // 0.13874441385269165,  -0.01687612198293209,   0.06163453683257103,
    // -0.05923318862915039,  -0.04108969122171402,  -0.06030045449733734,
    // 0.05069507285952568,  -0.04215695336461067,   -0.0853811725974083,
    // -0.07577579468488693,  0.034419286996126175,  -0.04429148510098457,
    // ...
    // ]
}