Distributed-ngram NPM

WAT

Simply put: predict the next word the user will write.

HOWTO

installation

    git clone git@github.com:syzer/distributedNgram.git && cd $_
    npm install
    npm install --save-dev

The file nGram.js offers a more compact version of the code:

    npm start

testing basic distributed task

var jsSpark = require('js-spark')({workers: 16});
var task = jsSpark.jsSpark;
var q = jsSpark.q;

task([20, 30, 40, 50])
    // this is executed on client side
    .map(function addOne(num) {
        return num + 1;
    })
    .reduce(function sumUp(sum, num) {
        return sum + num;
    })
    .run()
    .then(function(data) {
        // this is executed back on the server
        console.log('i finished calculating', data);
    })

tests

    npm test

Tasks

clone https://github.com/syzer/distributedNgram.git

./index.js

load:

dracula
lodash
load helpers

(gist)

// helpers

./lib/index.js

make function prepare()

// remove special characters
function prepare(str){}
prepare('“Listen to them, the children of the night. What music they make!”')
//=>"listen to them the children of the night what music they make"

(gist)

./index.js

make bigramText()

bigramText("to listen to them the children of the night what music they make");
//=>{to: {listen: 1, them:1} , listen:{to:1}, the:{children:1}}...

function bigramText(str) {
    return arr.reduce(bigramArray);
}

(gist)

./index.js

function mergeSmall()

create 2 tasks: ch01 and ch02
use tasks to bigram those chapters
reduce response with _.merge

(gist)

./index.js

function mergeBig(texts)

load ch1, ch2, ch3 or texts
make distinct tasks to bigram this text
reduce with _.mergeObjectsInArr
cache result
return result

(gist)

./index.js

function predict(word)

load the appropriate key/word from cache

calc total hits
sort all hits in order,

may use helper function objToSortedArr(obj)

calc frequency/probability of next word

(gist)

./index.js

function train(fileName, splitter)

load file
prepare
use splitter(string) to create separate tasks
calculate tasks on clients using mergeBig()

TODO

git checkout js-spark adventure

Machine learning ML ngram NLP Natural Language Processing distributed js-spark

js-spark lodash prompt

@infinitebrahmanuniverse/nolb-dist @everything-registry/sub-chunk-1488

1.0.2

10 years ago

1.0.1

11 years ago