1.0.0 • Published 5 years ago
faster-computing v1.0.0
Small Node.js lib.
Table Of Contents
Installation
npm i --save faster-computing
Usage
This package is intended for multiprocessing. If your computing device supports running processes in parallel, this module may speed up your daily data computation. It does a very simple task: it takes your code and data, splits the data into batches, copies your code into a temporary file, forks one child process per CPU thread of your machine, and then each child process executes the code on its own batch of data. If your data processing takes only a very small amount of time (e.g., milliseconds), you may not find it useful, but if you are working with big data and have many computations to do, this module should be helpful.
Examples
const FasterComputing = require('faster-computing');
const fc = new FasterComputing();
// Data source passed to `compute`; its `data` array is what gets split into batches.
const dataSource = {
data: [1, 2, 3, 4]
};
/**
 * Example compute procedure: adds one to every value of the received batch.
 *
 * @param {Object} params - The custom parameters plus the reserved `data` key.
 * @param {Array<number>} params.data - The batch of values to process.
 * @returns {Array<number>} A new array with each value incremented by one.
 */
function computeProcedure(params) {
  // `foo` and `bar` come from `computeProcedureParams`; they are unpacked here
  // only to show how custom parameters are accessed.
  const { data, foo, bar } = params;
  const addOne = (value) => value + 1;
  return data.map(addOne);
}
// Custom parameters made available to `computeProcedure` through `params`.
// The key `data` is reserved and must not be used here.
const computeProcedureParams = {
foo: 1,
bar: 2
};
// Memory limit (in megabytes) for each child process.
const memory = 25;
// First version of calling the `compute` method.
(async () => {
try {
// `data` receives the joined results of the computations.
const data = await fc.compute(dataSource, computeProcedure, computeProcedureParams, memory);
} catch (err) { ... }
})();
// Second version of calling the `compute` method.
fc.compute(dataSource, computeProcedure, computeProcedureParams, memory)
.then(data => { ... })
.catch(err => { ... });
Another example
const FasterComputing = require('faster-computing');
const fc = new FasterComputing();
const mongoose = require('mongoose');
const path = require('path');
const System = require(path.join(__dirname, '/../models/system'));
await mongoose.connect('mongodb://localhost:27017/foo-bar');
const dataSource = {
data: await System.find({ }).limit(1e5)
};
async function computeProcedure(params) {
const { data, $__dirname } = params;
const path = require('path');
const { someAsyncFunction, someSyncFunction } = require(path.join($__dirname, '/../../subroutines'));
const results = [];
for (let item of data) {
const result = await someAsyncFunction(item);
results.push(someSyncFunction(result));
}
return results;
}
const computeProcedureParams = {
$__dirname: __dirname
};
const memory = 3500;
(async () => {
try {
const data = await fc.compute(dataSource, computeProcedure, computeProcedureParams, memory);
} catch (err) { ... }
})();
Specs
- {Class} FasterComputing - must be instantiated to access the `compute` method.
- {Function} compute(dataSource, computeProcedure, computeProcedureParams, memory) - the main function, which returns the joined results of your computations.
- param {Object} dataSource - an object which can contain only one of these properties: `data` or `dataFetchProcedure` (this property is not implemented yet).
  - prop {Array} data - the container for your data. It can only be an array, which will be split into balanced batches.
- param {Function || AsyncFunction} computeProcedure(params) - the function where your computation goes. This function cannot access the global scope; the only thing visible to it is `params` (which contains `computeProcedureParams`, the third parameter).
  - param {Object} params - this object will contain all of your parameters, plus the reserved key `data`, which will be a specific batch of your data.
- param {Object} computeProcedureParams - you can pass any parameter here, but keep in mind that the key `data` is reserved, and if you pass this key an `invalid_params` error will be thrown. You can access these params in `computeProcedure` like this: `const { data, foo, bar, baz } = params;`. Also consider that `computeProcedure`'s scope is this module's scope, so if you want to access some file in your local directory using `__dirname`, you must pass it as a parameter, for example like this: `{ $__dirname: __dirname }`.
- param {number} memory - the memory limit (in megabytes) which will be allocated for each child process. The memory for each child process will be the same, because the data batches will be balanced.
License
MIT
1.0.0
5 years ago