1.0.1 • Published 5 years ago

reservoir-sample-stdin v1.0.1

Weekly downloads
4
License
Unlicense
Repository
github
Last release
5 years ago

#!/usr/bin/env node
// The directive must sit on its own line: anything sharing the hashbang line
// is treated as part of the hashbang comment and never enables strict mode.
'use strict';

// Usage message printed to stderr when the NUMBER-OF-LINES argument is
// missing or invalid.
const USAGE = 'USAGE: $ cat files* | npx reservoir-sample-stdin NUMBER-OF-LINES';

/**
 * Apply reservoir sampling on an array: randomly sample a fixed number of
 * elements from a stream of arrays, in such a way that new data can be
 * incorporated incrementally.
 *
 * The first `maxSize` lines are appended to the reservoir. Every later line
 * is accepted with probability `maxSize / linesRead` and, when accepted,
 * overwrites a uniformly chosen slot of the reservoir.
 *
 * The reservoir is modified IN PLACE; only the updated line count is
 * returned, so callers must keep their own reference to the reservoir.
 *
 * Iterative usage example:
 *
 *     const reservoir = [];
 *     let linesRead = 0;
 *     const maxSize = 3;
 *     linesRead = reservoirSample(reservoir, linesRead, maxSize,
 *                                 [1, 2, 3, 4, 5]);
 *     linesRead = reservoirSample(reservoir, linesRead, maxSize,
 *                                 [10, 20, 30, 40, 50]);
 *     console.log(reservoir);
 *
 * @param {Array} reservoir current reservoir (mutated in place)
 * @param {number} linesRead number of lines read so far to build reservoir
 * @param {number} maxSize maximum size of reservoir
 * @param {Array} lines new lines to evaluate
 * @returns {number} the updated number of lines read
 */
function reservoirSample(reservoir, linesRead, maxSize, lines) {
  // Track the running count locally instead of reassigning the parameter.
  let seen = linesRead;
  for (const line of lines) {
    seen += 1;
    if (seen <= maxSize) {
      // Still filling the reservoir: keep every line.
      reservoir.push(line);
    } else if (Math.random() <= maxSize / seen) {
      // Reservoir is full: replace a random slot with the accepted line.
      const idx = Math.floor(Math.random() * maxSize);
      reservoir[idx] = line;
    }
  }
  return seen;
}

// Export for programmatic use. The guard keeps the definition loadable in
// environments where the CommonJS `module` object is not defined.
if (typeof module !== 'undefined' && module.exports) {
  module.exports = reservoirSample;
}

// Command-line entry point: sample NUMBER-OF-LINES lines from stdin and
// print them to stdout. Only runs when this file is executed directly.
if (module === require.main) {
  // Parse inputs
  const maxSize = Number.parseInt(process.argv[2], 10);
  // Rejects a missing argument (NaN), zero, and negative sizes alike.
  if (!(maxSize > 0)) {
    console.error(USAGE);
    process.exit(1);
  }

  // Prepare stdin
  process.stdin.setEncoding('utf8');

  // Parse stdin
  let tailBuffer = '';
  const reservoir = [];
  let linesRead = 0;

  // Split each chunk into complete lines; the last piece may be a partial
  // line, so it is held back in tailBuffer until the next chunk arrives.
  const processData = (data) => {
    const lines = (tailBuffer + data).split('\n');
    tailBuffer = lines.pop();
    // reservoirSample mutates `reservoir` in place and returns the new count.
    linesRead = reservoirSample(reservoir, linesRead, maxSize, lines);
  };
  process.stdin.on('data', processData);

  // Finalize trailing chunk of stdin and print reservoir
  process.stdin.on('end', () => {
    // A final line without a terminating newline is still sitting in
    // tailBuffer; feed it to the sampler explicitly so it is not dropped.
    if (tailBuffer !== '') {
      linesRead = reservoirSample(reservoir, linesRead, maxSize, [tailBuffer]);
      tailBuffer = '';
    }
    console.log(reservoir.join('\n'));
  });
}

1.0.1

5 years ago

1.0.0

5 years ago