1.0.1 • Published 10 years ago

level-datablobs v1.0.1

Weekly downloads
3
License
MIT
Repository
github
Last release
10 years ago

level-datablobs

Loads blobs of data into LevelDB reliably.

A datablob is any file containing data that you want to turn into many pieces of data, for example a spreadsheet that becomes many rows.

level-datablobs provides a streaming API, and tracks which blobs have been processed. Each blob is processed in order, and can be immutably reprocessed.

Example

// Build a DataBlobs instance from four arguments: a data database, a metadata
// database, a content-addressable store for the raw blobs, and a function
// that loads a single blob into the database.
var db = DataBlobs(
  // a database to store the data.
  level(path.join(dirname, 'db'), {encoding: 'json'}),

  // a database to store metadata about what has been processed.
  level(path.join(dirname, 'meta'), {encoding: 'json'}),

  // a content-addressable store to hold the raw datablobs.
  cas(path.join(dirname, 'blobs')),

  // a function that puts a datablob into the database.
  // `stream` is piped through the steps below; `cb` is registered as the
  // 'close' listener on the final write stream.
  function (stream, cb) {
    stream
      // presumably emits one object per CSV row, keyed by column name -
      // confirm against the csv module in use.
      .pipe(csv.createStream())
      .pipe(through(function (data) {
        // Build a cleaned copy of the row: keys and values are trimmed, and
        // any value that isNaN() does not reject is converted with unary +.
        // NOTE(review): isNaN('') is false, so an empty (or all-whitespace)
        // cell is stored as the number 0 - confirm this is intended.
        var obj = {}
        for(var k in data) {
          var value = data[k].trim()
          obj[k.trim()] = isNaN(value) ? value : + value
        }
        // Queue a 'put' operation for this row. `first` is defined elsewhere;
        // presumably it returns the value of the row's first column - confirm.
        this.queue({key: first(obj), value: obj, type: 'put'})
      }))
      // using a pull stream to write,
      // because levelup's write stream emits close too early.
      // see levelup issue https://github.com/rvagg/node-levelup/issues/247
      // NOTE(review): `db` below is the variable assigned by this very
      // statement, so it is only defined once DataBlobs(...) has returned -
      // confirm this function is never invoked before then.
      .pipe(toStream(pl.write(db)).on('close', cb))
  }
)

License

MIT