0.1.19 • Published 12 years ago

timothy v0.1.19

Weekly downloads
5
License
-
Repository
github
Last release
12 years ago

timothy: a Node.js library for building Hadoop jobs in JS

Timothy's primary goal is to make The Yellow Elephant rich and famous.

Basic Example

    // Word-count job: the mapper emits (word, 1) for every space-separated
    // token of each input line; the reducer emits, per word, the length of
    // the values collection it receives.
    // require timothy
    require('timothy')
        // basic configuration for the job: hadoop conf, input, output, name, etc
        .configure({	
             config: "./hadoop.xml",
             input:  "/test.txt",
             // the epoch-millisecond suffix makes the output path differ from run to run
             output: "/processed_"+(new Date().getTime()),
             name:   "Timothy Word Count Example"
        })
        // map function: one (line) or two (key, value) arguments
        .map(function(line){
            // NOTE: split(" ") yields empty strings for leading, trailing or
            // consecutive spaces, and those are emitted as words too
            var words = line.split(" ");
            for(var i=0; i<words.length; i++)
                emit(words[i], 1); // emit is used to generate output
        })
        // reduce function: two arguments (key, value)
        .reduce(function(word,counts){
            // counts is presumably the list of values emitted for this word
            // (the 1s from the mapper), so its length is the word count -- TODO confirm
            emit(word, counts.length);
        })
        // run function: creates the job, uploads it and blocks until
        // the execution has finished
        .run();

Testing in the local machine

    // Same word-count mapper/reducer as the basic example, but without a
    // configure() call and started with runLocal() against a local file.
    require('timothy')
        // map function: emits (word, 1) for each space-separated token of the line
        .map(function(line){
            var words = line.split(" ");
            for(var i=0; i<words.length; i++)
                emit(words[i], 1);
        })
        // reduce function: emits the number of values received for each word
        .reduce(function(word,counts){
            emit(word, counts.length);
        })
        // runLocal can be used instead of run to simulate the job execution
        // from the command line
        // NOTE(review): Node's fs APIs do not expand "~" themselves; confirm that
        // runLocal handles it, otherwise pass an absolute path
        .runLocal("~/Desktop/test_input.txt");

Initialising a job

    // Word-count job that uses setup() to define shared state (a counter x and
    // an inc() helper) on the global object before the map/reduce functions run.
    require('timothy')
        .configure({	
             config: "./hadoop.xml",
             input:  "/test.txt",
             output: "/processed_"+(new Date().getTime()),
             name:   "Timothy Word Count Example"
        })
        // variables and functions added to the global object will be available
        // in the map and reduce functions
        .setup(function(){
            // counter, starts at zero
            global.x = 0;
            // helper that increments the counter by one
            global.inc = function() {
                global.x = global.x + 1;
            };
        })
        .map(function(line){
            var words = line.split(" ");
            for(var i=0; i<words.length; i++) {
                // inc and x are bare references to the globals defined in setup()
                inc();
                // the emitted value is the running counter, not a constant 1;
                // the reducer below only reads counts.length
                emit(words[i], x);
            }
        })
        .reduce(function(word,counts){
            emit(word, counts.length);
        })
        // creates and submits the job
        .run();

Using node libraries

    // Word-count job that declares an npm dependency (node-uuid), loads it in
    // setup() and uses it from the reduce function.
    require('timothy')
        .configure({	
             config: "./hadoop.xml",
             input:  "/test.txt",
             output: "/processed_"+(new Date().getTime()),
             name:   "Timothy Word Count Example"
        })
        // Libraries can be added using the same syntax as
        // in a NPM package.json file
        .dependencies({"node-uuid":"1.3.3"})
        .setup(function(){
            // libraries can be required in the setup function
            // storing the module on global makes it reachable as `uuid` below
            global.uuid = require('node-uuid');
        })
        // map function: emits (word, 1) for each space-separated token of the line
        .map(function(line){
            var words = line.split(" ");
            for(var i=0; i<words.length; i++) {
                   emit(words[i], 1);
            }
        })
        .reduce(function(word,counts){
            // the usual word-count pair
            emit(word, counts.length);
            // an additional pair per reduce call, keyed by a uuid.v1() value,
            // showing that the library loaded in setup() is usable here
            emit(uuid.v1(),"10000000");
        })
        // creates and submits the job
        .run();

Status and counters

Status and counters for the job can be updated using the updateStatus and updateCounter functions.

0.1.19

12 years ago

0.1.18

12 years ago

0.1.17

12 years ago

0.1.16

12 years ago

0.1.14

13 years ago

0.1.13

13 years ago

0.1.12

13 years ago

0.1.11

13 years ago

0.1.10

13 years ago

0.1.9

13 years ago

0.1.8

13 years ago

0.1.7

13 years ago

0.1.6

13 years ago

0.1.5

13 years ago

0.1.4

13 years ago

0.1.3

13 years ago

0.1.2

13 years ago

0.1.1

13 years ago

0.1.0

13 years ago