1.2.4 • Published 9 years ago

flowesh v1.2.4

Weekly downloads
3
License
ISC
Repository
-
Last release
9 years ago

Flowesh

Flowesh is the non-cluster version of floodesh. It is a lightweight, middleware-based web spider that is easy to maintain.

Install

npm install flowesh

Usage

/**
 * Example spider that is attached to a Flowesh instance.
 *
 * The seed list is created per instance so that the pushes done in onInit
 * do not pile up on an array shared by every Spider through the prototype.
 */
function Spider(){
    this.name = 'MySpider';
    // Initial tasks. Each task is {opt, next}: `opt` holds the request options
    // and `next` is the name of a method on this spider ('parse' below).
    this.seed = [{
        opt:{
            uri:'http://www.baidu.com'
        },
        next:'parse'
    }];
}

Spider.prototype = {
    /**
     * Init hook: queue additional seeds, then signal that init is finished.
     * @param {Function} done - callback invoked once the seeds are queued
     */
    onInit:function(done){
        // do whatever you want on the init stage
        // (tasks use the same {opt, next} shape as the initial seed and as
        // the task pushed in parse)
        this.seed.push({
            opt:{
                uri:'http://www.hao123.com',
                priority:3
            },
            next:'parse'
        });
        this.seed.push({
            opt:{
                uri:'http://www.qq.com',
                priority:1
            },
            next:'parse'
        });
        done();
    },
    // onData is optional, happens after the response goes through all the middlewares
    onData:function(dataSet){
        console.log(dataSet.get('data'));
    },
    // onComplete is optional, happens after onData
    onComplete:function(ctx){
        console.log('%s complete', ctx.request.url);
    },
    /**
     * Log the page title (when there is one) and queue a follow-up task.
     * @param {Object} ctx - context exposing `content` (the page body as a string)
     *     and `tasks` (array that newly generated tasks are pushed into)
     * @param {Function} done - callback invoked when parsing is finished
     */
    parse:function(ctx, done){
        // `\/` is required: a bare `\t` inside a regex matches a TAB character,
        // so the closing tag would never be found.
        const title = ctx.content.match(/<title>([\s\S]*?)<\/title>/i);
        // match() returns null for pages without a <title>; skip logging then
        if (title !== null) {
            console.log(title[1]);
        }
        // if you have new tasks generated
        ctx.tasks.push({
            opt:{
                uri:'http://www.163.com',
                priority:0
            },
            next:'parse'
        });
        done();
    }
};

const Flowesh = require('flowesh'),
    // response middleware that detects the charset of the response
    charsetparser = require('mof-charsetparser'),
    // response middleware that converts the response to utf-8 encoding
    iconv = require('mof-iconv'),
    // response middleware that loads response into a jQuery object which has the same usage as jQuery
    cheerio = require('mof-cheerio'),
    // request middleware that corrects your queue options, e.g. attribute 'jquery' in your queue option will be changed into 'jQuery'
    normalizer = require('mof-normalizer'),
    // request middleware that adapts your queue options to meet request(https://github.com/request/request) requirements
    reqadapter = require('mof-reqadapter');

// Settings object passed to the Flowesh constructor.
const config = {
    schedule: {
        concurrent: 1,
        rate: 5000,
        priorityRange: 10, // default 10
    },
    request: {
        retry: 3,
    },
};

// Create the crawler from the config and attach a spider instance to it.
const flowesh = new Flowesh(config).attach(new Spider());

// middlewares will be executed in order, so the lists below are ordered on purpose

// request-side middlewares
for (const createMiddleware of [normalizer, reqadapter]) {
    flowesh.requestmw.use(createMiddleware());
}

// response-side middlewares
for (const createMiddleware of [charsetparser, iconv, cheerio]) {
    flowesh.responsemw.use(createMiddleware());
}

// start crawling
flowesh.start();
1.2.4

9 years ago

1.2.3

9 years ago

1.2.2

9 years ago

1.2.1

9 years ago

1.2.0

9 years ago

1.1.9

10 years ago

1.1.8

10 years ago

1.1.7

10 years ago

1.1.6

10 years ago

1.1.5

10 years ago

1.1.4

10 years ago

1.1.3

10 years ago

1.1.2

10 years ago

1.1.1

10 years ago

1.1.0

10 years ago

1.0.1

10 years ago

1.0.0

10 years ago