1.0.1 • Published 7 years ago

easycrawler v1.0.1

Weekly downloads
2
License
-
Repository
github
Last release
7 years ago

Easy Crawler

A simple web crawler for Node.js that supports concurrent connections and queues.

Installation

npm install easycrawler

Usage

// Load the crawler constructor exported by the easycrawler package.
const Crawler = require('easycrawler');

// Options passed to the Crawler constructor.
const options = {
	thread: 5,
	logs: true,
	depth: 2,
	headers: { 'user-agent': 'foobar' },
	// Only URLs containing one of these strings will be crawled.
	onlyCrawl: ['reddit', 'reddit.com'],
	// Links containing any of these strings are rejected.
	reject: ['rutube'],
	// Presumably called for each page fetched successfully; the example
	// reads `data.url` and `data.body`.
	onSuccess: function (data) {
		// console.log(data.url);
		// console.log(data.body);
	},
	// Presumably called when a request fails; the example reads
	// `data.url` and `data.status`.
	onError: function (data) {
		// console.log(data.url);
		// console.log(data.status);
	},
	// `urls.crawled` holds the visited URLs and `urls.discovered` holds the
	// discovered URLs.
	onFinished: function (urls) {
		// console.log(urls.crawled);
		// console.log(urls.discovered);
	}
};

const crawler = new Crawler(options);

// Kick off the crawl at this URL.
crawler.crawl('http://www.reddit.com/');