1.5.7 • Published 3 years ago
crawlx v1.5.7
crawlx
⚡Lightweight web crawler with powerful plugins!
const x = require("crawlx").default;
// Crawl the quotes listing page, print the parsed result, and register
// followAuthorRule for the author links selected by the follow rule.
x({
  url: "http://quotes.toscrape.com/",
  // Parse rule with three parts: a selector string, a field schema object,
  // and a post-processing callback.
  // NOTE(review): the selector syntax ("[...]", "@href") belongs to the
  // crawlx parse plugin — confirm exact semantics against its documentation.
  parse: [
    "[.quote]",
    {
      author: ".author",
      authorUrl: ".author+a@href",
      text: ".text",
      tags: "[a.tag]",
      type: () => "quote",
    },
    // Stamp the value handed to this callback with the current time and
    // return it (explicit statements instead of the comma operator).
    (s) => {
      s.crawled = new Date();
      return s;
    },
  ],
  follow: ["[.author+a@href]", followAuthorRule],
})
  .then((task) => {
    console.log(task.parsed);
  })
  .catch((err) => {
    // Report failures instead of leaving the rejection unhandled, and mark
    // the process as failed without forcing an immediate exit.
    console.error(err);
    process.exitCode = 1;
  });
/**
 * Build the task options used when following a link to an author page.
 *
 * @param url - Target of the follow-up request (presumably the href matched
 *   by the follow selector — confirm against the crawlx documentation).
 * @returns {{url: *, parse: object, callback: Function}} An object carrying
 *   the given url, a parse schema with `name` and `born` rules, and a
 *   callback that logs `task.parsed`.
 */
function followAuthorRule(url) {
  // Second stage of the `name` rule: upper-case whatever the first stage yields.
  const upperCase = (value) => value.toUpperCase();

  const parse = {
    name: ["h3 | reverse", upperCase],
    born: ".author-born-date | date",
  };

  // Print the parsed result of the followed page.
  const callback = (task) => {
    console.log(task.parsed);
  };

  return { url, parse, callback };
}
Features
- Make HTTP requests with got
- Priority queue of requests
- Simple plugin system
- Promise support
- Flexible schema with powerful parse plugin, using only one rule object
- Easily paginate and follow links with builtin follow plugin
- Spawner mode: add a url directly
Installation
npm install crawlx
Documentation
Documentation: crawlx.js.org
See more examples: crawlx/examples
1.5.7
3 years ago
1.5.6
3 years ago
1.5.5
4 years ago
1.5.4
4 years ago
1.5.3
4 years ago
1.5.2
4 years ago
1.5.1
4 years ago
1.5.0
4 years ago
1.4.9
4 years ago
1.4.8
4 years ago
1.4.6
4 years ago
1.4.7
4 years ago
1.4.5
4 years ago
1.4.1
4 years ago
1.4.0
4 years ago
1.3.0
4 years ago
1.2.5
4 years ago
1.2.4
4 years ago
1.2.0
4 years ago
1.2.3
4 years ago
1.2.2
4 years ago
1.2.1
4 years ago
1.1.3
4 years ago
1.1.2
4 years ago
1.1.1
4 years ago
1.1.0
4 years ago
1.0.0
4 years ago