1.0.0 • Published 4 years ago
hptquickscrape v1.0.0
https://travis-ci.org/github/hpt-dev/QuickScrape
QuickScrape
Web scraping extension for Puppeteer. Quickly scrape HTML page data into a JSON object.
Installation
Use the npm package manager to install QuickScrape (published on npm as hptquickscrape):
npm install hptquickscrape
Usage
const quickscrape = require('./quickscrape');

// Usage example: scrape a single page with a selector map and print the result.
(async () => {
  // Pages to scrape; passed as the first argument to quickscrape().
  const urls = [
    'https://www.sherdog.com/fighter/Tony-Ferguson-31239',
  ];

  // CSS selectors grouped under Text / Tables / Images.
  // NOTE(review): presumably the group decides how a match is extracted
  // (text content, table rows, image) and the key names are reused in the
  // output object — confirm against the quickscrape implementation.
  const selectors = {
    Text: {
      Name: 'body > div.container > div:nth-child(3) > div.col_left > section:nth-child(3) > div > h1 > span.fn',
      NickName: 'body > div.container > div:nth-child(3) > div.col_left > section:nth-child(3) > div > h1 > span.nickname > em',
      DOB: 'body > div.container > div:nth-child(3) > div.col_left > section:nth-child(3) > div > div.content > div:nth-child(1) > div > div.bio > div.birth_info > span.item.birthday > span',
      Age: 'body > div.container > div:nth-child(3) > div.col_left > section:nth-child(3) > div > div.content > div:nth-child(1) > div > div.bio > div.birth_info > span.item.birthday > strong',
      // Selector groups may nest: Wins is an object with its own keys.
      Wins: {
        Total: 'body > div.container > div:nth-child(3) > div.col_left > section:nth-child(3) > div > div.content > div:nth-child(1) > div > div.record > div > div > div:nth-child(1) > span.card > span.counter',
      },
    },
    Tables: {
      FightHistory: 'body > div.container > div:nth-child(3) > div.col_left > section:nth-child(5) > div > div.content.table > table tr',
    },
    Images: {
      Picture: 'body > div.container > div:nth-child(3) > div.col_left > section:nth-child(3) > div > div.content > div:nth-child(1) > img',
    },
  };

  const actual = await quickscrape(urls, selectors);
  console.log(actual);
})().catch((error) => {
  // Do not leave the promise floating: report the failure and signal it
  // through the exit code instead of relying on unhandled-rejection handling.
  console.error(error);
  process.exitCode = 1;
});
Contributing
Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
Please make sure to update tests as appropriate.
License
1.0.0
4 years ago