@web-master/node-web-scraper NPM

Description

Scrapes data from a specified web page using CSS selectors and returns it as a structured object :)

Installation

$ npm install --save @web-master/node-web-scraper

Usage

Basic

import scrape from '@web-master/node-web-scraper';

const data = await scrape({
  target: 'http://example.com',
  fetch: {
    title: 'h1',
    info: {
      selector: 'p > a',
      attr: 'href',
    },
  },
});

console.log(data);

// {
//   title: 'Example Domain',
//   info: 'http://www.iana.org/domains/example'
// }

Waitable (by using `puppeteer`)

import scrape from '@web-master/node-web-scraper';

const data = await scrape({
  target: 'https://news.ycombinator.com/item?id=20821022',
  waitFor: 3 * 1000, // wait for the content to load (e.g. single-page apps)
  fetch: {
    title: '.title > a',
  },
});

console.log(data);

// {
//   title: 'How we reduced deployment times by 95%'
// }

TypeScript Support

import scrape from '@web-master/node-web-scraper';

interface WikiSite {
  url: string;
}

interface Wikipedia {
  sites: WikiSite[];
}

const wiki: Wikipedia = await scrape({
  target: 'https://www.wikipedia.org',
  fetch: {
    sites: {
      listItem: '.central-featured a.link-box',
      data: {
        url: {
          attr: 'href',
          convert: (x: string) => `https:${x}`,
        },
      },
    },
  },
});

console.log(wiki.sites);

// [
//   { url: 'https://en.wikipedia.org/' },
//   { url: 'https://ja.wikipedia.org/' },
//   ...
//   ...
//   { url: 'https://de.wikipedia.org/' }
// ]