1.0.0 • Published 5 years ago

@iryu54/scraper v1.0.0

Weekly downloads
1
License
MIT
Repository
-
Last release
5 years ago

Scraper

Scrape a website using a configuration

Usage

npm i @iryu54/scraper

Crawl page

// Fetch the page at <URL>; the awaited result is stored in `html`
// (presumably the page markup — confirm against the package).
// NOTE: `await` is only valid inside an async function or an ES module,
// so wrap these lines in an async function when loading with `require`.
const Scraper = require('@iryu54/scraper')
const html = await Scraper.fetch('<URL>')

Scrape with a config

// index.js
// Load the rule array exported by test.conf.js and pass it, together with
// the target URL, to Scraper.scrapUrl; the awaited result is kept in `parsedSite`.
// NOTE: `await` is only valid inside an async function or an ES module,
// so wrap the call in an async function when loading with `require`.
const config = require('../test.conf')
const Scraper = require('@iryu54/scraper')
const parsedSite = await Scraper.scrapUrl('<URL>', config)
// test.conf.js
// Exports an array of rule objects. In each rule, "title" names the output
// field and "value" is the CSS selector to look up; the remaining keys
// ("text", "attr", "transform", "html", "children") describe how the value
// is extracted, as explained on each rule below.
module.exports = [
  { // find .main-title and store the value in the name field. The value is read from the innerText of the HTMLElement
    "title": "name",
    "value": ".main-title",
    "text": true
  },
  { // find .wrapper img and store the value in the img field. The value is read from the src attribute
    "title": "img",
    "value": ".wrapper img",
    "attr": "src"
  },
  { // find .count input and store the value in the nbPerson field. The value is read from the value attribute, then converted to a number by transform
    "title": "nbPerson",
    "value": ".count input",
    "attr": "value",
    "transform": value => +value
  },
  { // find .list and store the value in the preparation field. The value is read from the innerText, then the html callback is applied to the Cheerio selection
    "title": "preparation",
    "value": ".list",
    "text": true,
    // NOTE(review): `cheerio` is used below but is not required anywhere in
    // this snippet — presumably it has to be required in this file; confirm.
    html: value => {
      // Turn the selection into an array of elements and build one HTML fragment per element.
      return value.toArray()
        .map(($preparation, i) => {
          // Re-parse the element on its own so its <h3> can be removed
          // before the .recipe-item text is extracted.
          $preparation = cheerio.load(cheerio.html($preparation))
          $preparation('h3').remove()
          // Emit a numbered step heading (1-based) followed by the trimmed step text.
          return `<h3>Etape ${i + 1}</h3>\n<p>${$preparation('.recipe-item').text().trim()}</p>`
        }).join('\n')
    }
  },
  { // apply the child rules inside the .list HTMLElement and store the results as an array in the ingredients field
    "title": "ingredients",
    "value": ".list",
    "children": [
      {
        "title": "quantity",
        "value": ".recipe-ingredient-qt",
        "text": true
      },
      {
        "title": "name",
        "value": ".ingredient",
        "text": true
      }
    ]
  }
]