2.1.0 • Published 2 years ago

@gabrielflores/cakez v2.1.0

Weekly downloads
-
License
ISC
Repository
-
Last release
2 years ago

What is Cakez

Cakez is a package created to facilitate web scraping.

Installation

npm install @gabrielflores/cakez

Quick start

const cakez = require("@gabrielflores/cakez")

cakez({
      //url you want to scrape
      //not optional
      url: "",
      
      //element(s) you want to scrape
      //not optional
      scrape: [

            //element 1
            {
                  //data_path = querySelector(element class, id, selector, ...)
                  //Personally I recommend using the selector and not the element id or class
                  data_path: "",

                  //attributes from element that you want to scrape
                  //data_attr special value -> "children" = element innerText
                  //example: 
                        //data_attr: ["src", "alt", "class"]
                        //or
                        //data_attr: "children"
                  data_attr: "" 
            },
            //there is no limit to the number of elements, pass as many as you want and enjoy!
      ],

      //request method(get, post, delete, .....)
      //optional | default = "get"
      //example:
            //method: "post"
            //or 
            //method: "get"
      //method,

      //in POST requests, values are sent in the "body" of the request
      //optional | default = {}
      //example:
            //request_body: {name: "your name"}
      //request_body,
})
      .then(res => {
            console.log(res)
      })
      .catch(err => {
            console.log(err)
      })

Use example

const cakez = require("@gabrielflores/cakez")

cakez({
      //url you want to scrape
      //not optional
      url: "https://www.bbc.com/culture/article/20191220-the-20-best-songs-of-2019",
      
      //element(s) you want to scrape
      //not optional
      scrape: [

            //element 1
            {
                  //data_path = querySelector(element class, id, selector, ...)
                  //Personally I recommend using the selector and not the element id or class
                  data_path: "#culturearticle20191220-the-20-best-songs-of-2019 > div.article__container > div > div > article > div > div.article__intro.b-font-family-serif",

                  //attributes from element that you want to scrape
                  //data_attr special value -> "children" = element innerText
                  //example: 
                        //data_attr: ["src", "alt", "class"]
                        //or
                        //data_attr: "children"
                  data_attr: "children" //"children" = element innerText
            },

            //element 2
            {
                  data_path: ".article-body__image-text img",
                  data_attr: ["src", "alt"]
            },

            //there is no limit to the number of elements, pass as many as you want and enjoy!
      ],

      //request method(get, post, delete, .....)
      //optional | default = "get"
      //example:
            //method: "post"
            //or 
            //method: "get"
      //method,

      //in POST requests, values are sent in the "body" of the request
      //optional | default = {}
      //example:
            //request_body: {name: "your name"}
      //request_body,
})
      .then(res => {
            console.log(res)
      })
      .catch(err => {
            console.log(err)
      })

Powered by

Dev

  • Gabriel Flores

Supporters

  • Pedro Benedito F.
  • José Reinaldo A.
  • Rita Aparecida M.
2.1.0

2 years ago

1.1.5

2 years ago

1.1.4

2 years ago

1.1.3

2 years ago

1.1.2

2 years ago

1.1.1

2 years ago

1.2.0

2 years ago

1.1.0

2 years ago

1.0.0

2 years ago