0.0.2 • Published 2 years ago

collective-action v0.0.2

Weekly downloads
-
License
MIT
Repository
github
Last release
2 years ago

Collective-action

Anyone can some "collective" action with puppeteer in asynchronously.
( And you should not to be annoyance to anyone's to be, please )

const CollectiveActionModule = require('collective-action');

const CollectiveAction = CollectiveActionModule.CollectiveAction;
const EvaluateFunction = CollectiveActionModule.EvaluateFunction;

async function init() {
  // Create new CollectiveAction !
  let collectiveAction = new CollectiveAction({
    // Evaluate settings
    evaluateFunctions: [
      new EvaluateFunction({
        name: "GooglePageTitle",
        // Regex about that filters analyze or not.
        urlRegex: /.*/,
        // Functions
        functions: {
          // This function will running in the before evaluation.
          prev: ((_this, url, response) => {
            // Skip evaluation when the status code isn't 200.
            let doesSkipEvaluate = (response._status != 200);

            console.log(`URL: ${url}`);
            console.log(`StatusCode: ${response._status}`);

            // It'll be skip evaluate if functions.prev returns 'True'.
            return doesSkipEvaluate;
          }),
          // Evaluation function.
          eval: (() => {
            return document.title;
          }),
          // This function will running in the after evaluation.
          after: ((_this, res) => {
            return res;
          })
        },
        // Puppeteer's waitUntil settings (this value uses in the "newPage" method)
        waitUntil: undefined
      }),
      // This one is a scraper that reads page titles in the search result
      new EvaluateFunction({
        name: "Results",
        urlRegex: /.*/,
        functions: {
          eval: (() => {
            let ret = []
            ret.push(document.body.innerText)
            let results = document.querySelectorAll(".LC20lb.MBeuO.DKV0Md");  // What is the ".LC20lb.MBeuO.DKV0Md" mean...? > GGL

            for (let i = 0; i < results.length; i++) {
              ret.push(results[i].innerText);
            }

            return ret;
          })
        },
        waitUntil: undefined
      })
    ],
    // How many puppeteer pages will be use in concurrentry. (Default: 10)
    concurrentPageNumber: 5
  });

  // Initialize this collective action.
  await collectiveAction.init();

  // google returns 10 result in 1 page * 15 times => get 150 results
  for (let i = 0; i < 15; i++) {
    // Pushing URL into the collectiveAction's queue
    ((start)=>{
      collectiveAction.push(`https://www.google.com/search?q=book&num=10&start=${(start * 10)}`);
    })(i);
  }

  // Show queue statuses
  console.log(`unanalyzed: ${collectiveAction.unanalyzedQueues.length}`);
  console.log(`analyzed: ${collectiveAction.analyzedQueues.length}`);

  // Analyzing URLs with 5 puppeteer pages.
  await collectiveAction.analyzeAll();

  // Show queue statuses
  console.log(`unanalyzed: ${collectiveAction.unanalyzedQueues.length}`);
  console.log(`analyzed: ${collectiveAction.analyzedQueues.length}`);

  // Add some URLs after analyzing...
  for (let i = 0; i < 15; i++) {
    ((start)=>{
      collectiveAction.push(`https://www.google.com/search?q=書籍&num=10&start=${(start * 10)}`);
    })(i);
  }

  // Show queue statuses again..
  console.log(`unanalyzed: ${collectiveAction.unanalyzedQueues.length}`);
  console.log(`analyzed: ${collectiveAction.analyzedQueues.length}`);

  // Analyze all...
  await collectiveAction.analyzeAll();

  // And done !
  console.log(`unanalyzed: ${collectiveAction.unanalyzedQueues.length}`);
  console.log(`analyzed: ${collectiveAction.analyzedQueues.length}`);

  // Show results.
  console.log(collectiveAction.results);

  // Close this collective action
  await collectiveAction.close();
}

!(async () => {
  await init();
})();