0.0.3 • Published 10 years ago

schwabbelini v0.0.3

Weekly downloads
4
License
MIT
Repository
github
Last release
10 years ago

[api] [github] [npm]

  • Schwabbelini is a library for scraping relational data from websites.

Example

  • iTunes podcasts
import Schwabbelini from '../app/Schwabbelini';
// Create the scraper instance. The options object has three sections:
// storeConfig, schedulerConfig and transformations.
var schwabbelini = new Schwabbelini({

  // Database settings for the store.
  // NOTE(review): the client/connection shape looks like a knex-style config
  // pointing at PostgreSQL — confirm against the library.
  // The credentials here are example placeholders; replace them locally.
  storeConfig: {
    client: 'pg',
    connection: {
      host     : 'localhost',
      user     : 'postgres',
      password : 'foobar',
      database : 'schwabbelini'
    }
  },

  // Request scheduling options.
  // NOTE(review): units and exact semantics are not visible in this example —
  // presumably delay is in milliseconds, concurrency is the number of parallel
  // requests and retry is the number of retries per failed request; confirm.
  schedulerConfig: {
    delay: 100,
    concurrency: 10,
    retry : 3
  },

  // Custom named transformations. Each one receives a `document` and reads
  // `document.model`. The names `aria` and `page` also appear as the third
  // field of the template strings further down in this example.
  // NOTE(review): `document.model` is used like a jQuery/cheerio-style
  // selection (attr/find/eq) — confirm.
  transformations: {
    // Returns the `aria-label` attribute of the document's model.
    aria: document => document.model.attr('aria-label'),
    // Collects the `href` of every non-selected link in the `ul.list.paginate`
    // match at index 1. Returns the array of hrefs when it is non-empty,
    // otherwise returns the document itself.
    page: document => {
      var pages = document.model.find('ul.list.paginate').eq(1).find('li a:not(.selected)');
      // NOTE(review): `_` is not imported in the visible example; it presumably
      // refers to lodash or underscore and must be in scope — confirm.
      var hrefs = _.map(pages, elem => document.model.find(elem).attr('href'));
      return hrefs.length ? hrefs : document;
    }
  }
});

// Source templates: one entry per kind of page to visit.
// `list` is a plain start URL. Every other entry is a one-element array holding
// a string with three `|`-separated fields:
//   1. the name of another key in this object (list -> subgenre -> letter ->
//      page -> podcast),
//   2. what looks like a CSS selector,
//   3. a name (`href` or `page`); `page` matches the custom transformation
//      passed to the Schwabbelini constructor.
// NOTE(review): the field semantics are inferred from this example only —
// confirm against the library documentation.
var sourceTemplates = {
  list:     'https://itunes.apple.com/us/genre/podcasts/id26?mt=2',
  subgenre: ['list     | ul.top-level-subgenres > li a | href'],
  letter:   ['subgenre | ul.list.alpha > li a          | href'],
  page:     ['letter   | html                          | page'],
  podcast:  ['page     | #selectedcontent li a         | href']
};

// Table templates: each top-level key (here `arts_podcast`) maps field names
// to a string with three `|`-separated fields:
//   1. the name of a key in `sourceTemplates` (list, letter, page, podcast),
//   2. what looks like a CSS selector (`*` for the `id` field),
//   3. a name (`text`, `url` or `aria`); `aria` matches the custom
//      transformation passed to the Schwabbelini constructor.
// NOTE(review): presumably the top-level key is the database table name and
// the inner keys are its columns — confirm against the library documentation.
var tableTemplates = {
  arts_podcast: {
    subgenre: 'list     | ul.breadcrumb                                       | text',
    id:       'podcast  | *                                                   | url',
    letter:   'letter     | ul.alpha a.selected                               | text',
    page:     'page   | ul.paginate a.selected                                | text',
    title:    'podcast  | #title h1                                           | text',
    rating:   'podcast  | #left-stack > div.extra-list.customer-ratings > div | aria',
    episodes: 'podcast  | span.track-count                                    | text'
  }
};

// Hand both template objects to the scraper, then call save() on the result.
// The first argument is `sourceTemplates`, the object defined earlier in this
// example; no `artsTemplates` binding exists here, so referencing that name
// would throw a ReferenceError.
schwabbelini
  .set(sourceTemplates, tableTemplates)
  .save();
0.0.3

10 years ago

0.0.2

10 years ago

1.6.0

10 years ago

1.5.0

10 years ago

1.4.0

10 years ago

1.3.0

10 years ago

1.2.0

10 years ago

1.1.0

10 years ago

1.0.0

10 years ago