0.0.3 • Published 8 years ago

schwabbelini v0.0.3

Weekly downloads
4
License
MIT
Repository
github
Last release
8 years ago

[api] [github] [npm]

  • Schwabbelini is a library for scraping relational data from websites.

Example

  • iTunes podcasts
import Schwabbelini from '../app/Schwabbelini';
// Build the scraper instance from three config sections: the database store,
// the request scheduler, and the named transformations that the templates
// refer to in their last `|`-separated field.
const schwabbelini = new Schwabbelini({

  // Database connection settings. These are example credentials only —
  // replace them with your own.
  storeConfig: {
    client: 'pg',
    connection: {
      host     : 'localhost',
      user     : 'postgres',
      password : 'foobar',
      database : 'schwabbelini'
    }
  },

  // Scheduler tuning. NOTE(review): units of `delay` and the exact meaning of
  // `retry` are not shown here — confirm against the library docs.
  schedulerConfig: {
    delay: 100,
    concurrency: 10,
    retry : 3
  },

  transformations: {
    // Returns the `aria-label` attribute of the matched node.
    aria: document => document.model.attr('aria-label'),

    // Returns the hrefs of every non-selected link in the paginate list at
    // index 1, or the document itself when no such links exist.
    page: document => {
      const pages = document.model.find('ul.list.paginate').eq(1).find('li a:not(.selected)');
      // `Array.from` replaces the original `_.map`, which relied on a lodash
      // `_` that this example never imports. Assumes `pages` is an array-like
      // (cheerio/jQuery-style) selection — TODO confirm.
      const hrefs = Array.from(pages, elem => document.model.find(elem).attr('href'));
      return hrefs.length ? hrefs : document;
    }
  }
});

// Source templates: `list` is the entry URL; every other key holds a
// one-element array with a `|`-separated template string.
// NOTE(review): the fields look like `parent source | CSS selector |
// transformation` (e.g. `page` matches a key in `transformations`) — confirm
// against the library docs. Padding inside the strings is left untouched.
const sourceTemplates = {
  list:     'https://itunes.apple.com/us/genre/podcasts/id26?mt=2',
  subgenre: ['list     | ul.top-level-subgenres > li a | href'],
  letter:   ['subgenre | ul.list.alpha > li a          | href'],
  page:     ['letter   | html                          | page'],
  podcast:  ['page     | #selectedcontent li a         | href']
};

// Table templates: one entry per output table (`arts_podcast`), mapping each
// column name to a `|`-separated template string.
// NOTE(review): the fields look like `source | CSS selector | transformation`
// (e.g. `aria` matches a key in `transformations`) — confirm against the
// library docs. Padding inside the strings is left untouched.
const tableTemplates = {
  arts_podcast: {
    subgenre: 'list     | ul.breadcrumb                                       | text',
    id:       'podcast  | *                                                   | url',
    letter:   'letter     | ul.alpha a.selected                               | text',
    page:     'page   | ul.paginate a.selected                                | text',
    title:    'podcast  | #title h1                                           | text',
    rating:   'podcast  | #left-stack > div.extra-list.customer-ratings > div | aria',
    episodes: 'podcast  | span.track-count                                    | text'
  }
};

// Register the source and table templates, then call save().
// The original passed `artsTemplates`, which is not defined in this example;
// `sourceTemplates` is the object declared for this purpose.
schwabbelini
  .set(sourceTemplates, tableTemplates)
  .save();
0.0.3

8 years ago

0.0.2

8 years ago

1.6.0

8 years ago

1.5.0

8 years ago

1.4.0

8 years ago

1.3.0

8 years ago

1.2.0

8 years ago

1.1.0

8 years ago

1.0.0

9 years ago