3.2.8 • Published 6 months ago
sitemapper v3.2.8
Sitemap-parser
Parse a sitemap's XML to get all the URLs for your crawler.
Version 2
Installation
npm install sitemapper --save
Simple Example
const Sitemapper = require('sitemapper');
const sitemap = new Sitemapper();
sitemap.fetch('https://wp.seantburke.com/sitemap.xml').then(function(sites) {
console.log(sites);
});
Examples in ES6
import Sitemapper from 'sitemapper';
(async () => {
const Google = new Sitemapper({
url: 'https://www.google.com/work/sitemap.xml',
timeout: 15000, // 15 seconds
});
try {
const { sites } = await Google.fetch();
console.log(sites);
} catch (error) {
console.log(error);
}
})();
// or
const sitemapper = new Sitemapper();
sitemapper.timeout = 5000;
sitemapper.fetch('https://wp.seantburke.com/sitemap.xml')
.then(({ url, sites }) => console.log(`url:${url}`, 'sites:', sites))
.catch(error => console.log(error));
Options
You can add options on the initial Sitemapper object when instantiating it.
requestHeaders: (Object) - Additional request headers (e.g. User-Agent)
timeout: (Number) - Maximum timeout in ms for a single URL. Default: 15000 (15 seconds)
url: (String) - Sitemap URL to crawl
debug: (Boolean) - Enables/disables debug console logging. Default: False
concurrency: (Number) - Sets the maximum number of concurrent sitemap crawling threads. Default: 10
retries: (Number) - Sets the maximum number of retries to attempt in case of an error response (e.g. 404 or Timeout). Default: 0
rejectUnauthorized: (Boolean) - If true, it will throw on invalid certificates, such as expired or self-signed ones. Default: True
lastmod: (Number) - Timestamp of the minimum lastmod value allowed for returned URLs
const sitemapper = new Sitemapper({
url: 'https://art-works.community/sitemap.xml',
rejectUnauthorized: true,
timeout: 15000,
requestHeaders: {
'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:81.0) Gecko/20100101 Firefox/81.0'
}
});
An example using most of the available options (rejectUnauthorized and lastmod are omitted here):
const sitemapper = new Sitemapper({
url: 'https://art-works.community/sitemap.xml',
timeout: 15000,
requestHeaders: {
'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:81.0) Gecko/20100101 Firefox/81.0'
},
debug: true,
concurrency: 2,
retries: 1,
});
Examples in ES5
var Sitemapper = require('sitemapper');
var Google = new Sitemapper({
url: 'https://www.google.com/work/sitemap.xml',
timeout: 15000 //15 seconds
});
Google.fetch()
.then(function (data) {
console.log(data);
})
.catch(function (error) {
console.log(error);
});
// or
var sitemapper = new Sitemapper();
sitemapper.timeout = 5000;
sitemapper.fetch('https://wp.seantburke.com/sitemap.xml')
.then(function (data) {
console.log(data);
})
.catch(function (error) {
console.log(error);
});
Version 1
npm install sitemapper@1.1.1 --save
Simple Example
var Sitemapper = require('sitemapper');
var sitemapper = new Sitemapper();
sitemapper.getSites('https://wp.seantburke.com/sitemap.xml', function(err, sites) {
if (!err) {
console.log(sites);
}
});
3.2.8
6 months ago
3.2.7
6 months ago
3.2.6
2 years ago
3.2.5
2 years ago
3.2.4
2 years ago
3.2.3
2 years ago
3.2.2
2 years ago
3.2.1
2 years ago
3.2.0
2 years ago
3.1.15
3 years ago
3.1.12
3 years ago
3.1.11
3 years ago
3.1.10
3 years ago
3.1.8
3 years ago
3.1.9
3 years ago
3.1.7
3 years ago
3.1.3
3 years ago
3.1.1
3 years ago
3.1.4
3 years ago
3.1.2
3 years ago
3.0.9
3 years ago
3.1.0
3 years ago
3.0.5
4 years ago
3.0.4
4 years ago
3.0.3
4 years ago
3.0.2
4 years ago
2.2.0
6 years ago
2.1.14
6 years ago
2.1.13
7 years ago
2.1.12
7 years ago
2.1.7
8 years ago
2.1.6
8 years ago
2.1.5
8 years ago
2.1.4
8 years ago
2.1.1
8 years ago
2.1.0
8 years ago
2.0.0
8 years ago
1.1.1
8 years ago
1.1.0
8 years ago
1.0.4
8 years ago
1.0.3
8 years ago
1.0.1
8 years ago
1.0.0
8 years ago
0.0.3
8 years ago
0.0.2
8 years ago
0.0.1
10 years ago