express-robots-txt
Express middleware for generating a robots.txt or responding with an existing file.
Usage
// ESM usage requires a version of Node.js that supports conditional exports; see
// https://nodejs.org/api/esm.html#esm_conditional_exports
import express from 'express'
import robots from 'express-robots-txt'
// commonjs
const express = require('express')
const robots = require('express-robots-txt')
const app = express()
const port = 3000
app.use(robots({
UserAgent: '*',
Disallow: '/',
CrawlDelay: '5',
Sitemap: 'https://nowhere.com/sitemap.xml',
}))
app.listen(port)
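Once mounted, the middleware responds to requests for /robots.txt. A quick way to verify (a minimal sketch, assuming Node.js 18+ for the built-in fetch and the example server above listening on port 3000):
// A minimal check, assuming Node.js 18+ (built-in fetch) and the
// example server above listening on port 3000.
const res = await fetch('http://localhost:3000/robots.txt')
console.log(res.status)       // expect 200
console.log(await res.text()) // the generated robots.txt body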
Using a file
app.use(robots(__dirname + '/robots.txt'));
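The file's contents are served as-is. For example, a robots.txt file like this (hypothetical contents, mirroring the options shown above) would be returned verbatim:
User-agent: *
Disallow: /
Crawl-delay: 5
Sitemap: https://nowhere.com/sitemap.xml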
Using an object
Basic object
app.use(robots({
UserAgent: '*',
Disallow: '/'
}))
Will produce:
User-agent: *
Disallow: /
Crawl Delay
You can optionally pass a CrawlDelay, just like Disallow:
app.use(robots({
UserAgent: '*',
Disallow: '/',
CrawlDelay: '5'
}))
Will produce:
User-agent: *
Disallow: /
Crawl-delay: 5
Sitemap
You can optionally pass a Sitemap, just like CrawlDelay:
app.use(robots({
UserAgent: '*',
Disallow: '/',
CrawlDelay: '5',
Sitemap: 'https://nowhere.com/sitemap.xml'
}))
Will produce:
User-agent: *
Disallow: /
Crawl-delay: 5
Sitemap: https://nowhere.com/sitemap.xml
You can also pass in an array of Sitemaps:
app.use(robots({
UserAgent: '*',
Disallow: '/',
CrawlDelay: '5',
Sitemap: [
'https://nowhere.com/sitemap.xml',
'https://nowhere.com/sitemap2.xml'
]
}))
Will produce:
User-agent: *
Disallow: /
Crawl-delay: 5
Sitemap: https://nowhere.com/sitemap.xml
Sitemap: https://nowhere.com/sitemap2.xml
Or an array of objects:
app.use(robots([
{
UserAgent: 'Googlebot',
Disallow: '/no-google'
},
{
UserAgent: 'Bingbot',
Disallow: '/no-bing'
}
]));
Will produce:
User-agent: Googlebot
Disallow: /no-google
User-agent: Bingbot
Disallow: /no-bing
Or either property (UserAgent or Disallow) as an array:
app.use(robots([
{
UserAgent: [ 'Googlebot', 'Slurp' ],
Disallow: [ '/no-google', '/no-yahoo' ]
},
{
UserAgent: '*',
Disallow: [ '/no-bots', '/still-no-bots' ]
}
]));
Will produce:
User-agent: Googlebot
User-agent: Slurp
Disallow: /no-google
Disallow: /no-yahoo
User-agent: *
Disallow: /no-bots
Disallow: /still-no-bots
Host
You can optionally pass a Host, just like Disallow. Note that some crawlers may not support this directive.
app.use(robots({
UserAgent: '*',
Disallow: '/',
CrawlDelay: '5',
Host: 'foobar.com'
}))
Will produce:
User-agent: *
Disallow: /
Crawl-delay: 5
Host: foobar.com
Thanks
Originally forked from weo-edu/express-robots.