Merge pull request #57 from Klemek/f-ignore-bots

ignore bots in hit counter
This commit is contained in:
Klemek
2021-04-04 23:31:38 +02:00
committed by GitHub
10 changed files with 228 additions and 14 deletions
+1
View File
@@ -2,6 +2,7 @@
/node_modules /node_modules
/config.json /config.json
/config.example.json /config.example.json
/robots_list.json
/data /data
/data/* /data/*
/test_data /test_data
+5
View File
@@ -336,6 +336,11 @@ Any URL like `/year/month/day/anything/` will redirect to this article (and link
* `hit_counter` * `hit_counter`
* `unique_visitor_timeout`: (default: 7200000 / 2h) * `unique_visitor_timeout`: (default: 7200000 / 2h)
specify the time (in ms) before a visitor can be accounted again specify the time (in ms) before a visitor can be accounted again
* `robots`
* `list_url`: (default: https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json)
URL from which the list of web crawler patterns is fetched
* `list_file`: (default: robots_list.json)
file in which the fetched web crawler patterns are stored
* `redis` * `redis`
Options to connect to redis (see [redis options](https://github.com/NodeRedis/node-redis#options-object-properties) for more info) Options to connect to redis (see [redis options](https://github.com/NodeRedis/node-redis#options-object-properties) for more info)
* `host`: (default: localhost) * `host`: (default: localhost)
+2 -2
View File
@@ -1,11 +1,11 @@
{ {
"name": "gitblog.md", "name": "gitblog.md",
"version": "1.2.8", "version": "1.3.2",
"lockfileVersion": 2, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"version": "1.2.8", "version": "1.3.2",
"hasInstallScript": true, "hasInstallScript": true,
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
+1 -1
View File
@@ -1,6 +1,6 @@
{ {
"name": "gitblog.md", "name": "gitblog.md",
"version": "1.3.1", "version": "1.3.2",
"description": "A static blog using Markdown pulled from your git repository.", "description": "A static blog using Markdown pulled from your git repository.",
"main": "src/server.js", "main": "src/server.js",
"dependencies": { "dependencies": {
+22 -2
View File
@@ -61,6 +61,23 @@ module.exports = (config) => {
} }
}, },
); );
const botDetector = require('./bot_detector')(config);
botDetector.load((status, err) => {
switch (status) {
case botDetector.status.FETCH_OK:
console.log(cons.ok, 'fetched robots list');
break;
case botDetector.status.FETCH_ERROR:
console.error(cons.error, 'error fetching robots list : ' + err);
break;
case botDetector.status.READ_OK:
console.log(cons.ok, `read robots list: ${botDetector.count}`);
break;
case botDetector.status.READ_ERROR:
console.error(cons.error, 'error reading robots list : ' + err);
break;
}
});
// set view engine from configuration // set view engine from configuration
app.set('view engine', config['view_engine']); app.set('view engine', config['view_engine']);
@@ -145,6 +162,9 @@ module.exports = (config) => {
}); });
app.use(limiter); app.use(limiter);
//detect robots
app.use(botDetector.handle);
//log request at result end //log request at result end
app.use((req, res, next) => { app.use((req, res, next) => {
if (config['access_log']) { if (config['access_log']) {
@@ -168,7 +188,7 @@ module.exports = (config) => {
if (err) { if (err) {
showError(req, res, 404); showError(req, res, 404);
} else { } else {
hc.count(req, '/', () => { hc.count(req, '/', req.isRobot, () => {
render(req, res, homePath, render(req, res, homePath,
{ {
articles: Object.values(articles) articles: Object.values(articles)
@@ -271,7 +291,7 @@ module.exports = (config) => {
showError(req, res, 404); showError(req, res, 404);
} }
} else { } else {
hc.count(req, articlePath, () => { hc.count(req, articlePath, req.isRobot, () => {
renderer.render(article.realPath, (err, html) => { renderer.render(article.realPath, (err, html) => {
if (err) { if (err) {
console.log(cons.error, `failed to render article ${req.path} : ${err}`); console.log(cons.error, `failed to render article ${req.path} : ${err}`);
+68
View File
@@ -0,0 +1,68 @@
const https = require('https');
const fs = require('fs');
module.exports = (config) => {
const _this = {
status: {
FETCH_OK: 1,
FETCH_ERROR: 2,
READ_OK: 3,
READ_ERROR: 4,
},
count: [],
regex: null,
knownBots: [],
known: [],
};
const fetchList = (cb) => {
https.get(config['robots']['list_url'], (res) => {
if (res.statusCode !== 200) {
cb(res.statusCode);
} else {
const file = fs.createWriteStream(config['robots']['list_file']);
res.pipe(file);
file.on('finish', () => {
file.close(cb);
});
}
}).on('error', (err) => {
cb(err.message);
});
};
const readFile = (cb) => {
fs.readFile(config['robots']['list_file'], { encoding: 'utf-8' }, (err, data) => {
if (err) {
cb(err, undefined);
} else {
try {
cb(undefined, JSON.parse(data));
} catch (err) {
cb(err, undefined);
}
}
});
};
_this.load = (cb) => {
fetchList((err) => {
cb(err ? _this.status.FETCH_ERROR : _this.status.FETCH_OK, err);
readFile((err, data) => {
if (!err) {
_this.count = data.length;
_this.regex = new RegExp('(' + data.filter(v => v['pattern']).map(v => v['pattern'])
.join('|') + ')');
}
cb(err ? _this.status.READ_ERROR : _this.status.READ_OK, err);
});
});
};
_this.handle = (req, res, next) => {
req.isRobot = !!((req.headers['user-agent'] || '').match(_this.regex));
next();
};
return _this;
};
+4
View File
@@ -63,6 +63,10 @@
"hit_counter": { "hit_counter": {
"unique_visitor_timeout": 7200000 "unique_visitor_timeout": 7200000
}, },
"robots": {
"list_url": "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json",
"list_file": "robots_list.json"
},
"redis": { "redis": {
"host": "localhost", "host": "localhost",
"port": 6379 "port": 6379
+2 -2
View File
@@ -8,8 +8,8 @@ module.exports = (config, onConnect, onError) => {
const visitors = {}; const visitors = {};
const count = (req, path, cb) => { const count = (req, path, disable, cb) => {
if (!client.connected) { if (!client.connected || disable) {
cb(); cb();
} else { } else {
const ip = req.headers['x-forwarded-for'] || req.connection.remoteAddress; const ip = req.headers['x-forwarded-for'] || req.connection.remoteAddress;
+116
View File
@@ -0,0 +1,116 @@
const fs = require('fs');
const utils = require('./test_utils');
// Scratch directory holding every robots list file written by this suite.
const dataDir = 'test_data';

// Shared configuration object; individual tests replace `robots` before loading.
const config = {
  robots: {
    list_url: '',
    list_file: `${dataDir}/robots_list.json`,
  },
};

// Begin with a freshly created, empty scratch directory.
beforeAll(() => {
  utils.deleteFolderSync(dataDir);
  fs.mkdirSync(dataDir);
});

// Drop the scratch directory when the suite is over, if it is still there.
afterAll(() => {
  if (!fs.existsSync(dataDir)) {
    return;
  }
  utils.deleteFolderSync(dataDir);
});
const botDetector = require('../src/bot_detector')(config);
describe('load()', () => {
  /**
   * Build a load() callback expecting `first` on its first call and `second`
   * on every later call; the test is finished from the second call onwards.
   * `inspect`, when given, receives the error argument of each call.
   */
  const expectStatuses = (first, second, done, inspect) => {
    let calls = 0;
    return (status, err) => {
      if (inspect) {
        inspect(err);
      }
      const isFirstCall = calls === 0;
      expect(status).toBe(isFirstCall ? first : second);
      if (!isFirstCall) {
        done();
      }
      calls += 1;
    };
  };

  test('success', (done) => {
    config.robots = {
      list_url: 'https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json',
      list_file: `${dataDir}/robots_list_success.json`,
    };
    const { FETCH_OK, READ_OK } = botDetector.status;
    botDetector.load(expectStatuses(FETCH_OK, READ_OK, done, (err) => {
      expect(err).not.toBeDefined();
    }));
  });

  test('fetch and file failure', (done) => {
    config.robots = {
      list_url: 'https://blog.klemek.fr/invalid.json',
      list_file: `${dataDir}/robots_list_fail_1.json`,
    };
    const { FETCH_ERROR, READ_ERROR } = botDetector.status;
    botDetector.load(expectStatuses(FETCH_ERROR, READ_ERROR, done));
  });

  test('fetch failure and file ok', (done) => {
    config.robots = {
      list_url: 'https://blog.klemek.fr/invalid.json',
      list_file: `${dataDir}/robots_list_fail_2.json`,
    };
    const { FETCH_ERROR, READ_OK } = botDetector.status;
    // a valid (empty) list is already on disk, so only the fetch fails
    fs.writeFile(config.robots.list_file, '[]\n', { encoding: 'utf-8' }, () => {
      botDetector.load(expectStatuses(FETCH_ERROR, READ_OK, done));
    });
  });
});
describe('handle()', () => {
  // Minimal request stub carrying only the user-agent header.
  const requestFrom = (userAgent) => ({
    headers: {
      'user-agent': userAgent,
    },
  });

  // Load a one-pattern list from disk; the fetch itself is expected to fail.
  beforeAll((done) => {
    config.robots = {
      list_url: 'https://blog.klemek.fr/invalid.json',
      list_file: `${dataDir}/robots_list_fake.json`,
    };
    fs.writeFile(config.robots.list_file, '[{"pattern":"bot"}]\n', { encoding: 'utf-8' }, () => {
      botDetector.load((status) => {
        // skip the fetch failure notification and wait for the next status
        if (status === botDetector.status.FETCH_ERROR) {
          return;
        }
        done();
      });
    });
  });

  test('not bot', (done) => {
    const req = requestFrom('my user agent');
    botDetector.handle(req, null, () => {
      expect(req.isRobot).toBeFalsy();
      done();
    });
  });

  test('bot', (done) => {
    const req = requestFrom('bot');
    botDetector.handle(req, null, () => {
      expect(req.isRobot).toBeTruthy();
      done();
    });
  });
});
+7 -7
View File
@@ -98,7 +98,7 @@ describe('read()', () => {
hc.count({ hc.count({
headers: {}, headers: {},
connection: { remoteAddress: 'test1' }, connection: { remoteAddress: 'test1' },
}, '/test/path/5', () => { }, '/test/path/5', false, () => {
hc.read('/test/path/5', (data) => { hc.read('/test/path/5', (data) => {
expect(data).toBeDefined(); expect(data).toBeDefined();
expect(data.current_visitors).toBe(1); expect(data.current_visitors).toBe(1);
@@ -111,7 +111,7 @@ describe('read()', () => {
hc.count({ hc.count({
headers: {}, headers: {},
connection: { remoteAddress: 'test1' }, connection: { remoteAddress: 'test1' },
}, '/test/path/5', () => { }, '/test/path/5', false, () => {
hc.read('/test/path/5', (data) => { hc.read('/test/path/5', (data) => {
expect(data).toBeDefined(); expect(data).toBeDefined();
expect(data.current_visitors).toBe(0); expect(data.current_visitors).toBe(0);
@@ -145,7 +145,7 @@ describe('count()', () => {
hc.count({ hc.count({
headers: {}, headers: {},
connection: { remoteAddress: 'test1' }, connection: { remoteAddress: 'test1' },
}, '/test/path/1', () => { }, '/test/path/1', false, () => {
expect(multiCalled).toBeTruthy(); expect(multiCalled).toBeTruthy();
expect(hincrbyCalls).toEqual([ expect(hincrbyCalls).toEqual([
[ [
@@ -177,11 +177,11 @@ describe('count()', () => {
hc.count({ hc.count({
headers: {}, headers: {},
connection: { remoteAddress: 'test2' }, connection: { remoteAddress: 'test2' },
}, '/test/path/2', () => { }, '/test/path/2', false, () => {
hc.count({ hc.count({
headers: {}, headers: {},
connection: { remoteAddress: 'test2' }, connection: { remoteAddress: 'test2' },
}, '/test/path/2', () => { }, '/test/path/2', false, () => {
expect(hincrbyCalls).toEqual([ expect(hincrbyCalls).toEqual([
[ [
'/test/path/2', '/test/path/2',
@@ -223,11 +223,11 @@ describe('count()', () => {
hc.count({ hc.count({
headers: {}, headers: {},
connection: { remoteAddress: 'test3' }, connection: { remoteAddress: 'test3' },
}, '/test/path/3', () => { }, '/test/path/3', false, () => {
hc.count({ hc.count({
headers: {}, headers: {},
connection: { remoteAddress: 'test3' }, connection: { remoteAddress: 'test3' },
}, '/test/path/3', () => { }, '/test/path/3', false, () => {
expect(hincrbyCalls).toEqual([ expect(hincrbyCalls).toEqual([
[ [
'/test/path/3', '/test/path/3',