bot detector base code

This commit is contained in:
Klemek
2021-04-04 21:56:39 +02:00
parent 823d97f4bb
commit 140e472e29
3 changed files with 67 additions and 0 deletions
+1
View File
@@ -2,6 +2,7 @@
/node_modules
/config.json
/config.example.json
/robots_list.json
/data
/data/*
/test_data
+62
View File
@@ -0,0 +1,62 @@
const https = require('https');
const fs = require('fs');
module.exports = (config) => {
const _this = {
status: {
FETCH_OK: 1,
FETCH_ERROR: 2,
READ_OK: 3,
READ_ERROR: 4,
},
count: [],
regex: null,
knownBots: [],
known: [],
};
const fetchList = (cb) => {
https.get(config['robots']['list_url'], (res) => {
const file = fs.createWriteStream(config['robots']['list_file']);
res.pipe(file);
file.on('finish', () => {
file.close(cb);
});
}).on('error', (err) => {
cb(err.message);
});
};
const readFile = (cb) => {
fs.readFile(config['robots']['list_file'], (err, data) => {
if (err) {
cb(err, undefined);
} else {
try {
cb(undefined, JSON.parse(data));
} catch (err) {
cb(err, undefined);
}
}
});
};
_this.load = (cb) => {
fetchList((err) => {
cb(err ? _this.status.FETCH_ERROR : _this.status.FETCH_OK, err);
readFile((err, data) => {
_this.count = data.length;
_this.regex = new RegExp('(' + data.filter(v => v['pattern']).map(v => v['pattern'])
.join('|') + ')');
cb(err ? _this.status.READ_ERROR : _this.status.READ_OK, err);
});
});
};
_this.handle = (req, res, next) => {
req.isRobot = !!((req.headers['user-agent'] || '').match(_this.regex));
next();
};
return _this;
};
+4
View File
@@ -63,6 +63,10 @@
"hit_counter": {
"unique_visitor_timeout": 7200000
},
"robots": {
"list_url": "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json",
"list_file": "robots_list.json"
},
"redis": {
"host": "localhost",
"port": 6379