mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
Release v1.0.0 🎉
This commit is contained in:
16
lib/errors.js
Executable file
16
lib/errors.js
Executable file
@@ -0,0 +1,16 @@
|
||||
/**
 * Base class for custom errors. The `name` property is taken from the
 * concrete class that was instantiated, so subclasses need no constructor.
 */
class ExtendableError extends Error {
  /**
   * @param {string} message human readable description of the error
   */
  constructor(message) {
    super(message);
    this.name = this.constructor.name;

    const canCaptureStack = typeof Error.captureStackTrace === 'function';
    if (!canCaptureStack) {
      // No captureStackTrace available: borrow the stack of a plain Error instead.
      this.stack = new Error(message).stack;
      return;
    }

    Error.captureStackTrace(this, this.constructor);
  }
}
|
||||
|
||||
/**
 * Error type with no behaviour of its own; it only carries a distinct
 * class name on top of ExtendableError.
 */
class NoNewListingsError extends ExtendableError {}
|
||||
|
||||
module.exports = {NoNewListingsError};
|
||||
108
lib/fredy.js
Executable file
108
lib/fredy.js
Executable file
@@ -0,0 +1,108 @@
|
||||
const {NoNewListingsError} = require('./errors');
|
||||
const Store = require('./services/store');
|
||||
|
||||
const notify = require('./notification/notify');
|
||||
const xray = require('./services/scraper');
|
||||
|
||||
/**
 * Runs the scrape pipeline for one source: warm up the store, fetch the
 * listings, normalize and filter them, keep the ones whose id is not stored
 * yet, persist their ids, send notifications and record statistics.
 */
class Fredy {
  /**
   * @param {Object} source provider definition; this class reads `name`, `enabled`, `url`,
   *   `crawlContainer`, `crawlFields`, `paginate`, `normalize` and `filter` from it
   */
  constructor(source) {
    this._store = new Store(source.name);
    this._fullCrawl = true;
    this._source = source;
    this._stats = null;
  }

  /**
   * Executes one scrape run.
   *
   * @param {Object} stats object exposing `setLastScrape(name, count)`; only the first one passed is kept
   * @returns {Promise} settles when the run is over; pipeline errors are handled by `_handleError`
   */
  run(stats) {
    if (!this._stats) {
      this._stats = stats;
    }

    if (!this._source.enabled) return Promise.resolve();

    return (
      Promise.resolve(this._source.url)
        // `warmup` is a getter returning a promise, so it has to be awaited inside a
        // callback. Handing the promise itself to `.then()` is ignored by the chain.
        .then(url => Promise.resolve(this._store.warmup).then(() => url))
        .then(this._getListings.bind(this))
        .then(this._normalize.bind(this))
        .then(this._filter.bind(this))
        .then(this._findNew.bind(this))
        .then(this._save.bind(this))
        .then(this._notify.bind(this))
        .then(this._updateStates.bind(this))
        .catch(this._handleError.bind(this))
    );
  }

  /**
   * Scrapes the listings found at `url`.
   *
   * @param {string} url page to scrape
   * @returns {Promise<Array>} the scraped listings
   */
  _getListings(url) {
    return new Promise((resolve, reject) => {
      let x = xray(url, this._source.crawlContainer, [this._source.crawlFields]);

      // Providers declare `paginate`; the old misspelling `paginage` is still honoured.
      const paginate = this._source.paginate || this._source.paginage;

      // Pagination is only requested on the first crawl of this instance.
      if (paginate && this._fullCrawl) {
        this._fullCrawl = false;
        x = x.paginate(paginate);
      }

      x((err, listings) => {
        if (err) reject(err);
        else resolve(listings);
      });
    });
  }

  /**
   * @param {Array} listings raw listings
   * @returns {Array} listings mapped through the source's `normalize`
   */
  _normalize(listings) {
    return listings.map(this._source.normalize);
  }

  /**
   * @param {Array} listings normalized listings
   * @returns {Array} listings accepted by the source's `filter`
   */
  _filter(listings) {
    return listings.filter(this._source.filter);
  }

  /**
   * Keeps the listings whose id is not stored yet.
   *
   * @param {Array} listings filtered listings
   * @returns {Array} the new listings (never empty)
   * @throws {NoNewListingsError} when nothing new was found; statistics are updated first
   */
  _findNew(listings) {
    // One Set lookup per listing instead of scanning the stored ids every time.
    const knownIds = new Set(this._store.knownListings);
    const newListings = listings.filter(o => !knownIds.has(o.id));

    if (newListings.length === 0) {
      this._updateStates([]);
      throw new NoNewListingsError();
    }

    return newListings;
  }

  /**
   * Sends every new listing to the notification module and waits for all sends.
   *
   * @param {Array} newListings listings to announce
   * @returns {Promise<Array>} resolves with `newListings` once every send has finished
   */
  _notify(newListings) {
    const sendNotifications = newListings.map(payload =>
      // `notify.send` yields one result per adapter. `[].concat` flattens that list
      // (or wraps a single value) so each adapter promise is really awaited.
      Promise.all([].concat(notify.send(this._source.name, payload)))
    );

    return Promise.all(sendNotifications).then(() => newListings);
  }

  /**
   * Records how many new listings this run produced.
   *
   * @param {Array} newListings listings found in this run
   * @returns {Array} `newListings`, unchanged
   */
  _updateStates(newListings) {
    // Nothing to record when `run()` was called without a stats object.
    if (this._stats) {
      this._stats.setLastScrape(this._source.name, newListings.length);
    }
    return newListings;
  }

  /**
   * Appends the ids of the new listings to the stored ids.
   *
   * @param {Array} newListings listings to remember
   * @returns {Array} `newListings`, unchanged
   */
  _save(newListings) {
    this._store.knownListings = [...this._store.knownListings, ...newListings.map(l => l.id)];
    return newListings;
  }

  /**
   * Logs every error except the expected "no new listings" signal.
   *
   * @param {*} err rejection reason of the pipeline
   */
  _handleError(err) {
    // Guard against an empty rejection reason before reading `name`.
    if (!err || err.name !== 'NoNewListingsError') console.error(err);
  }

  /**
   * for testing purposes only
   * @returns {Store}
   * @private
   */
  _getStore() {
    return this._store;
  }
}
|
||||
|
||||
module.exports = Fredy;
|
||||
18
lib/notification/adapter/console.js
Executable file
18
lib/notification/adapter/console.js
Executable file
@@ -0,0 +1,18 @@
|
||||
const config = require('../../../conf/config.json');
|
||||
|
||||
/**
|
||||
* simply prints out the found data to the console
|
||||
* @param serviceName e.g immoscout
|
||||
* @param payload the actual payload that is used to construct the message
|
||||
* @returns {Promise<Void> | void}
|
||||
*/
|
||||
exports.send = (serviceName, payload) => {
|
||||
return Promise.resolve(console.info(`Found entry from service ${serviceName}:`, payload))
|
||||
};
|
||||
|
||||
/**
 * Each integration needs to implement this method.
 * @returns the `notification.console.enabled` value from the config file
 */
exports.enabled = () => config.notification.console.enabled;
|
||||
54
lib/notification/adapter/slack.js
Executable file
54
lib/notification/adapter/slack.js
Executable file
@@ -0,0 +1,54 @@
|
||||
const Slack = require('slack');
// This file lives in lib/notification/adapter, so the project-level conf
// directory is three levels up (the console adapter next to it uses the same path).
const config = require('../../../conf/config.json');
const msg = Slack.chat.postMessage;

// Credentials and target channel are read once when the module is loaded.
const {token, channel} = config.notification.slack;
|
||||
|
||||
/**
|
||||
* sends a new listing to slack
|
||||
* @param serviceName e.g immoscout
|
||||
* @param payload the actual payload that is used to construct the message
|
||||
* @returns {Promise<Chat.PostMessage.Response> | void}
|
||||
*/
|
||||
exports.send = (serviceName, payload) => {
|
||||
return msg({
|
||||
token,
|
||||
channel,
|
||||
text: `*(${serviceName})* - ${payload.title}`,
|
||||
"attachments": [
|
||||
{
|
||||
"fallback": payload.title,
|
||||
"color": "#36a64f",
|
||||
"title": "Link to Exposé",
|
||||
"title_link": payload.link,
|
||||
"fields": [
|
||||
{
|
||||
"title": "Price",
|
||||
"value": payload.price,
|
||||
"short": false
|
||||
},
|
||||
{
|
||||
"title": "Size",
|
||||
"value": payload.size,
|
||||
"short": false
|
||||
},
|
||||
{
|
||||
"title": "Address",
|
||||
"value": payload.address,
|
||||
"short": false
|
||||
}
|
||||
],
|
||||
"footer": "Powered by Fredy",
|
||||
ts: new Date().getTime() / 1000
|
||||
}
|
||||
]
|
||||
}
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * Each integration needs to implement this method.
 * @returns the `notification.slack.enabled` value from the config file
 */
exports.enabled = () => config.notification.slack.enabled;
|
||||
17
lib/notification/notify.js
Executable file
17
lib/notification/notify.js
Executable file
@@ -0,0 +1,17 @@
|
||||
const fs = require('fs');
|
||||
// Module id prefix used to require() each adapter relative to this file.
const path = './adapter';

/**
 * Read every integration existing in ./adapter and keep the enabled ones.
 * The directory is resolved from __dirname so the listing looks at the same
 * folder the module-relative require() below loads from, whatever the
 * current working directory of the process is.
 */
const adapter = fs
  .readdirSync(require('path').join(__dirname, 'adapter'))
  .map(integPath => require(`${path}/${integPath}`))
  .filter(integration => integration.enabled());

// Fail at load time when no notification adapter is enabled.
if (adapter.length === 0) {
  throw new Error('Please specify at least one notification provider');
}
|
||||
|
||||
exports.send = (serviceName, payload) => {
|
||||
//this is not being used in tests, therefor adapter are always set
|
||||
return adapter.map(a => a.send(serviceName, payload));
|
||||
};
|
||||
38
lib/provider/immonet.js
Executable file
38
lib/provider/immonet.js
Executable file
@@ -0,0 +1,38 @@
|
||||
const config = require('../../conf/config.json');
|
||||
const Fredy = require('../fredy');
|
||||
const utils = require('../utils');
|
||||
|
||||
/**
 * Cleans up a scraped immonet listing in place.
 *
 * - id: the number after the first underscore of the scraped id; when no number
 *   can be parsed the scraped id is kept, so the listing never ends up with NaN
 *   (NaN can never be matched against stored ids).
 * - title: the first 'NEU ' marker is removed.
 * - address: the second ' - ' separated segment; when there is no separator the
 *   whole address is kept instead of becoming undefined.
 *
 * Missing title or address are treated as empty strings instead of throwing.
 *
 * @param {Object} o scraped listing
 * @returns {Object} the same object with id, title and address replaced
 */
function normalize(o) {
  const idPart = String(o.id == null ? '' : o.id).split('_')[1];
  const parsedId = parseInt(idPart, 10);
  const id = Number.isNaN(parsedId) ? o.id : parsedId;

  const title = (o.title || '').replace('NEU ', '');

  const rawAddress = o.address || '';
  const addressParts = rawAddress.split(' - ');
  const address = addressParts.length > 1 ? addressParts[1] : rawAddress;

  return Object.assign(o, { id, title, address });
}
|
||||
|
||||
/**
 * Filter predicate for immonet listings.
 * @param {Object} o normalized listing
 * @returns {boolean} true when neither the title nor the description matches the configured blacklist
 */
function applyBlacklist(o) {
  const { blacklist } = config;
  const titleHit = utils.isOneOf(o.title, blacklist);
  const descriptionHit = utils.isOneOf(o.description, blacklist);

  return !titleHit && !descriptionHit;
}
|
||||
|
||||
/**
 * Source definition for immonet: where to crawl, which selectors to read
 * and how to post-process the results.
 */
const immonet = {
  name: 'immonet',
  enabled: config.sources.immonet.enabled,
  url: config.sources.immonet.url,
  // container of one result entry
  crawlContainer: '#idResultList .search-object',
  // selectors evaluated inside each container
  crawlFields: {
    id: '.search-info a@id',
    price: '#keyfacts-bar div:first-child span',
    size: '#keyfacts-bar div:nth-child(2) .text-primary-highlight',
    title: '.search-info a | removeNewline | trim',
    link: '.search-info a@href',
    address: '.search-info p | removeNewline | trim'
  },
  // link to the next result page
  paginate: '#idResultList .margin-bottom-6.margin-bottom-sm-12 .panel a.pull-right@href',
  normalize,
  filter: applyBlacklist
};

module.exports = new Fredy(immonet);
|
||||
34
lib/provider/immoscout.js
Executable file
34
lib/provider/immoscout.js
Executable file
@@ -0,0 +1,34 @@
|
||||
const config = require('../../conf/config.json');
|
||||
const Fredy = require('../fredy');
|
||||
const utils = require('../utils');
|
||||
|
||||
/**
 * Cleans up a scraped immoscout listing in place.
 * Removes the first 'NEU' from the title and cuts the address at the first
 * "(...)," group; a missing address becomes an empty string.
 *
 * @param {Object} o scraped listing
 * @returns {Object} the same object with title and address replaced
 */
function normalize(o) {
  return Object.assign(o, {
    title: o.title.replace('NEU', ''),
    address: (o.address || '').replace(/\(.*\),.*$/, '').trim()
  });
}
|
||||
|
||||
/**
 * Filter predicate for immoscout listings.
 * @param {Object} o normalized listing
 * @returns {boolean} true when the title does not match the configured blacklist
 */
function applyBlacklist(o) {
  const titleHit = utils.isOneOf(o.title, config.blacklist);
  return !titleHit;
}
|
||||
|
||||
/**
 * Source definition for immoscout: where to crawl, which selectors to read
 * and how to post-process the results.
 */
const immoscout = {
  name: 'immoscout',
  enabled: config.sources.immoscout.enabled,
  url: config.sources.immoscout.url,
  // container of one result entry
  crawlContainer: '#resultListItems li.result-list__listing',
  // selectors evaluated inside each container
  crawlFields: {
    id: '.result-list-entry@data-obid | int',
    price: '.result-list-entry .result-list-entry__criteria .grid-item:first-child dd | removeNewline | trim',
    size: '.result-list-entry .result-list-entry__criteria .grid-item:nth-child(2) dd | removeNewline | trim',
    title: '.result-list-entry .result-list-entry__brand-title-container h5 | removeNewline | trim',
    link: '.result-list-entry .result-list-entry__brand-title-container@href',
    address: '.result-list-entry .result-list-entry__address a'
  },
  // link to the next result page
  paginate: '#pager .align-right a@href',
  normalize,
  filter: applyBlacklist
};

module.exports = new Fredy(immoscout);
|
||||
38
lib/provider/immowelt.js
Executable file
38
lib/provider/immowelt.js
Executable file
@@ -0,0 +1,38 @@
|
||||
const Fredy = require('../fredy');
|
||||
const config = require('../../conf/config.json');
|
||||
const utils = require('../utils');
|
||||
|
||||
/**
 * Cleans up a scraped immowelt listing in place.
 * The size keeps only the text in front of the first 'Wohnfläche'; the
 * address is written back unchanged.
 *
 * @param {Object} o scraped listing
 * @returns {Object} the same object with size and address set
 */
function normalize(o) {
  const [size] = o.size.split('Wohnfläche');

  return Object.assign(o, { size, address: o.address });
}
|
||||
|
||||
/**
 * Filter predicate for immowelt listings.
 * @param {Object} o normalized listing
 * @returns {boolean} true when neither the title nor the description matches the configured blacklist
 */
function applyBlacklist(o) {
  const titleIsClean = !utils.isOneOf(o.title, config.blacklist);
  const descriptionIsClean = !utils.isOneOf(o.description, config.blacklist);

  if (!titleIsClean) return false;
  return descriptionIsClean;
}
|
||||
|
||||
/**
 * Source definition for immowelt: where to crawl, which selectors to read
 * and how to post-process the results.
 */
const immowelt = {
  name: 'immowelt',
  enabled: config.sources.immowelt.enabled,
  url: config.sources.immowelt.url,
  // container of one result entry
  crawlContainer: '.immoliste .js-object.listitem_wrap ',
  // selectors evaluated inside each container
  crawlFields: {
    id: '@data-estateid | int',
    price: '.hardfacts_3 strong | removeNewline | trim',
    size: '.hardfacts_3 div:nth-child(2):not(.hardfactlabel)| removeNewline | trim',
    title: '.listcontent.clear h2',
    link: 'a@href',
    description: '.listconten_offset .listmerkmale| removeNewline | trim',
    address: '.listconten_offset .listlocation| removeNewline | trim'
  },
  // link to the next result page
  paginate: '#pnlPaging #nlbPlus@href',
  normalize,
  filter: applyBlacklist
};

module.exports = new Fredy(immowelt);
|
||||
46
lib/provider/kalaydo.js
Executable file
46
lib/provider/kalaydo.js
Executable file
@@ -0,0 +1,46 @@
|
||||
const config = require('../../conf/config.json');
|
||||
const Fredy = require('../fredy');
|
||||
const utils = require('../utils');
|
||||
|
||||
/**
 * Cleans up a scraped kalaydo listing in place.
 *
 * - id: last non-empty '/' separated segment of the scraped id (a link)
 * - price: without the 'Preis: ' label
 * - size: without the 'Wohnfläche: ' and 'ca. ' labels, followed by ' / ' and the rooms text
 * - address: always the placeholder '---'
 *
 * @param {Object} o scraped listing
 * @returns {Object} the same object with id, price, size and address replaced
 */
function normalize(o) {
  const segments = o.id.split('/').filter(Boolean);
  const id = segments[segments.length - 1];

  const price = o.price.replace('Preis: ', '');

  const livingArea = o.size.replace('Wohnfläche: ', '').replace('ca. ', '');
  const size = livingArea + ' / ' + o.rooms;

  return Object.assign(o, { id, price, size, address: '---' });
}
|
||||
|
||||
/**
 * Filter predicate for kalaydo listings.
 * A listing passes when its title and description do not match the
 * blacklist and, if any districts are configured, its title does not match
 * a blacklisted district.
 *
 * @param {Object} o normalized listing
 * @returns {boolean} true when the listing should be kept
 */
function applyBlacklist(o) {
  const titleHit = utils.isOneOf(o.title, config.blacklist);
  const descriptionHit = utils.isOneOf(o.description, config.blacklist);

  // An empty district list means "no district filter".
  const districtHit =
    config.blacklistedDistrics.length !== 0 && utils.isOneOf(o.title, config.blacklistedDistrics);

  return !(districtHit || titleHit || descriptionHit);
}
|
||||
|
||||
/**
 * Source definition for kalaydo: where to crawl, which selectors to read
 * and how to post-process the results.
 */
const kalaydo = {
  name: 'kalaydo',
  enabled: config.sources.kalaydo.enabled,
  url: config.sources.kalaydo.url,
  // container of one result entry
  crawlContainer: '#resultList .resultitem-content-container',
  // selectors evaluated inside each container
  crawlFields: {
    id: '.resultitem-content-container a@href',
    price: '.clear-row .rent | removeNewline | trim',
    title: '.resultitem-content-container a@title',
    link: '.resultitem-content-container a@href',
    rooms: '.resultitem-content-container .no-of-rooms | removeNewline | trim',
    size: '.resultitem-content-container .living-area | removeNewline | trim'
  },
  // link to the next result page
  paginate: '.markt_pagination_pageLinkNext .markt_pagination_link@href',
  normalize,
  filter: applyBlacklist
};

module.exports = new Fredy(kalaydo);
|
||||
39
lib/provider/kleinanzeigen.js
Executable file
39
lib/provider/kleinanzeigen.js
Executable file
@@ -0,0 +1,39 @@
|
||||
const Fredy = require('../fredy');
|
||||
const config = require('../../conf/config.json');
|
||||
const utils = require('../utils');
|
||||
|
||||
/**
 * Cleans up a scraped kleinanzeigen listing in place.
 *
 * The address is taken from the fifth line of the scraped text. When the
 * text has fewer lines (or no address was scraped at all), the whole text
 * with collapsed whitespace is used instead of throwing, so one oddly
 * formatted entry cannot abort the run.
 *
 * @param {Object} o scraped listing
 * @returns {Object} the same object with address replaced
 */
function normalize(o) {
  const rawAddress = typeof o.address === 'string' ? o.address : '';
  const fifthLine = rawAddress.split('\n')[4];
  const address = (fifthLine !== undefined ? fifthLine : rawAddress.replace(/\s+/g, ' ')).trim();

  return Object.assign(o, { address });
}
|
||||
|
||||
/**
 * Filter predicate for kleinanzeigen listings.
 * A listing passes when its title and description do not match the
 * blacklist and, if any districts are configured, its description does not
 * match a blacklisted district.
 *
 * @param {Object} o normalized listing
 * @returns {boolean} true when the listing should be kept
 */
function applyBlacklist(o) {
  const titleHit = utils.isOneOf(o.title, config.blacklist);
  const descriptionHit = utils.isOneOf(o.description, config.blacklist);

  // An empty district list means "no district filter".
  let districtHit = false;
  if (config.blacklistedDistrics.length !== 0) {
    districtHit = utils.isOneOf(o.description, config.blacklistedDistrics);
  }

  return !districtHit && !titleHit && !descriptionHit;
}
|
||||
|
||||
/**
 * Source definition for kleinanzeigen: where to crawl, which selectors to
 * read and how to post-process the results.
 */
const kleinanzeigen = {
  name: 'kleinanzeigen',
  enabled: config.sources.kleinanzeigen.enabled,
  url: config.sources.kleinanzeigen.url,
  // container of one result entry
  crawlContainer: '#srchrslt-adtable .ad-listitem',
  // selectors evaluated inside each container
  crawlFields: {
    id: '.aditem@data-adid | int',
    price: '.aditem-details strong | removeNewline | trim',
    size: '.aditem-main .text-module-end span:nth-child(2) | removeNewline | trim',
    title: '.aditem-main .text-module-begin a | removeNewline | trim',
    link: '.aditem-main .text-module-begin a@href | removeNewline | trim',
    description: '.aditem-main p:not(.text-module-end) | removeNewline | trim',
    address: '.aditem-details'
  },
  // link to the next result page
  paginate: '#srchrslt-pagination .pagination-next@href',
  normalize,
  filter: applyBlacklist
};

module.exports = new Fredy(kleinanzeigen);
|
||||
42
lib/services/scraper.js
Executable file
42
lib/services/scraper.js
Executable file
@@ -0,0 +1,42 @@
|
||||
const makeDriver = require('request-x-ray');
|
||||
const config = require('../../conf/config.json');
|
||||
const Xray = require('x-ray');
|
||||
|
||||
/**
 * Builds the x-ray instance used for scraping: registers the selector
 * filters (`removeNewline`, `trim`, `int`) and a request driver that sends
 * the configured User-Agent header.
 */
class Scraper {
  constructor() {
    const filters = {
      removeNewline: this._removeNewline,
      trim: this._trim,
      int: this._int
    };

    const driver = makeDriver({
      headers: {
        'User-Agent': config.userAgent
      }
    });

    const xray = Xray({ filters });
    xray.driver(driver);

    this.xray = xray;
  }

  /**
   * @returns the configured x-ray instance
   */
  get x() {
    return this.xray;
  }

  /**
   * Filter: takes line breaks out of a string; non-strings are returned as-is.
   *
   * The previous pattern `/\\n/` only matched a literal backslash followed by
   * "n" and never a real line break. Real line breaks (LF / CRLF) are now
   * replaced by one space so words on adjacent lines are not glued together;
   * the literal two-character sequence is still removed as before.
   *
   * @param {*} value scraped value
   * @returns {*} the cleaned string, or `value` itself when it is not a string
   */
  _removeNewline(value) {
    if (typeof value !== 'string') return value;
    return value.replace(/\\n/g, '').replace(/\r?\n/g, ' ');
  }

  /**
   * Filter: collapses every whitespace run to one space and trims both ends;
   * non-strings are returned as-is.
   *
   * @param {*} value scraped value
   * @returns {*} the cleaned string, or `value` itself when it is not a string
   */
  _trim(value) {
    return typeof value === 'string' ? value.replace(/\s+/g, ' ').trim() : value;
  }

  /**
   * Filter: parses a string as a base-10 integer; non-strings are returned as-is.
   *
   * @param {*} value scraped value
   * @returns {*} the parsed number (NaN when unparsable), or `value` itself when it is not a string
   */
  _int(value) {
    return typeof value === 'string' ? parseInt(value, 10) : value;
  }
}
|
||||
|
||||
module.exports = new Scraper().x;
|
||||
25
lib/services/stats.js
Normal file
25
lib/services/stats.js
Normal file
@@ -0,0 +1,25 @@
|
||||
const config = require('../../conf/config.json');
|
||||
let lastScrape = {};
|
||||
|
||||
// When stats are enabled, serve the configuration and the recorded scrape
// history as JSON on the configured port, bound to 127.0.0.1 only.
// NOTE(review): the complete config object is part of the response.
if (config.enableStats) {
  const http = require('http');

  const respondWithStats = (req, res) => {
    res.writeHead(200, { 'Content-Type': 'application/json' });
    const body = JSON.stringify({ config, lastScrape });
    res.end(body);
  };

  const server = http.createServer(respondWithStats);
  server.listen(config.statsPort, '127.0.0.1');
}
|
||||
|
||||
exports.setLastScrape = (serviceName, numberFound) => {
|
||||
lastScrape[serviceName] = lastScrape[serviceName] || [];
|
||||
lastScrape[serviceName].push({
|
||||
scapeTime: new Date().toString(),
|
||||
numberFound: numberFound
|
||||
});
|
||||
};
|
||||
36
lib/services/store.js
Executable file
36
lib/services/store.js
Executable file
@@ -0,0 +1,36 @@
|
||||
const path = require('path');
|
||||
const DB_PATH = path.dirname(require.main.filename) + '/conf/store.json';
|
||||
|
||||
const FileAsync = require('lowdb/adapters/FileAsync');
|
||||
const adapter = new FileAsync(DB_PATH);
|
||||
const low = require('lowdb');
|
||||
|
||||
const lowdb = low(adapter);
|
||||
|
||||
/**
 * Per-source view on the shared lowdb database: the ids of the known
 * listings are stored under the source name.
 */
class Store {
  /**
   * @param {string} name key under which this source's ids are stored
   */
  constructor(name) {
    this._name = name;
    this._db = null;
  }

  /**
   * Waits for the shared database and remembers it on this instance.
   * Must have resolved before `knownListings` is read or written.
   *
   * The database promise is chained directly instead of being wrapped in a
   * new Promise, so a failure to open the database rejects the returned
   * promise rather than leaving it pending forever.
   *
   * @returns {Promise<void>}
   */
  get warmup() {
    return lowdb.then(db => {
      this._db = db;
    });
  }

  /**
   * Replaces the stored ids of this source and writes the database.
   *
   * @param {Array} value the complete list of known ids
   * @throws {Error} when `value` is not an array
   */
  set knownListings(value) {
    if (!Array.isArray(value)) throw new Error('Not a valid array');

    const pendingWrite = this._db.set(this._name, value).write();
    // A setter cannot hand the write result back to the caller, so a failed
    // write is at least logged instead of ending as an unhandled rejection.
    if (pendingWrite && typeof pendingWrite.catch === 'function') {
      pendingWrite.catch(err => console.error(err));
    }
  }

  /**
   * @returns {Array} the stored ids of this source, or an empty array when nothing is stored
   */
  get knownListings() {
    return this._db.get(this._name).value() || [];
  }
}
|
||||
|
||||
module.exports = Store;
|
||||
8
lib/utils.js
Executable file
8
lib/utils.js
Executable file
@@ -0,0 +1,8 @@
|
||||
/**
 * Checks whether `word` contains one of the given terms as a whole word
 * (case-insensitive). The terms are joined into one regular expression, so
 * a term may itself use regular expression syntax.
 *
 * An empty term list never matches. Without this guard the pattern would be
 * `\b()\b`, which matches every text containing a word character, so an
 * empty blacklist would reject every listing. Empty terms are skipped for
 * the same reason, and a missing `word` is treated as "no match" instead of
 * being tested as the text "undefined".
 *
 * @param {*} word text to inspect
 * @param {Array<string>} arr terms to look for
 * @returns {boolean} true when at least one term occurs in `word`
 */
function isOneOf (word, arr) {
  if (word == null || !Array.isArray(arr)) return false;

  const terms = arr.filter(term => term != null && term !== '');
  if (terms.length === 0) return false;

  const expression = String.raw`\b(${terms.join('|')})\b`;
  const blacklist = new RegExp(expression, 'i');

  return blacklist.test(String(word));
}
|
||||
|
||||
module.exports = { isOneOf };
|
||||
Reference in New Issue
Block a user