Release v1.0.0 🎉

This commit is contained in:
Christian Kellner
2018-01-20 20:23:27 +01:00
commit c6cffe029d
33 changed files with 2168 additions and 0 deletions

16
lib/errors.js Executable file
View File

@@ -0,0 +1,16 @@
/**
 * Base class for custom errors.
 *
 * Sets `name` to the name of the concrete subclass and makes sure a stack
 * trace is available even when `Error.captureStackTrace` does not exist.
 */
class ExtendableError extends Error {
  /**
   * @param {string} message human readable description of the failure
   */
  constructor(message) {
    super(message);
    const concreteClass = this.constructor;
    this.name = concreteClass.name;
    if (typeof Error.captureStackTrace !== 'function') {
      // No captureStackTrace available: borrow the stack of a throwaway error.
      this.stack = new Error(message).stack;
      return;
    }
    Error.captureStackTrace(this, concreteClass);
  }
}
/**
 * Signals that a crawl produced no listing that was not already known.
 * Carries no extra state; consumers identify it by its `name`.
 */
class NoNewListingsError extends ExtendableError {}

module.exports = { NoNewListingsError };

108
lib/fredy.js Executable file
View File

@@ -0,0 +1,108 @@
const {NoNewListingsError} = require('./errors');
const Store = require('./services/store');
const notify = require('./notification/notify');
const xray = require('./services/scraper');
/**
 * Crawl pipeline for a single listing source.
 *
 * One run fetches the listings of the source, normalizes and filters them,
 * determines which ones are not yet known to the store, persists their ids,
 * sends a notification per new listing and records the result in the stats.
 */
class Fredy {
  /**
   * @param {Object} source provider definition; the pipeline reads `name`,
   *   `enabled`, `url`, `crawlContainer`, `crawlFields`, `paginate`,
   *   `normalize` and `filter` from it
   */
  constructor(source) {
    this._store = new Store(source.name);
    // Only the very first crawl follows the pagination links.
    this._fullCrawl = true;
    this._source = source;
    this._stats = null;
  }

  /**
   * Executes the whole pipeline once.
   *
   * @param {Object} stats object exposing `setLastScrape(name, count)`; only
   *   the instance passed on the first call is kept
   * @returns {Promise<Array|undefined>} the new listings, or `undefined` when
   *   the source is disabled, nothing new was found or an error was logged
   */
  run(stats) {
    if (!this._stats) {
      this._stats = stats;
    }
    if (!this._source.enabled) return Promise.resolve();
    return Promise.resolve(this._source.url)
      // `warmup` is a getter that yields a promise. Handing that promise to
      // `then` directly is a no-op (non-function handlers are ignored), so
      // wait for it explicitly and pass the url on afterwards.
      .then(url => Promise.resolve(this._store.warmup).then(() => url))
      .then(this._getListings.bind(this))
      .then(this._normalize.bind(this))
      .then(this._filter.bind(this))
      .then(this._findNew.bind(this))
      .then(this._save.bind(this))
      .then(this._notify.bind(this))
      .then(this._updateStates.bind(this))
      .catch(this._handleError.bind(this));
  }

  /**
   * Scrapes the listings found at `url`.
   *
   * @param {string} url page to crawl
   * @returns {Promise<Array<Object>>} raw listings as delivered by the scraper
   */
  _getListings(url) {
    return new Promise((resolve, reject) => {
      let x = xray(url, this._source.crawlContainer, [this._source.crawlFields]);
      // Providers declare the next-page selector as `paginate`; the misspelled
      // `paginage` is still honoured for sources that relied on it.
      const nextPage = this._source.paginate || this._source.paginage;
      if (nextPage && this._fullCrawl) {
        this._fullCrawl = false;
        x = x.paginate(nextPage);
      }
      x((err, listings) => {
        if (err) reject(err);
        else resolve(listings);
      });
    });
  }

  /** Applies the source specific `normalize` function to every listing. */
  _normalize(listings) {
    return listings.map(this._source.normalize);
  }

  /** Keeps only the listings accepted by the source specific `filter`. */
  _filter(listings) {
    return listings.filter(this._source.filter);
  }

  /**
   * Returns the listings whose id is not stored yet.
   *
   * @throws {NoNewListingsError} when every listing is already known; the
   *   stats are updated with a count of 0 before throwing
   */
  _findNew(listings) {
    // Read the known ids once instead of once per listing.
    const knownIds = new Set(this._store.knownListings);
    const newListings = listings.filter(o => !knownIds.has(o.id));
    if (newListings.length === 0) {
      this._updateStates([]);
      throw new NoNewListingsError();
    }
    return newListings;
  }

  /**
   * Sends one notification per new listing and waits for all of them.
   *
   * `notify.send` may hand back a single result or an array with one result
   * per adapter; both shapes are flattened so every promise is awaited and a
   * rejection ends up in the error handler of `run`.
   *
   * @returns {Promise<Array<Object>>} resolves with `newListings`
   */
  _notify(newListings) {
    const sendNotifications = newListings.map(payload =>
      Promise.all([].concat(notify.send(this._source.name, payload)))
    );
    return Promise.all(sendNotifications).then(() => newListings);
  }

  /** Records how many new listings the last scrape of this source found. */
  _updateStates(newListings) {
    this._stats.setLastScrape(this._source.name, newListings.length);
    return newListings;
  }

  /** Appends the ids of the new listings to the stored known ids. */
  _save(newListings) {
    this._store.knownListings = [
      ...this._store.knownListings,
      ...newListings.map(l => l.id)
    ];
    return newListings;
  }

  /** Logs every error except the expected "nothing new" signal. */
  _handleError(err) {
    if (!err || err.name !== 'NoNewListingsError') console.error(err);
  }

  /**
   * for testing purposes only
   * @returns {Store}
   * @private
   */
  _getStore() {
    return this._store;
  }
}
module.exports = Fredy;

View File

@@ -0,0 +1,18 @@
const config = require('../../../conf/config.json');
/**
* simply prints out the found data to the console
* @param serviceName e.g immoscout
* @param payload the actual payload that is used to construct the message
* @returns {Promise<Void> | void}
*/
exports.send = (serviceName, payload) => {
return Promise.resolve(console.info(`Found entry from service ${serviceName}:`, payload))
};
/**
 * Every integration has to provide this method. Reports the value of
 * `notification.console.enabled` from the configuration.
 */
exports.enabled = () => config.notification.console.enabled;

View File

@@ -0,0 +1,54 @@
const Slack = require('slack');
const config = require('../../conf/config.json');
// Shortcut to the `chat.postMessage` function of the `slack` package.
const msg = Slack.chat.postMessage;
// Token and target channel, read once from `notification.slack` in the config.
const {token, channel} = config.notification.slack;
/**
* sends a new listing to slack
* @param serviceName e.g immoscout
* @param payload the actual payload that is used to construct the message
* @returns {Promise<Chat.PostMessage.Response> | void}
*/
exports.send = (serviceName, payload) => {
return msg({
token,
channel,
text: `*(${serviceName})* - ${payload.title}`,
"attachments": [
{
"fallback": payload.title,
"color": "#36a64f",
"title": "Link to Exposé",
"title_link": payload.link,
"fields": [
{
"title": "Price",
"value": payload.price,
"short": false
},
{
"title": "Size",
"value": payload.size,
"short": false
},
{
"title": "Address",
"value": payload.address,
"short": false
}
],
"footer": "Powered by Fredy",
ts: new Date().getTime() / 1000
}
]
}
);
};
/**
 * Every integration has to provide this method. Reports the value of
 * `notification.slack.enabled` from the configuration.
 */
exports.enabled = () => config.notification.slack.enabled;

17
lib/notification/notify.js Executable file
View File

@@ -0,0 +1,17 @@
const fs = require('fs');
const path = './adapter';
/**
 * Load every integration that exists in ./adapter and keep those that report
 * themselves as enabled.
 **/
const adapter = fs
  // Look the directory up relative to this module (just like `require` does)
  // instead of relative to the working directory of the process, so the
  // application can be started from any folder.
  .readdirSync(`${__dirname}/adapter`)
  .map(integPath => require(`${path}/${integPath}`))
  .filter(integration => integration.enabled());
// Without any enabled integration nothing could ever be reported: fail fast.
if (adapter.length === 0) {
  throw new Error('Please specify at least one notification provider');
}
exports.send = (serviceName, payload) => {
//this is not being used in tests, therefor adapter are always set
return adapter.map(a => a.send(serviceName, payload));
};

38
lib/provider/immonet.js Executable file
View File

@@ -0,0 +1,38 @@
const config = require('../../conf/config.json');
const Fredy = require('../fredy');
const utils = require('../utils');
/**
 * Cleans up a raw immonet listing in place.
 *  - id: numeric part after the first underscore
 *  - title: first occurrence of the "NEU " marker removed
 *  - address: the part following the first " - " separator
 * @param {Object} o raw listing with string `id`, `title` and `address`
 * @returns {Object} the same object with the fields overwritten
 */
function normalize(o) {
  const [, rawId] = o.id.split('_');
  const cleanTitle = o.title.replace('NEU ', '');
  const [, location] = o.address.split(' - ');
  return Object.assign(o, {
    id: parseInt(rawId, 10),
    title: cleanTitle,
    address: location
  });
}
/**
 * Filter predicate: accepts a listing only when neither its title nor its
 * description matches the configured blacklist.
 * @param {Object} o normalized listing
 * @returns {boolean} true when the listing should be kept
 */
function applyBlacklist(o) {
  const titleHit = utils.isOneOf(o.title, config.blacklist);
  const descriptionHit = utils.isOneOf(o.description, config.blacklist);
  return !(titleHit || descriptionHit);
}
// Settings of this source as found in the configuration file.
const sourceConfig = config.sources.immonet;

/**
 * Provider definition for immonet: where to crawl, which selectors to read
 * and how to post-process the listings.
 */
const immonet = {
  name: 'immonet',
  enabled: sourceConfig.enabled,
  url: sourceConfig.url,
  // selector of the element that wraps a single listing
  crawlContainer: '#idResultList .search-object',
  // selector (plus scraper filters) per listing field
  crawlFields: {
    id: '.search-info a@id',
    price: '#keyfacts-bar div:first-child span',
    size: '#keyfacts-bar div:nth-child(2) .text-primary-highlight',
    title: '.search-info a | removeNewline | trim',
    link: '.search-info a@href',
    address: '.search-info p | removeNewline | trim'
  },
  paginate: '#idResultList .margin-bottom-6.margin-bottom-sm-12 .panel a.pull-right@href',
  normalize,
  filter: applyBlacklist
};

module.exports = new Fredy(immonet);

34
lib/provider/immoscout.js Executable file
View File

@@ -0,0 +1,34 @@
const config = require('../../conf/config.json');
const Fredy = require('../fredy');
const utils = require('../utils');
/**
 * Cleans up a raw immoscout listing in place.
 *  - title: first occurrence of the "NEU" marker removed
 *  - address: everything from the parenthesised part that is followed by a
 *    comma up to the end is cut off, then the rest is trimmed; a missing
 *    address becomes an empty string
 * @param {Object} o raw listing with a string `title`
 * @returns {Object} the same object with the fields overwritten
 */
function normalize(o) {
  const cleanTitle = o.title.replace('NEU', '');
  const rawAddress = o.address || '';
  const shortAddress = rawAddress.replace(/\(.*\),.*$/, '');
  return Object.assign(o, { title: cleanTitle, address: shortAddress.trim() });
}
/**
 * Filter predicate: accepts a listing only when its title does not match the
 * configured blacklist.
 * @param {Object} o normalized listing
 * @returns {boolean} true when the listing should be kept
 */
function applyBlacklist(o) {
  const titleHit = utils.isOneOf(o.title, config.blacklist);
  return !titleHit;
}
// Settings of this source as found in the configuration file.
const sourceConfig = config.sources.immoscout;

/**
 * Provider definition for immoscout: where to crawl, which selectors to read
 * and how to post-process the listings.
 */
const immoscout = {
  name: 'immoscout',
  enabled: sourceConfig.enabled,
  url: sourceConfig.url,
  // selector of the element that wraps a single listing
  crawlContainer: '#resultListItems li.result-list__listing',
  // selector (plus scraper filters) per listing field
  crawlFields: {
    id: '.result-list-entry@data-obid | int',
    price: '.result-list-entry .result-list-entry__criteria .grid-item:first-child dd | removeNewline | trim',
    size: '.result-list-entry .result-list-entry__criteria .grid-item:nth-child(2) dd | removeNewline | trim',
    title: '.result-list-entry .result-list-entry__brand-title-container h5 | removeNewline | trim',
    link: '.result-list-entry .result-list-entry__brand-title-container@href',
    address: '.result-list-entry .result-list-entry__address a'
  },
  paginate: '#pager .align-right a@href',
  normalize,
  filter: applyBlacklist
};

module.exports = new Fredy(immoscout);

38
lib/provider/immowelt.js Executable file
View File

@@ -0,0 +1,38 @@
const Fredy = require('../fredy');
const config = require('../../conf/config.json');
const utils = require('../utils');
/**
 * Cleans up a raw immowelt listing in place: `size` keeps only the text in
 * front of the "Wohnfläche" label, `address` is carried over unchanged.
 * @param {Object} o raw listing with a string `size`
 * @returns {Object} the same object with the fields overwritten
 */
function normalize(o) {
  const [size] = o.size.split('Wohnfläche');
  return Object.assign(o, { size, address: o.address });
}
/**
 * Filter predicate: accepts a listing only when neither its title nor its
 * description matches the configured blacklist.
 * @param {Object} o normalized listing
 * @returns {boolean} true when the listing should be kept
 */
function applyBlacklist(o) {
  const titleHit = utils.isOneOf(o.title, config.blacklist);
  const descriptionHit = utils.isOneOf(o.description, config.blacklist);
  return !(titleHit || descriptionHit);
}
// Settings of this source as found in the configuration file.
const sourceConfig = config.sources.immowelt;

/**
 * Provider definition for immowelt: where to crawl, which selectors to read
 * and how to post-process the listings.
 */
const immowelt = {
  name: 'immowelt',
  enabled: sourceConfig.enabled,
  url: sourceConfig.url,
  // selector of the element that wraps a single listing
  crawlContainer: '.immoliste .js-object.listitem_wrap ',
  // selector (plus scraper filters) per listing field
  crawlFields: {
    id: '@data-estateid | int',
    price: '.hardfacts_3 strong | removeNewline | trim',
    size: '.hardfacts_3 div:nth-child(2):not(.hardfactlabel)| removeNewline | trim',
    title: '.listcontent.clear h2',
    link: 'a@href',
    description: '.listconten_offset .listmerkmale| removeNewline | trim',
    address: '.listconten_offset .listlocation| removeNewline | trim'
  },
  paginate: '#pnlPaging #nlbPlus@href',
  normalize,
  filter: applyBlacklist
};

module.exports = new Fredy(immowelt);

46
lib/provider/kalaydo.js Executable file
View File

@@ -0,0 +1,46 @@
const config = require('../../conf/config.json');
const Fredy = require('../fredy');
const utils = require('../utils');
/**
 * Cleans up a raw kalaydo listing in place.
 *  - id: last non-empty path segment of the scraped link
 *  - price: "Preis: " label removed
 *  - size: "Wohnfläche: " and "ca. " removed, room count appended
 *  - address: fixed placeholder "---"
 * @param {Object} o raw listing with string `id`, `price` and `size`
 * @returns {Object} the same object with the fields overwritten
 */
function normalize(o) {
  const segments = o.id.split('/').filter(Boolean);
  const id = segments[segments.length - 1];
  const price = o.price.replace('Preis: ', '');
  const livingArea = o.size.replace('Wohnfläche: ', '').replace('ca. ', '');
  const size = livingArea + ' / ' + o.rooms;
  return Object.assign(o, { id, price, size, address: '---' });
}
/**
 * Filter predicate: rejects a listing when its title or description matches
 * the blacklist, or when its title matches one of the blacklisted districts.
 * The district check is skipped while no district is configured.
 * @param {Object} o normalized listing
 * @returns {boolean} true when the listing should be kept
 */
function applyBlacklist(o) {
  const titleHit = utils.isOneOf(o.title, config.blacklist);
  const descriptionHit = utils.isOneOf(o.description, config.blacklist);
  let districtHit = false;
  if (config.blacklistedDistrics.length !== 0) {
    districtHit = utils.isOneOf(o.title, config.blacklistedDistrics);
  }
  return !districtHit && !titleHit && !descriptionHit;
}
// Settings of this source as found in the configuration file.
const sourceConfig = config.sources.kalaydo;

/**
 * Provider definition for kalaydo: where to crawl, which selectors to read
 * and how to post-process the listings.
 */
const kalaydo = {
  name: 'kalaydo',
  enabled: sourceConfig.enabled,
  url: sourceConfig.url,
  // selector of the element that wraps a single listing
  crawlContainer: '#resultList .resultitem-content-container',
  // selector (plus scraper filters) per listing field; `id` starts out as
  // the link and is reduced to its last path segment by `normalize`
  crawlFields: {
    id: '.resultitem-content-container a@href',
    price: '.clear-row .rent | removeNewline | trim',
    title: '.resultitem-content-container a@title',
    link: '.resultitem-content-container a@href',
    rooms: '.resultitem-content-container .no-of-rooms | removeNewline | trim',
    size: '.resultitem-content-container .living-area | removeNewline | trim'
  },
  paginate: '.markt_pagination_pageLinkNext .markt_pagination_link@href',
  normalize,
  filter: applyBlacklist
};

module.exports = new Fredy(kalaydo);

39
lib/provider/kleinanzeigen.js Executable file
View File

@@ -0,0 +1,39 @@
const Fredy = require('../fredy');
const config = require('../../conf/config.json');
const utils = require('../utils');
/**
 * Cleans up a raw kleinanzeigen listing in place: the address is taken from
 * the fifth line of the scraped details text and trimmed.
 * @param {Object} o raw listing whose `address` holds at least five lines
 * @returns {Object} the same object with `address` overwritten
 */
function normalize(o) {
  const detailLines = o.address.split('\n');
  const address = detailLines[4].trim();
  return Object.assign(o, { address });
}
/**
 * Filter predicate: rejects a listing when its title or description matches
 * the blacklist, or when its description matches one of the blacklisted
 * districts. The district check is skipped while no district is configured.
 * @param {Object} o normalized listing
 * @returns {boolean} true when the listing should be kept
 */
function applyBlacklist(o) {
  const titleHit = utils.isOneOf(o.title, config.blacklist);
  const descriptionHit = utils.isOneOf(o.description, config.blacklist);
  let districtHit = false;
  if (config.blacklistedDistrics.length !== 0) {
    districtHit = utils.isOneOf(o.description, config.blacklistedDistrics);
  }
  return !districtHit && !titleHit && !descriptionHit;
}
// Settings of this source as found in the configuration file.
const sourceConfig = config.sources.kleinanzeigen;

/**
 * Provider definition for kleinanzeigen: where to crawl, which selectors to
 * read and how to post-process the listings.
 */
const kleinanzeigen = {
  name: 'kleinanzeigen',
  enabled: sourceConfig.enabled,
  url: sourceConfig.url,
  // selector of the element that wraps a single listing
  crawlContainer: '#srchrslt-adtable .ad-listitem',
  // selector (plus scraper filters) per listing field; `address` is read
  // unfiltered because `normalize` picks a single line out of it
  crawlFields: {
    id: '.aditem@data-adid | int',
    price: '.aditem-details strong | removeNewline | trim',
    size: '.aditem-main .text-module-end span:nth-child(2) | removeNewline | trim',
    title: '.aditem-main .text-module-begin a | removeNewline | trim',
    link: '.aditem-main .text-module-begin a@href | removeNewline | trim',
    description: '.aditem-main p:not(.text-module-end) | removeNewline | trim',
    address: '.aditem-details'
  },
  paginate: '#srchrslt-pagination .pagination-next@href',
  normalize,
  filter: applyBlacklist
};

module.exports = new Fredy(kleinanzeigen);

42
lib/services/scraper.js Executable file
View File

@@ -0,0 +1,42 @@
const makeDriver = require('request-x-ray');
const config = require('../../conf/config.json');
const Xray = require('x-ray');
/**
 * Builds the x-ray instance used for crawling: registers the custom selector
 * filters (`removeNewline`, `trim`, `int`) and installs a request driver that
 * sends the configured User-Agent header.
 */
class Scraper {
  constructor() {
    const driver = makeDriver({
      headers: { 'User-Agent': config.userAgent }
    });
    const xray = Xray({
      filters: {
        removeNewline: this._removeNewline,
        trim: this._trim,
        int: this._int
      }
    });
    xray.driver(driver);
    this.xray = xray;
  }

  /** The configured x-ray instance. */
  get x() {
    return this.xray;
  }

  /**
   * Strips every `\n` escape sequence from a string; other values pass through.
   * NOTE(review): the pattern matches a literal backslash followed by "n",
   * not a line feed character - confirm that this is intended.
   */
  _removeNewline(value) {
    if (typeof value !== 'string') return value;
    return value.replace(/\\n/g, '');
  }

  /**
   * Collapses each run of whitespace into a single space and trims both ends;
   * non-string values pass through.
   */
  _trim(value) {
    if (typeof value !== 'string') return value;
    return value.replace(/\s+/g, ' ').trim();
  }

  /** Parses a string as base 10 integer; non-string values pass through. */
  _int(value) {
    if (typeof value !== 'string') return value;
    return parseInt(value, 10);
  }
}
module.exports = new Scraper().x;

25
lib/services/stats.js Normal file
View File

@@ -0,0 +1,25 @@
const config = require('../../conf/config.json');
// Scrape history per service name, filled by `setLastScrape`.
let lastScrape = {};

// When stats are enabled, serve the configuration and the scrape history as
// JSON on the configured port. The server only listens on the loopback
// interface and answers every request the same way.
// NOTE(review): the response contains the complete config object - confirm
// that nothing sensitive is stored in it.
if (config.enableStats) {
  const http = require('http');
  const respondWithStats = (req, res) => {
    res.writeHead(200, { 'Content-Type': 'application/json' });
    const body = JSON.stringify({ config, lastScrape });
    res.end(body);
  };
  const server = http.createServer(respondWithStats);
  server.listen(config.statsPort, '127.0.0.1');
}
exports.setLastScrape = (serviceName, numberFound) => {
lastScrape[serviceName] = lastScrape[serviceName] || [];
lastScrape[serviceName].push({
scapeTime: new Date().toString(),
numberFound: numberFound
});
};

36
lib/services/store.js Executable file
View File

@@ -0,0 +1,36 @@
const path = require('path');
// The store file lives in the `conf` folder next to the entry script of the
// process (`require.main`), independent of the current working directory.
const DB_PATH = path.dirname(require.main.filename) + '/conf/store.json';
const FileAsync = require('lowdb/adapters/FileAsync');
const adapter = new FileAsync(DB_PATH);
const low = require('lowdb');
// Used as a promise below (`lowdb.then`); it is created once and shared by
// every Store instance.
const lowdb = low(adapter);
/**
 * Keeps the ids of the already known listings of one source. Every instance
 * reads and writes its own key (the source name) in the shared lowdb database.
 */
class Store {
  /**
   * @param {string} name key under which the ids of this source are stored
   */
  constructor(name) {
    this._name = name;
    this._db = null;
  }

  /**
   * Waits for the shared database and remembers it on this instance. Has to
   * settle before `knownListings` is read or written.
   *
   * The promise of the database is returned directly, so a failure while
   * loading it rejects here instead of leaving the caller waiting forever.
   *
   * @returns {Promise<void>}
   */
  get warmup() {
    return lowdb.then(db => {
      this._db = db;
    });
  }

  /**
   * Replaces the known ids of this source and persists them.
   *
   * @param {Array} value the complete list of known ids
   * @throws {Error} when `value` is not an array
   */
  set knownListings(value) {
    if (!Array.isArray(value)) throw new Error('Not a valid array');
    // A setter cannot hand the write promise back to the caller, so a failed
    // write is logged here instead of ending up as an unhandled rejection.
    Promise.resolve(this._db.set(this._name, value).write()).catch(err => console.error(err));
  }

  /**
   * @returns {Array} the known ids of this source, an empty array when none
   *   have been stored yet
   */
  get knownListings() {
    return this._db.get(this._name).value() || [];
  }
}
module.exports = Store;

8
lib/utils.js Executable file
View File

@@ -0,0 +1,8 @@
/**
 * Checks whether `word` contains at least one of the given terms as a whole
 * word, ignoring case.
 *
 * The terms are joined into one regular expression, so each entry is
 * interpreted as a regular-expression fragment.
 *
 * @param {string} word text to inspect; `null`/`undefined` never match
 * @param {Array<string>} arr terms to look for; empty entries are ignored
 * @returns {boolean} true when one of the terms occurs in `word`
 */
function isOneOf (word, arr) {
  if (word == null || !Array.isArray(arr)) return false;
  // An empty alternation (`\b()\b`) matches at every word boundary and would
  // report a hit for practically any text, so empty input means "no match".
  const terms = arr.filter(term => term != null && term !== '');
  if (terms.length === 0) return false;
  const expression = String.raw`\b(${terms.join('|')})\b`;
  // No `g` flag: a single yes/no answer is needed and `test` must not keep state.
  const blacklist = new RegExp(expression, 'i');
  return blacklist.test(word);
}
module.exports = { isOneOf };