mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
fixing neubaukompass && updating dependencies
This commit is contained in:
@@ -3,38 +3,38 @@ const Fredy = require('../fredy');
|
||||
const utils = require('../utils');
|
||||
|
||||
/**
 * Cleans up a raw immonet listing.
 *
 * The normalized fields are written back onto `o` itself (via
 * `Object.assign`) and that same object is returned.
 *
 * @param {Object} o raw listing; `id`, `price` and `address` must be strings,
 *   `size` may be null/undefined and `title` may be empty.
 * @returns {Object} `o`, with `id`, `address`, `price`, `size` and `title` replaced.
 */
function normalize(o) {
  // Numeric part of the id: everything after the first '_' (the whole string
  // when there is no '_'). The radix is explicit so the text is always read as decimal.
  const id = parseInt(o.id.substring(o.id.indexOf('_') + 1), 10);
  // Drop the German field labels ("Wohnfläche" = living space, "Kaufpreis" = purchase price).
  const size = o.size != null ? o.size.replace('Wohnfläche ', '') : 'N/A m²';
  const price = o.price.replace('Kaufpreis ', '');
  // Keep the segment that follows the first ' • ' separator.
  const address = o.address.split(' • ')[1];
  const title = o.title || 'No title available';

  return Object.assign(o, { id, address, price, size, title });
}
|
||||
|
||||
/**
 * Filter predicate for immonet listings: a listing is kept only when neither
 * its title nor its description is matched by `config.blacklist`.
 *
 * NOTE(review): the immonet `crawlFields` define no `description`, so
 * `o.description` is presumably undefined here; how `utils.isOneOf` treats
 * that is not visible in this file - confirm.
 *
 * @param {Object} o listing with `title` and (optionally) `description`.
 * @returns {boolean} `true` when the listing passes the blacklist.
 */
function applyBlacklist(o) {
  const titleNotBlacklisted = !utils.isOneOf(o.title, config.blacklist);
  const descNotBlacklisted = !utils.isOneOf(o.description, config.blacklist);

  return titleNotBlacklisted && descNotBlacklisted;
}
|
||||
|
||||
// Source definition for immonet; it is handed to `new Fredy(...)` when the
// module is exported. The selector strings are presumably evaluated by the
// x-ray scraper listed in package.json - confirm against `../fredy`.
const immonet = {
  name: 'immonet',
  // Both values come from the `sources.immonet` section of the config file.
  enabled: config.sources.immonet.enabled,
  url: config.sources.immonet.url,
  // Selector for the element that wraps a single listing.
  crawlContainer: '#result-list-stage .item',
  // One selector per extracted field; `normalize` post-processes the results.
  crawlFields: {
    id: '@id',
    price: 'div[id*="selPrice_"] | trim',
    size: 'div[id*="selArea_"] | trim',
    title: '.item a img@title',
    link: 'a[id*="lnkImgToDetails_"]@href',
    address: '.item .box-25 .ellipsis .text-100 | removeNewline | trim'
  },
  // Selector for the link to the next result page.
  paginate: '#idResultList .margin-bottom-6.margin-bottom-sm-12 .panel a.pull-right@href',
  normalize: normalize,
  filter: applyBlacklist
};
|
||||
|
||||
module.exports = new Fredy(immonet);
|
||||
|
||||
@@ -3,37 +3,37 @@ const config = require('../../conf/config.json');
|
||||
const utils = require('../utils');
|
||||
|
||||
/**
 * Makes sure a kleinanzeigen listing always carries a `size`.
 *
 * The value is written back onto `o` itself and that same object is returned.
 *
 * @param {Object} o raw listing; `size` may be missing or empty.
 * @returns {Object} `o`, with `size` set to the placeholder '--- m²' when it was falsy.
 */
function normalize(o) {
  const size = o.size || '--- m²';

  return Object.assign(o, { size });
}
|
||||
|
||||
/**
 * Filter predicate for kleinanzeigen listings.
 *
 * A listing is kept when its title and description are not matched by
 * `config.blacklist` and its description is not matched by the district
 * blacklist. An empty district blacklist never rejects anything.
 *
 * @param {Object} o listing with `title` and `description`.
 * @returns {boolean} `true` when the listing passes all blacklists.
 */
function applyBlacklist(o) {
  const titleNotBlacklisted = !utils.isOneOf(o.title, config.blacklist);
  const descNotBlacklisted = !utils.isOneOf(o.description, config.blacklist);
  // The config key really is spelled `blacklistedDistrics`; it has to match the config file.
  const isBlacklistedDistrict =
    config.blacklistedDistrics.length === 0 ? false : utils.isOneOf(o.description, config.blacklistedDistrics);

  return !isBlacklistedDistrict && titleNotBlacklisted && descNotBlacklisted;
}
|
||||
|
||||
// Source definition for kleinanzeigen; it is handed to `new Fredy(...)` when
// the module is exported. The selector strings are presumably evaluated by the
// x-ray scraper listed in package.json - confirm against `../fredy`.
const kleinanzeigen = {
  name: 'kleinanzeigen',
  // Both values come from the `sources.kleinanzeigen` section of the config file.
  enabled: config.sources.kleinanzeigen.enabled,
  url: config.sources.kleinanzeigen.url,
  // Selector for the element that wraps a single listing.
  crawlContainer: '#srchrslt-adtable .ad-listitem',
  // One selector per extracted field; `description` is what `applyBlacklist` checks
  // against the district blacklist.
  crawlFields: {
    id: '.aditem@data-adid | int',
    price: '.aditem-details strong | removeNewline | trim',
    size: '.aditem-main .text-module-end span:nth-child(2) | removeNewline | trim',
    title: '.aditem-main .text-module-begin a | removeNewline | trim',
    link: '.aditem-main .text-module-begin a@href | removeNewline | trim',
    description: '.aditem-main p:not(.text-module-end) | removeNewline | trim',
    address: '.aditem-details | trim | removeNewline'
  },
  // Selector for the link to the next result page.
  paginate: '#srchrslt-pagination .pagination-next@href',
  normalize: normalize,
  filter: applyBlacklist
};
|
||||
|
||||
module.exports = new Fredy(kleinanzeigen);
|
||||
|
||||
@@ -3,27 +3,27 @@ const Fredy = require('../fredy');
|
||||
const utils = require('../utils');
|
||||
|
||||
/**
 * Normalization hook for neubauKompass listings. Nothing needs to be cleaned
 * up for this source, so the listing is passed through untouched.
 *
 * @param {Object} o raw listing.
 * @returns {Object} the same object, unchanged.
 */
function normalize(o) {
  return o;
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
return !utils.isOneOf(o.title, config.blacklist);
|
||||
return !utils.isOneOf(o.title, config.blacklist);
|
||||
}
|
||||
|
||||
// Source definition for neubauKompass; it is handed to `new Fredy(...)` when
// the module is exported. The selector strings are presumably evaluated by the
// x-ray scraper listed in package.json - confirm against `../fredy`.
const neubauKompass = {
  name: 'neubauKompass',
  // Both values come from the `sources.neubauKompass` section of the config file.
  enabled: config.sources.neubauKompass.enabled,
  url: config.sources.neubauKompass.url,
  // Selector for the element that wraps a single listing.
  crawlContainer: '.row article',
  // Title, link and address are all looked up directly inside `div.p-2`.
  // (The earlier revision used a bare `a` for title/link and
  // `div.p-2 > p:nth-child(3)` for the address.)
  crawlFields: {
    id: '@id',
    title: 'div.p-2 > a@title | removeNewline | trim',
    link: 'div.p-2 > a@href',
    address: 'div.p-2 > p | removeNewline | trim'
  },
  // Selector for the link to the next result page.
  paginate: '.numbered-pager__bottom .numbered-pager--info li:nth-child(2) a@href',
  normalize: normalize,
  filter: applyBlacklist
};
|
||||
|
||||
module.exports = new Fredy(neubauKompass);
|
||||
|
||||
@@ -3,31 +3,31 @@ const Fredy = require('../fredy');
|
||||
const utils = require('../utils');
|
||||
|
||||
/**
 * Normalization hook for wgGesucht listings. Nothing needs to be cleaned up
 * for this source, so the listing is passed through untouched.
 *
 * @param {Object} o raw listing.
 * @returns {Object} the same object, unchanged.
 */
function normalize(o) {
  return o;
}
|
||||
|
||||
/**
 * Filter predicate for wgGesucht listings: a listing is kept only when neither
 * its title nor its description is matched by `config.blacklist`.
 *
 * @param {Object} o listing with `title` and `description`.
 * @returns {boolean} `true` when the listing passes the blacklist.
 */
function applyBlacklist(o) {
  const titleNotBlacklisted = !utils.isOneOf(o.title, config.blacklist);
  const descNotBlacklisted = !utils.isOneOf(o.description, config.blacklist);

  return titleNotBlacklisted && descNotBlacklisted;
}
|
||||
|
||||
// Source definition for wgGesucht; it is handed to `new Fredy(...)` when the
// module is exported. The selector strings are presumably evaluated by the
// x-ray scraper listed in package.json - confirm against `../fredy`.
const wgGesucht = {
  name: 'wgGesucht',
  // Both values come from the `sources.wgGesucht` section of the config file.
  enabled: config.sources.wgGesucht.enabled,
  url: config.sources.wgGesucht.url,
  // Selector for the element that wraps a single listing; panels carrying the
  // `display-none` or `noprint` class are excluded.
  crawlContainer: '#main_column .panel:not(.display-none):not(.noprint)',
  // One selector per extracted field.
  crawlFields: {
    id: '@data-id',
    details: '.detail-size-price-wrapper .detailansicht |removeNewline |trim',
    title: '.headline .detailansicht |removeNewline |trim',
    description: '.list-details-panel-inner p |removeNewline |trim',
    link: '.headline .detailansicht@href'
  },
  // Selector for the link to the next result page.
  paginate: '.pagination-sm:first a:last@href',
  normalize: normalize,
  filter: applyBlacklist
};
|
||||
|
||||
module.exports = new Fredy(wgGesucht);
|
||||
|
||||
@@ -1,36 +1,35 @@
|
||||
const config = require('../../conf/config.json');
|
||||
// In-memory scrape statistics, filled by `setLastScrape` and served by the
// optional stats HTTP endpoint.
let stats = {
  // service name -> `Date#toString()` of its most recent scrape
  lastScrape: {},
  // timestamp key -> { service name -> number of new listings found }
  foundScrapes: {}
};
|
||||
|
||||
// Optional stats endpoint: when `config.enableStats` is set, every HTTP
// request is answered with the current config and stats as JSON. The server
// only listens on the loopback interface.
if (config.enableStats) {
  const http = require('http');
  http
    .createServer((req, res) => {
      res.writeHead(200, { 'Content-Type': 'application/json' });
      // NOTE(review): the complete config object is serialized here; if it
      // contains credentials they are readable by any local client - consider
      // exposing a whitelist of fields instead.
      res.end(
        JSON.stringify({
          config,
          stats
        })
      );
    })
    .listen(config.statsPort, '127.0.0.1');
}
|
||||
|
||||
/**
 * Formats a date as `YYYY/M/D H:m` using the local-time getters.
 *
 * The month is converted to its 1-based number and no component is
 * zero-padded. Seconds are left out, so all calls within the same minute
 * produce the same string.
 *
 * @param {Date} date date to format.
 * @returns {string} e.g. '2019/1/5 9:7'.
 */
const datetime = date => {
  return `${date.getFullYear()}/${date.getMonth() + 1}/${date.getDate()} ${date.getHours()}:${date.getMinutes()}`;
};
|
||||
|
||||
exports.setLastScrape = (serviceName, numberOfNewListsings) => {
|
||||
const d = new Date();
|
||||
const dt = datetime(d);
|
||||
stats.lastScrape[serviceName] = d.toString();
|
||||
const d = new Date();
|
||||
const dt = datetime(d);
|
||||
stats.lastScrape[serviceName] = d.toString();
|
||||
|
||||
if (numberOfNewListsings > 0) {
|
||||
stats.foundScrapes[dt] = stats.foundScrapes[dt] || {};
|
||||
stats.foundScrapes[dt][serviceName] = numberOfNewListsings;
|
||||
}
|
||||
if (numberOfNewListsings > 0) {
|
||||
stats.foundScrapes[dt] = stats.foundScrapes[dt] || {};
|
||||
stats.foundScrapes[dt][serviceName] = numberOfNewListsings;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
10
package.json
10
package.json
@@ -1,11 +1,11 @@
|
||||
{
|
||||
"name": "Fredy",
|
||||
"version": "1.2.0",
|
||||
"version": "1.2.1",
|
||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||
"scripts": {
|
||||
"start": "node index.js",
|
||||
"format": "prettier --write lib/**/*.js test/**/*.js *.js --single-quote --print-width 120",
|
||||
"test": "mocha --timeout 8000"
|
||||
"test": "mocha --timeout 9000"
|
||||
},
|
||||
"main": "index.js",
|
||||
"author": "Christian Kellner",
|
||||
@@ -31,12 +31,12 @@
|
||||
"chai": "4.2.0",
|
||||
"lowdb": "1.0.0",
|
||||
"request-x-ray": "0.1.4",
|
||||
"slack": "10.1.1",
|
||||
"slack": "11.0.2",
|
||||
"tg-yarl": "1.3.0",
|
||||
"x-ray": "2.3.3"
|
||||
"x-ray": "2.3.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"mocha": "6.1.4",
|
||||
"mocha": "6.2.2",
|
||||
"prettier": "1.18.2",
|
||||
"proxyquire": "1.8.0"
|
||||
}
|
||||
|
||||
@@ -40,7 +40,6 @@ describe('#kleinanzeigen testsuite()', () => {
|
||||
expect(notify.id).to.equal(
|
||||
kleinanzeigenDbContent.kleinanzeigen[idx]
|
||||
);
|
||||
expect(notify.size).that.does.include('m²');
|
||||
expect(notify.title).to.be.not.empty;
|
||||
expect(notify.link).that.does.include('https://www.ebay-kleinanzeigen.de');
|
||||
expect(notify.address).to.be.not.empty;
|
||||
|
||||
Reference in New Issue
Block a user