2018-01-20 20:23:27 +01:00
|
|
|
const utils = require('../utils');
|
|
|
|
|
|
2020-02-26 09:05:20 +01:00
|
|
|
// Blacklist terms checked by applyBlacklist; starts empty and is replaced by
// the list handed to exports.init.
let appliedBlackList = [];
|
|
|
|
|
|
2018-01-20 20:23:27 +01:00
|
|
|
/**
 * Cleans up the scraped `size` text of a listing.
 *
 * - `size` missing (null/undefined): replaced by the placeholder '--- m²'.
 * - `size` contains the label 'Wohnfläche': the text following the first
 *   occurrence of the label is used.
 * - `size` does not contain the label: the scraped text is kept as-is
 *   (previously this case threw a TypeError because the second split part
 *   was undefined).
 *
 * In the last two cases the first ' (ca.) ' fragment is removed.
 *
 * @param {Object} o - scraped listing; mutated in place.
 * @returns {Object} the same object `o`, with `size` and `address` (re)assigned.
 */
function normalize(o) {
  const sizeLabel = 'Wohnfläche';
  let size = '--- m²';
  if (o.size != null) {
    const parts = o.size.split(sizeLabel);
    // parts.length === 1 means the label was not present in the scraped text.
    const rawSize = parts.length > 1 ? parts[1] : parts[0];
    size = rawSize.replace(' (ca.) ', '');
  }
  const address = o.address;
  return Object.assign(o, { size, address });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Filter predicate: keeps a listing only when neither its title nor its
 * description is matched by utils.isOneOf against the active blacklist.
 *
 * NOTE(review): `description` is not among the crawlFields declared in this
 * file — confirm it is populated elsewhere, or that utils.isOneOf tolerates
 * undefined.
 *
 * @param {Object} o - listing with `title` and `description`.
 * @returns {boolean} true when the listing should be kept.
 */
function applyBlacklist(o) {
  // Both lookups are evaluated unconditionally before combining the results.
  const titleHit = utils.isOneOf(o.title, appliedBlackList);
  const descriptionHit = utils.isOneOf(o.description, appliedBlackList);
  return !(titleHit || descriptionHit);
}
|
|
|
|
|
|
2020-02-26 09:05:20 +01:00
|
|
|
// Source definition for immowelt. `enabled` and `url` are placeholders that
// exports.init fills in from the caller-supplied source config.
const config = {
  enabled: null,
  url: null,
  // Selector of the element wrapping one listing (trailing space is intentional-as-found).
  crawlContainer: '.immoliste .js-object.listitem_wrap ',
  // Per-listing field selectors. NOTE(review): the `selector@attr | filter`
  // syntax is interpreted by the scraper that consumes this config — confirm
  // the available filters (int, removeNewline, trim) there.
  crawlFields: {
    id: '@data-estateid | int',
    price: '.hardfacts_3 strong | removeNewline | trim',
    size: '.js-object.listitem_wrap .hardfacts_3 div:nth-child(2)| removeNewline | trim',
    title: '.listcontent.clear h2',
    link: 'a@href',
    address: '.listcontent .details .listlocation| removeNewline | trim',
  },
  // Selector for the link to the next result page.
  paginate: '#pnlPaging #nlbPlus@href',
  normalize,
  filter: applyBlacklist,
};
|
|
|
|
|
|
2020-02-26 09:05:20 +01:00
|
|
|
exports.init = (sourceConfig, blacklist) => {
|
|
|
|
|
config.enabled = sourceConfig.enabled;
|
|
|
|
|
config.url = sourceConfig.url;
|
|
|
|
|
appliedBlackList = blacklist;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
//must match the id of the source given in the config!
|
|
|
|
|
exports.id = () => 'immowelt';
|
|
|
|
|
|
|
|
|
|
// Expose the config object itself (the same reference exports.init writes to).
exports.config = config;
|