2018-01-20 20:23:27 +01:00
|
|
|
const utils = require('../utils');
|
|
|
|
|
|
2020-02-26 09:05:20 +01:00
|
|
|
// Blacklist terms checked against a listing's title and description by
// `applyBlacklist`; starts empty and is replaced by `exports.init`.
let appliedBlackList = [];
|
|
|
|
|
// District terms checked against a listing's title by `applyBlacklist`; an
// empty list skips the district check. Replaced by `exports.init`.
let appliedBlacklistedDistricts = [];
|
|
|
|
|
|
2018-01-20 20:23:27 +01:00
|
|
|
/**
 * Normalizes a raw crawled listing.
 *
 * - `id`: last non-empty `/`-separated segment of the crawled `id` value.
 * - `price`: trimmed, with the first "Preis" label removed; 'unknown' when
 *   no price was crawled.
 * - `size`: with the "Wohnfläche: " and "ca. " labels removed ('unknown' when
 *   no size was crawled), followed by " / <rooms>" when a room count exists.
 * - `address`: always the placeholder '---'.
 *
 * The passed object is updated in place and returned.
 *
 * @param {Object} o raw listing with `id`, and optionally `price`, `size`, `rooms`
 * @returns {Object} the same object `o`, with `id`, `price`, `size` and `address` overwritten
 * @throws {TypeError} when `o.id` is missing (it has no `split` method)
 */
function normalize(o) {
  const segments = o.id.split('/').filter(Boolean);
  const id = segments[segments.length - 1];

  const price = o.price == null ? 'unknown' : o.price.trim().replace('Preis', '');

  const livingArea = o.size == null ? 'unknown' : o.size.replace('Wohnfläche: ', '').replace('ca. ', '');
  // Only append the room count when one was crawled; otherwise the literal
  // text "undefined"/"null" would leak into the size string.
  const size = o.rooms == null ? livingArea : `${livingArea} / ${o.rooms}`;

  const address = '---';

  return Object.assign(o, { id, price, size, address });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Filter predicate: decides whether a listing survives the blacklists.
 *
 * A listing is kept only when neither its title nor its description matches
 * the applied blacklist and its title does not match a blacklisted district.
 * The district check is skipped when no districts are configured.
 *
 * NOTE(review): `o.description` is not among the fields listed in
 * `config.crawlFields`, so it is presumably undefined here — confirm how
 * `utils.isOneOf` treats that.
 *
 * @param {Object} o listing with `title` and `description`
 * @returns {boolean} true when the listing should be kept
 */
function applyBlacklist(o) {
  const titleHit = utils.isOneOf(o.title, appliedBlackList);
  const descriptionHit = utils.isOneOf(o.description, appliedBlackList);

  let districtHit = false;
  if (appliedBlacklistedDistricts.length !== 0) {
    districtHit = utils.isOneOf(o.title, appliedBlacklistedDistricts);
  }

  return !districtHit && !titleHit && !descriptionHit;
}
|
|
|
|
|
|
2020-02-26 09:05:20 +01:00
|
|
|
// Crawl definition for this source. `enabled` and `url` start out as null and
// are filled in by `exports.init`.
const config = {
  enabled: null,
  url: null,

  // Selector for one result entry; the field selectors below are evaluated
  // per entry (presumably by the crawler that consumes this config — confirm).
  crawlContainer: '#resultList .resultitem-content-container',
  crawlFields: {
    id: '.resultitem-content-container a@href',
    price: '.description .rent | removeNewline | trim',
    title: '.resultitem-content-container a@title',
    link: '.resultitem-content-container a@href',
    rooms: '.resultitem-content-container .no-of-rooms | removeNewline | trim',
    size: '.resultitem-content-container .living-area | removeNewline | trim',
  },

  // Selector for the link to the next result page.
  paginate: '.markt_pagination_pageLinkNext .markt_pagination_link@href',

  // Post-processing hooks defined in this module.
  normalize,
  filter: applyBlacklist,
};
|
|
|
|
|
|
2020-02-26 09:05:20 +01:00
|
|
|
exports.init = (sourceConfig, blacklist, blacklistedDistricts) => {
|
2020-03-15 16:10:49 +01:00
|
|
|
config.enabled = sourceConfig.enabled;
|
|
|
|
|
config.url = sourceConfig.url;
|
|
|
|
|
appliedBlackList = blacklist;
|
|
|
|
|
appliedBlacklistedDistricts = blacklistedDistricts;
|
2020-02-26 09:05:20 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Must match the id of the source given in the config!
|
|
|
|
|
exports.id = () => 'kalaydo';
|
|
|
|
|
|
|
|
|
|
// Expose the crawl configuration object; its `enabled` and `url` fields are
// populated once `exports.init` has been called.
exports.config = config;
|