fixing crawling of immonet

This commit is contained in:
Christian Kellner
2018-06-10 10:36:29 +02:00
parent 8a1d86c90e
commit bb08d036af

View File

@@ -3,36 +3,37 @@ const Fredy = require('../fredy');
const utils = require('../utils');
/**
 * Cleans up the raw fields crawled for a single immonet listing.
 *
 * The listing object is mutated in place (via Object.assign) and also returned.
 *
 * @param {Object} o - Raw listing; must carry string `id`, `size`, `price` and `address` fields.
 * @returns {Object} The same object with `id`, `address`, `price` and `size` overwritten.
 */
function normalize(o) {
  // Keep everything after the first underscore of the element id and parse it
  // as a base-10 integer. If there is no underscore, indexOf yields -1 and the
  // whole string is parsed.
  const id = parseInt(o.id.substring(o.id.indexOf('_') + 1), 10);
  // Strip the first occurrence of the German labels ("Wohnfläche" = living
  // area, "Kaufpreis" = purchase price) from the crawled values.
  const size = o.size.replace('Wohnfläche ', '');
  const price = o.price.replace('Kaufpreis ', '');
  // Keep the part after the first bullet separator; this is undefined when the
  // crawled text does not contain ' • '.
  const address = o.address.split(' • ')[1];
  return Object.assign(o, { id, address, price, size });
}
/**
 * Filter predicate for crawled listings.
 *
 * Passes a listing only when `utils.isOneOf` reports false for both its title
 * and its description against `config.blacklist`.
 *
 * @param {Object} o - Listing with `title` and `description` fields.
 * @returns {boolean} true when the listing should be kept.
 */
function applyBlacklist(o) {
  // presumably isOneOf checks whether the text matches any blacklist entry —
  // verify against ../utils.
  const titleNotBlacklisted = !utils.isOneOf(o.title, config.blacklist);
  // NOTE(review): the crawl fields of this source do not define `description`,
  // so this is likely called with undefined — confirm isOneOf tolerates that.
  const descNotBlacklisted = !utils.isOneOf(o.description, config.blacklist);
  return titleNotBlacklisted && descNotBlacklisted;
}
// Source definition for immonet that is handed to Fredy: where to crawl, which
// selectors extract each field of a listing, and the hooks used to clean up
// and filter the results.
const immonet = {
  name: 'immonet',
  enabled: config.sources.immonet.enabled,
  url: config.sources.immonet.url,
  // Each element matched here is one listing; the crawlFields selectors below
  // are presumably evaluated relative to it — verify against Fredy.
  crawlContainer: '#result-list-stage .item',
  crawlFields: {
    id: '@id',
    price: 'div[id*="selPrice_"] | trim',
    size: 'div[id*="selArea_"] | trim',
    title: '.item a img@title',
    link: '.item a@href',
    address: '.item .box-25 .ellipsis .text-100 | removeNewline | trim'
  },
  // NOTE(review): this selector still targets the old '#idResultList'
  // container while crawlContainer now uses '#result-list-stage' — confirm
  // that pagination still matches the current page markup.
  paginate: '#idResultList .margin-bottom-6.margin-bottom-sm-12 .panel a.pull-right@href',
  normalize,
  filter: applyBlacklist
};

module.exports = new Fredy(immonet);