fixing immonet and neubaukompass

This commit is contained in:
orangecoding
2021-03-01 17:29:52 +01:00
parent a3a648928f
commit 9898b65ec8
2 changed files with 8 additions and 6 deletions

View File

@@ -8,8 +8,10 @@ function normalize(o) {
const price = o.price.replace('Kaufpreis ', '');
const address = o.address.split(' • ')[1];
const title = o.title || 'No title available';
return Object.assign(o, { id, address, price, size, title });
//normally we would just read the link from the source, but immonet decided to trick users by adding a click listener instead of
//an href attribute to do some weird reporting. (Very user friendly for handicapped people... not)
const link = `https://www.immonet.de/angebot/${id}`;
return Object.assign(o, { id, address, price, size, title, link });
}
function applyBlacklist(o) {
@@ -27,7 +29,6 @@ const config = {
price: 'div[id*="selPrice_"] | trim',
size: 'div[id*="selArea_"] | trim',
title: '.item a img@title',
link: 'a[id*="lnkImgToDetails_"]@href',
address: '.item .box-25 .ellipsis .text-100 | removeNewline | trim',
},
paginate: '#idResultList .margin-bottom-6.margin-bottom-sm-12 .panel a.pull-right@href',

View File

@@ -15,9 +15,10 @@ const config = {
crawlContainer: '.nbk-container >div article',
crawlFields: {
id: '@id',
title: 'div.nbk-p-2 > h3 a@title | removeNewline | trim',
link: 'div.nbk-p-2 > h3 > a@href',
address: 'div.nbk-p-2 > p | removeNewline | trim',
title: 'a.nbk-truncate@title | removeNewline | trim',
link: 'a.nbk-truncate@href',
address: 'p.nbk-truncate | removeNewline | trim',
price: 'p.nbk-mb-0 | removeNewline | trim',
},
paginate: '.numbered-pager__bottom .numbered-pager--info li:nth-child(2) a@href',
normalize: normalize,