mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
fixing crawling of immonet
This commit is contained in:
@@ -3,36 +3,37 @@ const Fredy = require('../fredy');
|
||||
const utils = require('../utils');
|
||||
|
||||
function normalize(o) {
|
||||
const id = parseInt(o.id.split('_')[1], 10);
|
||||
const title = o.title.replace('NEU ', '');
|
||||
const address = o.address.split(' - ')[1];
|
||||
const id = parseInt(o.id.substring(o.id.indexOf('_') + 1, o.id.length));
|
||||
const size = o.size.replace('Wohnfläche ', '');
|
||||
const price = o.price.replace('Kaufpreis ', '');
|
||||
const address = o.address.split(' • ')[1];
|
||||
|
||||
return Object.assign(o, { id, title, address });
|
||||
return Object.assign(o, {id, address, price, size});
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, config.blacklist);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, config.blacklist);
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, config.blacklist);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, config.blacklist);
|
||||
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
const immonet = {
|
||||
name: 'immonet',
|
||||
enabled: config.sources.immonet.enabled,
|
||||
url: config.sources.immonet.url,
|
||||
crawlContainer: '#idResultList .search-object',
|
||||
crawlFields: {
|
||||
id: '.search-info a@id',
|
||||
price: '#keyfacts-bar div:first-child span',
|
||||
size: '#keyfacts-bar div:nth-child(2) .text-primary-highlight',
|
||||
title: '.search-info a | removeNewline | trim',
|
||||
link: '.search-info a@href',
|
||||
address: '.search-info p | removeNewline | trim'
|
||||
},
|
||||
paginate: '#idResultList .margin-bottom-6.margin-bottom-sm-12 .panel a.pull-right@href',
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist
|
||||
name: 'immonet',
|
||||
enabled: config.sources.immonet.enabled,
|
||||
url: config.sources.immonet.url,
|
||||
crawlContainer: '#result-list-stage .item',
|
||||
crawlFields: {
|
||||
id: '@id',
|
||||
price: 'div[id*="selPrice_"] | trim',
|
||||
size: 'div[id*="selArea_"] | trim',
|
||||
title: '.item a img@title',
|
||||
link: '.item a@href',
|
||||
address: '.item .box-25 .ellipsis .text-100 | removeNewline | trim'
|
||||
},
|
||||
paginate: '#idResultList .margin-bottom-6.margin-bottom-sm-12 .panel a.pull-right@href',
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist
|
||||
};
|
||||
|
||||
module.exports = new Fredy(immonet);
|
||||
|
||||
Reference in New Issue
Block a user