upgrading dependencies, fixing immowelt, using hash of price and id as unique identifier for listings

This commit is contained in:
Christian Kellner
2024-09-05 13:34:14 +02:00
parent 1bf012f13e
commit 35feb772d7
15 changed files with 680 additions and 446 deletions

View File

@@ -1,37 +1,42 @@
import utils from '../utils.js';
import utils, {buildHash} from '../utils.js';
let appliedBlackList = [];
function normalize(o) {
return o;
const id = buildHash(o.id, o.price);
return Object.assign(o, {id});
}
function applyBlacklist(o) {
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
return titleNotBlacklisted && descNotBlacklisted;
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
return titleNotBlacklisted && descNotBlacklisted;
}
const config = {
url: null,
crawlContainer: "div[class^='EstateItem-']",
sortByDateParam: 'sd=DESC&sf=TIMESTAMP',
crawlFields: {
id: 'a@id',
price: "div[class^='KeyFacts-'] [data-test='price'] | removeNewline | trim",
size: "div[class^='KeyFacts-'] [data-test='area'] | removeNewline | trim",
title: "div[class^='FactsMain-'] h2",
link: 'a@href',
address: "div[class^='estateFacts-'] span | removeNewline | trim",
},
paginate: '#pnlPaging #nlbPlus@href',
normalize: normalize,
filter: applyBlacklist,
url: null,
crawlContainer: 'div[data-testid="serp-card-testid"]',
sortByDateParam: 'sd=DESC&sf=TIMESTAMP',
crawlFields: {
id: 'a@id',
price: 'div[data-testid="cardmfe-price-testid"] | removeNewline | trim',
size: 'div[data-testid="cardmfe-keyfacts-testid"] | removeNewline | trim',
title: '.css-1cbj9xw',
link: 'a@href',
address: 'div[data-testid="cardmfe-description-box-address"] | removeNewline | trim',
},
paginate: '#pnlPaging #nlbPlus@href',
normalize: normalize,
filter: applyBlacklist,
};
export const init = (sourceConfig, blacklist) => {
config.enabled = sourceConfig.enabled;
config.url = sourceConfig.url;
appliedBlackList = blacklist || [];
config.enabled = sourceConfig.enabled;
config.url = sourceConfig.url;
appliedBlackList = blacklist || [];
};
export const metaInformation = {
name: 'Immowelt',
baseUrl: 'https://www.immowelt.de/',
id: 'immowelt',
name: 'Immowelt',
baseUrl: 'https://www.immowelt.de/',
id: 'immowelt',
};
export { config };
export {config};