mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
upgrading dependencies, fixing immowelt, using hash of price and id as unique identifier for listings
This commit is contained in:
@@ -1,37 +1,42 @@
|
||||
import utils from '../utils.js';
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
function normalize(o) {
|
||||
return o;
|
||||
const id = buildHash(o.id, o.price);
|
||||
return Object.assign(o, {id});
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: "div[class^='EstateItem-']",
|
||||
sortByDateParam: 'sd=DESC&sf=TIMESTAMP',
|
||||
crawlFields: {
|
||||
id: 'a@id',
|
||||
price: "div[class^='KeyFacts-'] [data-test='price'] | removeNewline | trim",
|
||||
size: "div[class^='KeyFacts-'] [data-test='area'] | removeNewline | trim",
|
||||
title: "div[class^='FactsMain-'] h2",
|
||||
link: 'a@href',
|
||||
address: "div[class^='estateFacts-'] span | removeNewline | trim",
|
||||
},
|
||||
paginate: '#pnlPaging #nlbPlus@href',
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
url: null,
|
||||
crawlContainer: 'div[data-testid="serp-card-testid"]',
|
||||
sortByDateParam: 'sd=DESC&sf=TIMESTAMP',
|
||||
crawlFields: {
|
||||
id: 'a@id',
|
||||
price: 'div[data-testid="cardmfe-price-testid"] | removeNewline | trim',
|
||||
size: 'div[data-testid="cardmfe-keyfacts-testid"] | removeNewline | trim',
|
||||
title: '.css-1cbj9xw',
|
||||
link: 'a@href',
|
||||
address: 'div[data-testid="cardmfe-description-box-address"] | removeNewline | trim',
|
||||
},
|
||||
paginate: '#pnlPaging #nlbPlus@href',
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlackList = blacklist || [];
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlackList = blacklist || [];
|
||||
};
|
||||
export const metaInformation = {
|
||||
name: 'Immowelt',
|
||||
baseUrl: 'https://www.immowelt.de/',
|
||||
id: 'immowelt',
|
||||
name: 'Immowelt',
|
||||
baseUrl: 'https://www.immowelt.de/',
|
||||
id: 'immowelt',
|
||||
};
|
||||
export { config };
|
||||
export {config};
|
||||
|
||||
Reference in New Issue
Block a user