Files
fredy/lib/provider/wgGesucht.js
Christian Kellner 214e714c03 Puppeteer rewrite (#119)
* Moving to puppeteer | removing scrapingAnt
2024-12-17 12:38:28 +01:00

43 lines
1.3 KiB
JavaScript
Executable File

import utils, {buildHash} from '../utils.js';
let appliedBlackList = [];
function normalize(o) {
const id = buildHash(o.id, o.price);
return Object.assign(o, {id});
}
function applyBlacklist(o) {
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
return o.id != null && titleNotBlacklisted && descNotBlacklisted;
}
const config = {
url: null,
crawlContainer: '#main_column .wgg_card',
sortByDateParam: 'sort_column=0&sort_order=0',
waitForSelector: 'body',
crawlFields: {
id: '@data-id',
details: '.row .noprint .col-xs-11 |removeNewline |trim',
price: '.middle .col-xs-3 |removeNewline |trim',
size: '.middle .text-right |removeNewline |trim',
title: '.truncate_title a |removeNewline |trim',
link: '.truncate_title a@href',
},
normalize: normalize,
filter: applyBlacklist,
};
export const init = (sourceConfig, blacklist) => {
config.enabled = sourceConfig.enabled;
config.url = sourceConfig.url;
appliedBlackList = blacklist || [];
};
export const metaInformation = {
name: 'Wg gesucht',
baseUrl: 'https://www.wg-gesucht.de/',
id: 'wgGesucht',
};
export {config};