adding wohnungsboerse provider

This commit is contained in:
orangecoding
2026-01-09 11:37:03 +01:00
parent 2926ee7e08
commit 7fd8be07a2
5 changed files with 180 additions and 75 deletions

View File

@@ -0,0 +1,56 @@
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
import * as utils from '../utils.js';
let appliedBlackList = [];
function normalize(o) {
const id = o.link.split('/').pop();
const price = o.price;
const size = o.size;
const rooms = o.rooms;
const [city = '', part = ''] = (o.description || '').split('-').map((v) => v.trim());
const address = `${part}, ${city}`;
return Object.assign(o, { id, price, size, rooms, address });
}
function applyBlacklist(o) {
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
return o.id != null && o.title != null && titleNotBlacklisted && descNotBlacklisted && o.link.startsWith(o.link);
}
const config = {
url: null,
sortByDateParam: null,
waitForSelector: 'body',
crawlContainer: '.search_result_container > a',
crawlFields: {
id: '*',
title: 'h3 | trim',
price: 'dl:nth-of-type(1) dd | removeNewline | trim',
rooms: 'dl:nth-of-type(2) dd | removeNewline | trim',
size: 'dl:nth-of-type(3) dd | removeNewline | trim',
description: 'div.before\\:icon-location_marker | trim',
link: '@href',
imageUrl: 'img@src',
},
normalize: normalize,
filter: applyBlacklist,
};
export const init = (sourceConfig, blacklistTerms) => {
config.url = sourceConfig.url;
appliedBlackList = blacklistTerms || [];
};
export const metaInformation = {
name: 'Wohnungsboerse',
baseUrl: 'https://www.wohnungsboerse.net',
id: 'wohnungsboerse',
};
export { config };