2025-12-11 10:40:55 +01:00
|
|
|
/*
 * Copyright (c) 2026 by Christian Kellner.
 * Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
 */
|
|
|
|
|
|
2025-09-22 09:57:50 +02:00
|
|
|
import { buildHash, isOneOf } from '../utils.js';
|
|
|
|
|
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
2026-04-12 09:17:23 +02:00
|
|
|
import { extractNumber } from '../utils/extract-number.js';
|
|
|
|
|
/** @import { ParsedListing } from '../types/listing.js' */
|
|
|
|
|
/** @import { ProviderConfig } from '../types/providerConfig.js' */
|
|
|
|
|
|
2020-02-26 09:05:20 +01:00
|
|
|
// Blacklist that applyBlacklist hands to isOneOf; starts empty and is replaced on every init() call.
let appliedBlackList = [];
|
2024-09-05 13:34:14 +02:00
|
|
|
|
2026-04-12 09:17:23 +02:00
|
|
|
/**
 * Converts one raw crawled entry into the listing shape returned by this provider.
 *
 * - The price label is first reduced to the actual price via `normalizePrice`; that
 *   reduced label is hashed into the id together with the raw object id and is also
 *   what gets converted to a number.
 * - Slashes in the address are replaced by commas; a missing address becomes `null`.
 * - The image path is resolved against the site's base URL; a missing or empty image
 *   becomes `null` instead of a bogus `<baseUrl>undefined` string.
 * - `description` is always `undefined` because this function never reads one from `o`.
 *
 * @param {any} o Raw crawled entry; reads `id`, `price`, `size`, `rooms`, `title`, `image` and `address`.
 * @returns {ParsedListing}
 */
function normalize(o) {
  const baseUrl = 'https://www.1a-immobilienmarkt.de';
  const link = `${baseUrl}/expose/${o.id}.html`;
  const price = normalizePrice(o.price);
  const id = buildHash(o.id, price);
  // `new URL` keeps already-absolute sources untouched and joins relative ones correctly,
  // with or without a leading slash. Plain string concatenation did neither.
  const image = o.image == null || o.image === '' ? null : new URL(o.image, baseUrl).href;
  const address = o.address == null ? null : o.address.trim().replaceAll('/', ',');

  return {
    id,
    link,
    title: o.title || '',
    price: extractNumber(price),
    size: extractNumber(o.size),
    rooms: extractNumber(o.rooms),
    address,
    image,
    description: undefined,
  };
}
|
2024-09-05 13:34:14 +02:00
|
|
|
|
2024-09-29 16:58:01 +02:00
|
|
|
/**
 * Reduces a price label to the actual price.
 *
 * einsAImmobilien sometimes emits labels such as
 * `775.700,00 EUR Kaufpreis ab 2.475 € mtl`. Only the first amount written with
 * dot-separated thousands, a comma and two decimals, followed by `EUR` or `€`,
 * is kept (here: `775.700,00 EUR`).
 *
 * @param {string | null | undefined} price Raw price label.
 * @returns {string | null} The first matching amount including its currency marker,
 *   the unchanged input when nothing matches, or `null` when no label was given.
 */
function normalizePrice(price) {
  if (price == null) {
    return null;
  }

  const amounts = price.match(/(\d{1,3}(?:\.\d{3})*,\d{2})\s?(EUR|€)/g);
  // No match at all: hand the label back untouched.
  return amounts?.[0] ?? price;
}
|
2026-04-12 09:17:23 +02:00
|
|
|
/**
 * Filter predicate: keeps a listing only when neither its title nor its
 * description is matched by `isOneOf` against the currently applied blacklist.
 *
 * @param {ParsedListing} o
 * @returns {boolean} `true` when the listing passes the blacklist, `false` otherwise.
 */
function applyBlacklist(o) {
  const titleBlacklisted = isOneOf(o.title, appliedBlackList);
  const descriptionBlacklisted = isOneOf(o.description, appliedBlackList);
  return !(titleBlacklisted || descriptionBlacklisted);
}
|
2024-09-05 13:34:14 +02:00
|
|
|
|
2026-04-12 09:17:23 +02:00
|
|
|
/**
 * Crawl configuration for this provider.
 * `url` starts out as `null` and is assigned by `init`.
 *
 * @type {ProviderConfig}
 */
const config = {
  fieldNames: [
    'id',
    'link',
    'title',
    'price',
    'size',
    'rooms',
    'address',
    'image',
    'description',
  ],
  url: null,
  crawlContainer: '.tabelle',
  sortByDateParam: 'sort_type=newest',
  waitForSelector: 'body',
  // Selector expressions per raw field; `normalize` reads the resulting values.
  crawlFields: {
    id: '.inner_object_data input[name="marker_objekt_id"]@value | int',
    price: '.inner_object_data .single_data_price | removeNewline | trim',
    size: '.tabelle .tabelle_inhalt_infos .single_data_box:nth-of-type(1) | removeNewline | trim',
    rooms: '.tabelle .tabelle_inhalt_infos .single_data_box:nth-of-type(2) | removeNewline | trim',
    title: '.inner_object_data .tabelle_inhalt_titel_black | removeNewline | trim',
    image: '.inner_object_pic img@src',
    address: '.tabelle .tabelle_inhalt_infos .left_information > div:nth-child(2) | removeNewline | trim',
  },
  normalize,
  filter: applyBlacklist,
  activeTester: checkIfListingIsActive,
};
|
2023-03-13 13:42:43 +01:00
|
|
|
/**
 * Applies the caller-supplied settings to this provider.
 *
 * Copies `enabled` and `url` from `sourceConfig` onto `config` and replaces the
 * module-level blacklist. A falsy `blacklist` resets it to an empty array.
 *
 * @param {{ enabled: any, url: any }} sourceConfig Settings object; only `enabled` and `url` are read.
 * @param {Array} [blacklist] Blacklist later passed to `isOneOf` by `applyBlacklist`.
 */
export const init = (sourceConfig, blacklist) => {
  const { enabled, url } = sourceConfig;
  Object.assign(config, { enabled, url });
  appliedBlackList = blacklist ? blacklist : [];
};
|
2023-03-13 13:42:43 +01:00
|
|
|
/**
 * Static descriptor of this provider: display name, site base URL and provider id.
 */
export const metaInformation = {
  name: '1a Immobilien',
  baseUrl: 'https://www.1a-immobilienmarkt.de/',
  id: 'einsAImmobilien',
};
|
2023-03-13 13:42:43 +01:00
|
|
|
// Export the same object that init() mutates, so importers observe the applied settings.
export { config };
|