mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
regex for einsAImmobilien price normalization | filter listings that do not have all required keys
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
let appliedBlackList = [];
|
||||
|
||||
function normalize(o) {
|
||||
@@ -7,10 +7,28 @@ function normalize(o) {
|
||||
size += ` / / ${o.rooms.trim()}`;
|
||||
}
|
||||
const link = `https://www.1a-immobilienmarkt.de/expose/${o.id}.html`;
|
||||
const id = buildHash(o.id, o.price);
|
||||
return Object.assign(o, { id, size, link });
|
||||
const price = normalizePrice(o.price);
|
||||
const id = buildHash(o.id, price);
|
||||
return Object.assign(o, { id, price, size, link });
|
||||
}
|
||||
|
||||
/**
 * Extracts the actual price from an einsAImmobilien price label.
 *
 * The provider sometimes renders labels such as
 * `775.700,00 EUR Kaufpreis ab 2.475 € mtl`. Only the first amount in German
 * number format (optional `.` thousands groups, `,` plus two decimals) that is
 * followed by `EUR` or `€` is kept, e.g. `775.700,00 EUR`.
 *
 * @param {?string} price - Raw price label of the listing.
 * @returns {*} `null` when `price` is null/undefined, the extracted amount
 *   including its currency marker when one is found, otherwise `price` unchanged.
 */
function normalizePrice(price) {
  if (price == null) {
    return null;
  }
  // Non-string values have no `.match`; hand them back untouched instead of throwing.
  if (typeof price !== 'string') {
    return price;
  }
  // `\d+` (rather than `\d{1,3}`) keeps ungrouped amounts such as `1500,00 EUR`
  // intact instead of cutting them down to `500,00 EUR`. No `g` flag is needed
  // because only the first match is used.
  const match = price.match(/\d+(?:\.\d{3})*,\d{2}\s?(?:EUR|€)/);
  return match == null ? price : match[0];
}
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
@@ -27,7 +45,6 @@ const config = {
|
||||
size: '.tabelle .inner_object_data .data_boxes div:nth-child(1)',
|
||||
rooms: '.tabelle .inner_object_data .data_boxes div:nth-child(2)',
|
||||
title: '.tabelle .inner_object_data .tabelle_inhalt_titel_black | removeNewline | trim',
|
||||
description: '.tabelle .inner_object_data .objekt_beschreibung | removeNewline | trim',
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
|
||||
Reference in New Issue
Block a user