Feature/spec filter (#276)

* feat(): create map component, add area filtering to the job config

* feat(): filter listings by area filter

* chore(): cleanup

* feat(): solve feedback

* feat(): solve most providers

* feat(): solve maybe other providers

* feat(): add specFilter config, also add rooms to listing

* feat(): change tests

* feat(): fix kleinanzeigen parser

* feat(): add spec filter switch for listing overviews

* feat(): add rooms and size to the overview and detail of a listing

* feat(): rem label

* feat(): add types, update providers, they now return specs as numbers

* feat(): add jsonconfig to enable type checks

* feat: add type for providerConfig, add fieldNames per provider

* feat: fix tests, provider, add formatListing

* chore: remove duplicates

* feat(): fix tests

* feat: fix immoscout

* chore: geojson typing

* feat: solve requested changes
This commit is contained in:
Stephan
2026-04-12 09:17:23 +02:00
committed by GitHub
parent 05f74f99ef
commit 10c94eea0a
49 changed files with 1004 additions and 250 deletions

12
jsconfig.json Normal file
View File

@@ -0,0 +1,12 @@
{
"compilerOptions": {
"module": "NodeNext",
"moduleResolution": "NodeNext",
"target": "ESNext",
"checkJs": true,
"allowJs": true,
"noEmit": true,
"strict": false
},
"exclude": ["node_modules", "ui"]
}

View File

@@ -19,22 +19,14 @@ import { distanceMeters } from './services/listings/distanceCalculator.js';
import { getUserSettings } from './services/storage/settingsStorage.js'; import { getUserSettings } from './services/storage/settingsStorage.js';
import { updateListingDistance } from './services/storage/listingsStorage.js'; import { updateListingDistance } from './services/storage/listingsStorage.js';
import booleanPointInPolygon from '@turf/boolean-point-in-polygon'; import booleanPointInPolygon from '@turf/boolean-point-in-polygon';
import { formatListing } from './utils/formatListing.js';
/** /** @import { ParsedListing } from './types/listing.js' */
* @typedef {Object} Listing /** @import { Job } from './types/job.js' */
* @property {string} id Stable unique identifier (hash) of the listing. /** @import { ProviderConfig } from './types/providerConfig.js' */
* @property {string} title Title or headline of the listing. /** @import { SpecFilter, SpatialFilter } from './types/filter.js' */
* @property {string} [address] Optional address/location text. /** @import { SimilarityCache } from './types/similarityCache.js' */
* @property {string} [price] Optional price text/value. /** @import { Browser } from './types/browser.js' */
* @property {string} [url] Link to the listing detail page.
* @property {any} [meta] Provider-specific additional metadata.
*/
/**
* @typedef {Object} SimilarityCache
* @property {(title:string, address?:string)=>boolean} hasSimilarEntries Returns true if a similar entry is known.
* @property {(title:string, address?:string)=>void} addCacheEntry Adds a new entry to the similarity cache.
*/
/** /**
* Runtime orchestrator for fetching, normalizing, filtering, deduplicating, storing, * Runtime orchestrator for fetching, normalizing, filtering, deduplicating, storing,
@@ -48,43 +40,43 @@ import booleanPointInPolygon from '@turf/boolean-point-in-polygon';
* 5) Identify new listings (vs. previously stored hashes) * 5) Identify new listings (vs. previously stored hashes)
* 6) Persist new listings * 6) Persist new listings
* 7) Filter out entries similar to already seen ones * 7) Filter out entries similar to already seen ones
* 8) Dispatch notifications * 8) Filter out entries that do not match the job's specFilter
* 9) Filter out entries that do not match the job's spatialFilter
* 10) Dispatch notifications
*/ */
class FredyPipelineExecutioner { class FredyPipelineExecutioner {
/** /**
* Create a new runtime instance for a single provider/job execution. * Create a new runtime instance for a single provider/job execution.
* *
* @param {Object} providerConfig Provider configuration. * @param {ProviderConfig} providerConfig Provider configuration.
* @param {string} providerConfig.url Base URL to crawl. * @param {Job} job Job configuration.
* @param {string} [providerConfig.sortByDateParam] Query parameter used to enforce sorting by date (provider-specific).
* @param {string} [providerConfig.waitForSelector] CSS selector to wait for before parsing content.
* @param {Object.<string, string>} providerConfig.crawlFields Mapping of field names to selectors/paths to extract.
* @param {string} providerConfig.crawlContainer CSS selector for the container holding listing items.
* @param {(raw:any)=>Listing} providerConfig.normalize Function to convert raw scraped data into a Listing shape.
* @param {(listing:Listing)=>boolean} providerConfig.filter Function to filter out unwanted listings.
* @param {(url:string, waitForSelector?:string)=>Promise<void>|Promise<Listing[]>} [providerConfig.getListings] Optional override to fetch listings.
* @param {(listing:Listing, browser:any)=>Promise<Listing>} [providerConfig.fetchDetails] Optional per-listing detail enrichment. Called in parallel for each new listing after deduplication. Receives the shared browser instance. Must always resolve (never reject).
* @param {Object} notificationConfig Notification configuration passed to notification adapters.
* @param {Object} spatialFilter Optional spatial filter configuration.
* @param {string} providerId The ID of the provider currently in use. * @param {string} providerId The ID of the provider currently in use.
* @param {string} jobKey Key of the job that is currently running (from within the config).
* @param {SimilarityCache} similarityCache Cache instance for checking similar entries. * @param {SimilarityCache} similarityCache Cache instance for checking similar entries.
* @param browser * @param {Browser} browser Puppeteer browser instance.
*/ */
constructor(providerConfig, notificationConfig, spatialFilter, providerId, jobKey, similarityCache, browser) { constructor(providerConfig, job, providerId, similarityCache, browser) {
/** @type {ProviderConfig} */
this._providerConfig = providerConfig; this._providerConfig = providerConfig;
this._notificationConfig = notificationConfig; /** @type {Object} */
this._spatialFilter = spatialFilter; this._jobNotificationConfig = job.notificationAdapter;
/** @type {string} */
this._jobKey = job.id;
/** @type {SpecFilter | null} */
this._jobSpecFilter = job.specFilter;
/** @type {SpatialFilter | null} */
this._jobSpatialFilter = job.spatialFilter;
/** @type {string} */
this._providerId = providerId; this._providerId = providerId;
this._jobKey = jobKey; /** @type {SimilarityCache} */
this._similarityCache = similarityCache; this._similarityCache = similarityCache;
/** @type {Browser} */
this._browser = browser; this._browser = browser;
} }
/** /**
* Execute the end-to-end pipeline for a single provider run. * Execute the end-to-end pipeline for a single provider run.
* *
* @returns {Promise<Listing[]|void>} Resolves to the list of new (and similarity-filtered) listings * @returns {Promise<ParsedListing[]|void>} Resolves to the list of new (and similarity-filtered) listings
* after notifications have been sent; resolves to void when there are no new listings. * after notifications have been sent; resolves to void when there are no new listings.
*/ */
execute() { execute() {
@@ -98,6 +90,7 @@ class FredyPipelineExecutioner {
.then(this._save.bind(this)) .then(this._save.bind(this))
.then(this._calculateDistance.bind(this)) .then(this._calculateDistance.bind(this))
.then(this._filterBySimilarListings.bind(this)) .then(this._filterBySimilarListings.bind(this))
.then(this._filterBySpecs.bind(this))
.then(this._filterByArea.bind(this)) .then(this._filterByArea.bind(this))
.then(this._notify.bind(this)) .then(this._notify.bind(this))
.catch(this._handleError.bind(this)); .catch(this._handleError.bind(this));
@@ -132,8 +125,8 @@ class FredyPipelineExecutioner {
/** /**
* Geocode new listings. * Geocode new listings.
* *
* @param {Listing[]} newListings New listings to geocode. * @param {ParsedListing[]} newListings New listings to geocode.
* @returns {Promise<Listing[]>} Resolves with the listings (potentially with added coordinates). * @returns {Promise<ParsedListing[]>} Resolves with the listings (potentially with added coordinates).
*/ */
async _geocode(newListings) { async _geocode(newListings) {
for (const listing of newListings) { for (const listing of newListings) {
@@ -152,18 +145,18 @@ class FredyPipelineExecutioner {
* Filter listings by area using the provider's area filter if available. * Filter listings by area using the provider's area filter if available.
* Only filters if areaFilter is set on the provider AND the listing has coordinates. * Only filters if areaFilter is set on the provider AND the listing has coordinates.
* *
* @param {Listing[]} newListings New listings to filter by area. * @param {ParsedListing[]} newListings New listings to filter by area.
* @returns {Promise<Listing[]>} Resolves with listings that are within the area (or not filtered if no area is set). * @returns {ParsedListing[]} Resolves with listings that are within the area (or not filtered if no area is set).
*/ */
_filterByArea(newListings) { _filterByArea(newListings) {
const polygonFeatures = this._spatialFilter?.features?.filter((f) => f.geometry?.type === 'Polygon'); const polygonFeatures = this._jobSpatialFilter?.features?.filter((f) => f.geometry?.type === 'Polygon');
// If no area filter is set, return all listings // If no area filter is set, return all listings
if (!polygonFeatures?.length) { if (!polygonFeatures?.length) {
return newListings; return newListings;
} }
const filteredIds = []; const toDeleteListingByIds = [];
// Filter listings by area - keep only those within the polygon // Filter listings by area - keep only those within the polygon
const keptListings = newListings.filter((listing) => { const keptListings = newListings.filter((listing) => {
// If listing doesn't have coordinates, keep it (don't filter out) // If listing doesn't have coordinates, keep it (don't filter out)
@@ -176,14 +169,48 @@ class FredyPipelineExecutioner {
const isInPolygon = polygonFeatures.some((feature) => booleanPointInPolygon(point, feature)); const isInPolygon = polygonFeatures.some((feature) => booleanPointInPolygon(point, feature));
if (!isInPolygon) { if (!isInPolygon) {
filteredIds.push(listing.id); toDeleteListingByIds.push(listing.id);
} }
return isInPolygon; return isInPolygon;
}); });
if (filteredIds.length > 0) { if (toDeleteListingByIds.length > 0) {
deleteListingsById(filteredIds); deleteListingsById(toDeleteListingByIds);
}
return keptListings;
}
/**
* Filter listings based on their specifications (minRooms, minSize, maxPrice).
*
* @param {ParsedListing[]} newListings New listings to filter.
* @returns {ParsedListing[]} Listings that pass the specification filters.
*/
_filterBySpecs(newListings) {
const { minRooms, minSize, maxPrice } = this._jobSpecFilter || {};
// If no specs are set, return all listings
if (!minRooms && !minSize && !maxPrice) {
return newListings;
}
const toDeleteListingByIds = [];
const keptListings = newListings.filter((listing) => {
const filterOut =
(minRooms && listing.rooms && listing.rooms < minRooms) ||
(minSize && listing.size && listing.size < minSize) ||
(maxPrice && listing.price && listing.price > maxPrice);
if (filterOut) {
toDeleteListingByIds.push(listing.id);
}
return !filterOut;
});
if (toDeleteListingByIds.length > 0) {
deleteListingsById(toDeleteListingByIds);
} }
return keptListings; return keptListings;
@@ -194,7 +221,7 @@ class FredyPipelineExecutioner {
* a provider-specific getListings override is supplied. * a provider-specific getListings override is supplied.
* *
* @param {string} url The provider URL to fetch from. * @param {string} url The provider URL to fetch from.
* @returns {Promise<Listing[]>} Resolves with an array of listings (empty when none found). * @returns {Promise<ParsedListing[]>} Resolves with an array of listings (empty when none found).
*/ */
_getListings(url) { _getListings(url) {
const extractor = new Extractor({ ...this._providerConfig.puppeteerOptions, browser: this._browser }); const extractor = new Extractor({ ...this._providerConfig.puppeteerOptions, browser: this._browser });
@@ -217,33 +244,42 @@ class FredyPipelineExecutioner {
} }
/** /**
* Normalize raw listings into the provider-specific Listing shape. * Normalize raw listings into the provider-specific ParsedListing shape.
* *
* @param {any[]} listings Raw listing entries from the extractor or override. * @param {any[]} listings Raw listing entries from the extractor or override.
* @returns {Listing[]} Normalized listings. * @returns {ParsedListing[]} Normalized listings.
*/ */
_normalize(listings) { _normalize(listings) {
return listings.map(this._providerConfig.normalize); return listings.map((listing) => this._providerConfig.normalize(listing));
} }
/** /**
* Filter out listings that are missing required fields and those rejected by the * Filter out listings that are missing required fields and those rejected by the
* provider's blacklist/filter function. * provider's blacklist/filter function.
* *
* @param {Listing[]} listings Listings to filter. * @param {ParsedListing[]} listings Listings to filter.
* @returns {Listing[]} Filtered listings that pass validation and provider filter. * @returns {ParsedListing[]} Filtered listings that pass validation and provider filter.
*/ */
_filter(listings) { _filter(listings) {
const keys = Object.keys(this._providerConfig.crawlFields); const requiredKeys = this._providerConfig.fieldNames;
const filteredListings = listings.filter((item) => keys.every((key) => key in item)); const requireValues = ['id', 'link', 'title'];
return filteredListings.filter(this._providerConfig.filter);
const filteredListings = listings
// this should never filter some listings out, because the normalize function should always extract all fields.
.filter((item) => requiredKeys.every((key) => key in item))
// TODO: move blacklist filter to this file, so it will handle for all providers in same way.
.filter(this._providerConfig.filter)
// filter out listings that are missing required fields
.filter((item) => requireValues.every((key) => item[key] != null));
return filteredListings;
} }
/** /**
* Determine which listings are new by comparing their IDs against stored hashes. * Determine which listings are new by comparing their IDs against stored hashes.
* *
* @param {Listing[]} listings Listings to evaluate for novelty. * @param {ParsedListing[]} listings Listings to evaluate for novelty.
* @returns {Listing[]} New listings not seen before. * @returns {ParsedListing[]} New listings not seen before.
* @throws {NoNewListingsWarning} When no new listings are found. * @throws {NoNewListingsWarning} When no new listings are found.
*/ */
_findNew(listings) { _findNew(listings) {
@@ -260,23 +296,30 @@ class FredyPipelineExecutioner {
/** /**
* Send notifications for new listings using the configured notification adapter(s). * Send notifications for new listings using the configured notification adapter(s).
* *
* @param {Listing[]} newListings New listings to notify about. * @param {ParsedListing[]} newListings New listings to notify about.
* @returns {Promise<Listing[]>} Resolves to the provided listings after notifications complete. * @returns {Promise<ParsedListing[]>} Resolves to the provided listings after notifications complete.
* @throws {NoNewListingsWarning} When there are no listings to notify about. * @throws {NoNewListingsWarning} When there are no listings to notify about.
*/ */
_notify(newListings) { _notify(newListings) {
if (newListings.length === 0) { if (newListings.length === 0) {
throw new NoNewListingsWarning(); throw new NoNewListingsWarning();
} }
const sendNotifications = notify.send(this._providerId, newListings, this._notificationConfig, this._jobKey); // TODO: move this to the notification adapter, so it will handle for all providers in same way.
const formattedListings = newListings.map(formatListing);
const sendNotifications = notify.send(
this._providerId,
formattedListings,
this._jobNotificationConfig,
this._jobKey,
);
return Promise.all(sendNotifications).then(() => newListings); return Promise.all(sendNotifications).then(() => newListings);
} }
/** /**
* Persist new listings and pass them through. * Persist new listings and pass them through.
* *
* @param {Listing[]} newListings Listings to store. * @param {ParsedListing[]} newListings Listings to store.
* @returns {Listing[]} The same listings, unchanged. * @returns {ParsedListing[]} The same listings, unchanged.
*/ */
_save(newListings) { _save(newListings) {
logger.debug(`Storing ${newListings.length} new listings (Provider: '${this._providerId}')`); logger.debug(`Storing ${newListings.length} new listings (Provider: '${this._providerId}')`);
@@ -287,8 +330,8 @@ class FredyPipelineExecutioner {
/** /**
* Calculate distance for new listings. * Calculate distance for new listings.
* *
* @param {Listing[]} listings * @param {ParsedListing[]} listings
* @returns {Listing[]} * @returns {ParsedListing[]}
* @private * @private
*/ */
_calculateDistance(listings) { _calculateDistance(listings) {
@@ -324,8 +367,8 @@ class FredyPipelineExecutioner {
* Remove listings that are similar to already known entries according to the similarity cache. * Remove listings that are similar to already known entries according to the similarity cache.
* Adds the remaining listings to the cache. * Adds the remaining listings to the cache.
* *
* @param {Listing[]} listings Listings to filter by similarity. * @param {ParsedListing[]} listings Listings to filter by similarity.
* @returns {Listing[]} Listings considered unique enough to keep. * @returns {ParsedListing[]} Listings considered unique enough to keep.
*/ */
_filterBySimilarListings(listings) { _filterBySimilarListings(listings) {
const filteredIds = []; const filteredIds = [];

View File

@@ -172,6 +172,7 @@ jobRouter.post('/', async (req, res) => {
enabled, enabled,
shareWithUsers = [], shareWithUsers = [],
spatialFilter = null, spatialFilter = null,
specFilter = null,
} = req.body; } = req.body;
const settings = await getSettings(); const settings = await getSettings();
try { try {
@@ -197,6 +198,7 @@ jobRouter.post('/', async (req, res) => {
notificationAdapter, notificationAdapter,
shareWithUsers, shareWithUsers,
spatialFilter, spatialFilter,
specFilter,
}); });
} catch (error) { } catch (error) {
res.send(new Error(error)); res.send(new Error(error));

View File

@@ -5,8 +5,16 @@
import { buildHash, isOneOf } from '../utils.js'; import { buildHash, isOneOf } from '../utils.js';
import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
import { extractNumber } from '../utils/extract-number.js';
/** @import { ParsedListing } from '../types/listing.js' */
/** @import { ProviderConfig } from '../types/providerConfig.js' */
let appliedBlackList = []; let appliedBlackList = [];
/**
* @param {any} o
* @returns {ParsedListing}
*/
function normalize(o) { function normalize(o) {
const baseUrl = 'https://www.1a-immobilienmarkt.de'; const baseUrl = 'https://www.1a-immobilienmarkt.de';
const link = `${baseUrl}/expose/${o.id}.html`; const link = `${baseUrl}/expose/${o.id}.html`;
@@ -14,7 +22,17 @@ function normalize(o) {
const id = buildHash(o.id, price); const id = buildHash(o.id, price);
const image = baseUrl + o.image; const image = baseUrl + o.image;
const address = o.address == null ? null : o.address.trim().replaceAll('/', ','); const address = o.address == null ? null : o.address.trim().replaceAll('/', ',');
return Object.assign(o, { id, price, link, image, address }); return {
id,
link,
title: o.title || '',
price: extractNumber(price),
size: extractNumber(o.size),
rooms: extractNumber(o.rooms),
address,
image,
description: undefined,
};
} }
/** /**
@@ -34,13 +52,19 @@ function normalizePrice(price) {
} }
return result[0]; return result[0];
} }
/**
* @param {ParsedListing} o
* @returns {boolean}
*/
function applyBlacklist(o) { function applyBlacklist(o) {
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList); const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList); const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
return titleNotBlacklisted && descNotBlacklisted; return titleNotBlacklisted && descNotBlacklisted;
} }
/** @type {ProviderConfig} */
const config = { const config = {
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
url: null, url: null,
crawlContainer: '.tabelle', crawlContainer: '.tabelle',
sortByDateParam: 'sort_type=newest', sortByDateParam: 'sort_type=newest',
@@ -48,7 +72,8 @@ const config = {
crawlFields: { crawlFields: {
id: '.inner_object_data input[name="marker_objekt_id"]@value | int', id: '.inner_object_data input[name="marker_objekt_id"]@value | int',
price: '.inner_object_data .single_data_price | removeNewline | trim', price: '.inner_object_data .single_data_price | removeNewline | trim',
size: '.tabelle .tabelle_inhalt_infos .single_data_box | removeNewline | trim', size: '.tabelle .tabelle_inhalt_infos .single_data_box:nth-of-type(1) | removeNewline | trim',
rooms: '.tabelle .tabelle_inhalt_infos .single_data_box:nth-of-type(2) | removeNewline | trim',
title: '.inner_object_data .tabelle_inhalt_titel_black | removeNewline | trim', title: '.inner_object_data .tabelle_inhalt_titel_black | removeNewline | trim',
image: '.inner_object_pic img@src', image: '.inner_object_pic img@src',
address: '.tabelle .tabelle_inhalt_infos .left_information > div:nth-child(2) | removeNewline | trim', address: '.tabelle .tabelle_inhalt_infos .left_information > div:nth-child(2) | removeNewline | trim',

View File

@@ -5,9 +5,12 @@
import { buildHash, isOneOf } from '../utils.js'; import { buildHash, isOneOf } from '../utils.js';
import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
import { extractNumber } from '../utils/extract-number.js';
import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js'; import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js';
import * as cheerio from 'cheerio'; import * as cheerio from 'cheerio';
import logger from '../services/logger.js'; import logger from '../services/logger.js';
/** @import { ParsedListing } from '../types/listing.js' */
/** @import { ProviderConfig } from '../types/providerConfig.js' */
let appliedBlackList = []; let appliedBlackList = [];
@@ -65,27 +68,44 @@ async function fetchDetails(listing, browser) {
return listing; return listing;
} }
} }
/**
* @param {any} o
* @returns {ParsedListing}
*/
function normalize(o) { function normalize(o) {
const baseUrl = 'https://www.immobilien.de'; const baseUrl = 'https://www.immobilien.de';
const size = o.size || null; const title = o.title || '';
const price = o.price || null;
const title = o.title || 'No title available';
const address = o.address || null; const address = o.address || null;
const shortLink = shortenLink(o.link); const shortLink = shortenLink(o.link);
const link = shortLink ? (shortLink.startsWith('http') ? shortLink : baseUrl + shortLink) : baseUrl; const link = shortLink ? (shortLink.startsWith('http') ? shortLink : baseUrl + shortLink) : baseUrl;
const image = o.image ? (o.image.startsWith('http') ? o.image : baseUrl + o.image) : null; const image = o.image ? (o.image.startsWith('http') ? o.image : baseUrl + o.image) : null;
const id = buildHash(parseId(shortLink), o.price); const id = buildHash(parseId(shortLink), o.price);
return Object.assign(o, { id, price, size, title, address, link, image }); return {
id,
link,
title,
price: extractNumber(o.price),
size: extractNumber(o.size),
rooms: extractNumber(o.rooms),
address,
image,
description: o.description,
};
} }
/**
* @param {ParsedListing} o
* @returns {boolean}
*/
function applyBlacklist(o) { function applyBlacklist(o) {
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList); const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList); const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
return titleNotBlacklisted && descNotBlacklisted; return titleNotBlacklisted && descNotBlacklisted;
} }
/** @type {ProviderConfig} */
const config = { const config = {
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
url: null, url: null,
crawlContainer: 'a.lr-card', crawlContainer: 'a.lr-card',
sortByDateParam: 'sort_col=*created_ts&sort_dir=desc', sortByDateParam: 'sort_col=*created_ts&sort_dir=desc',
@@ -94,6 +114,7 @@ const config = {
id: '@href', //will be transformed later id: '@href', //will be transformed later
price: '.lr-card__price-amount | trim', price: '.lr-card__price-amount | trim',
size: '.lr-card__fact:has(.lr-card__fact-label:contains("Fläche")) .lr-card__fact-value | trim', size: '.lr-card__fact:has(.lr-card__fact-label:contains("Fläche")) .lr-card__fact-value | trim',
rooms: '.zimmer .label_info',
title: '.lr-card__title | trim', title: '.lr-card__title | trim',
description: '.description | trim', description: '.description | trim',
link: '@href', link: '@href',

View File

@@ -46,6 +46,10 @@ import {
convertWebToMobile, convertWebToMobile,
} from '../services/immoscout/immoscout-web-translator.js'; } from '../services/immoscout/immoscout-web-translator.js';
import logger from '../services/logger.js'; import logger from '../services/logger.js';
import { extractNumber } from '../utils/extract-number.js';
/** @import { ParsedListing } from '../types/listing.js' */
/** @import { ProviderConfig } from '../types/providerConfig.js' */
let appliedBlackList = []; let appliedBlackList = [];
async function getListings(url) { async function getListings(url) {
@@ -168,22 +172,44 @@ async function isListingActive(link) {
function nullOrEmpty(val) { function nullOrEmpty(val) {
return val == null || val.length === 0; return val == null || val.length === 0;
} }
/**
* @param {any} o
* @returns {ParsedListing}
*/
function normalize(o) { function normalize(o) {
const title = nullOrEmpty(o.title) ? 'NO TITLE FOUND' : o.title.replace('NEU', ''); const title = (o.title || '').replace('NEU', '').trim();
const address = nullOrEmpty(o.address) ? 'NO ADDRESS FOUND' : (o.address || '').replace(/\(.*\),.*$/, '').trim(); const address = nullOrEmpty(o.address) ? 'NO ADDRESS FOUND' : (o.address || '').replace(/\(.*\),.*$/, '').trim();
const id = buildHash(o.id, o.price); const id = buildHash(o.id, o.price);
return Object.assign(o, { id, title, address }); return {
id,
link: o.link,
title,
price: extractNumber(o.price),
size: extractNumber(o.size),
rooms: extractNumber(o.rooms),
address,
image: o.image,
description: o.description,
};
} }
/**
* @param {ParsedListing} o
* @returns {boolean}
*/
function applyBlacklist(o) { function applyBlacklist(o) {
return !isOneOf(o.title, appliedBlackList); return !isOneOf(o.title, appliedBlackList);
} }
/** @type {ProviderConfig} */
const config = { const config = {
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
url: null, url: null,
crawlFields: { crawlFields: {
id: 'id', id: 'id',
title: 'title', title: 'title',
price: 'price', price: 'price',
size: 'size', size: 'size',
rooms: 'rooms',
link: 'link', link: 'link',
address: 'address', address: 'address',
}, },

View File

@@ -5,27 +5,46 @@
import { isOneOf, buildHash } from '../utils.js'; import { isOneOf, buildHash } from '../utils.js';
import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
import { extractNumber } from '../utils/extract-number.js';
/** @import { ParsedListing } from '../types/listing.js' */
/** @import { ProviderConfig } from '../types/providerConfig.js' */
let appliedBlackList = []; let appliedBlackList = [];
/**
* @param {any} o
* @returns {ParsedListing}
*/
function normalize(o) { function normalize(o) {
const size = o.size || 'N/A m²';
const price = (o.price || '--- €').replace('Preis auf Anfrage', '--- €');
const title = o.title || 'No title available';
const immoId = o.id.substring(o.id.indexOf('-') + 1, o.id.length); const immoId = o.id.substring(o.id.indexOf('-') + 1, o.id.length);
const link = `https://immo.swp.de/immobilien/${immoId}`; const link = `https://immo.swp.de/immobilien/${immoId}`;
const description = o.description; const id = buildHash(immoId, o.price);
const id = buildHash(immoId, price); return {
return Object.assign(o, { id, price, size, title, link, description }); id,
link,
title: o.title || '',
price: extractNumber(o.price),
size: extractNumber(o.size),
rooms: extractNumber(o.rooms),
address: o.address,
image: o.image,
description: undefined,
};
} }
/**
* @param {ParsedListing} o
* @returns {boolean}
*/
function applyBlacklist(o) { function applyBlacklist(o) {
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList); const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList); const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
return titleNotBlacklisted && descNotBlacklisted; return titleNotBlacklisted && descNotBlacklisted;
} }
/** @type {ProviderConfig} */
const config = { const config = {
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
url: null, url: null,
crawlContainer: '.js-serp-item', crawlContainer: '.js-serp-item',
sortByDateParam: 's=most_recently_updated_first', sortByDateParam: 's=most_recently_updated_first',
@@ -34,9 +53,10 @@ const config = {
id: '.js-bookmark-btn@data-id', id: '.js-bookmark-btn@data-id',
price: 'div.align-items-start div:first-child | trim', price: 'div.align-items-start div:first-child | trim',
size: 'div.align-items-start div:nth-child(3) | trim', size: 'div.align-items-start div:nth-child(3) | trim',
rooms: 'div.align-items-start div:nth-child(2) | trim',
address: '.js-bookmark-btn@data-address',
title: '.js-item-title-link@title | trim', title: '.js-item-title-link@title | trim',
link: '.ci-search-result__link@href', link: '.ci-search-result__link@href',
description: '.js-show-more-item-sm | removeNewline | trim',
image: 'img@src', image: 'img@src',
}, },
normalize: normalize, normalize: normalize,

View File

@@ -5,9 +5,12 @@
import { buildHash, isOneOf } from '../utils.js'; import { buildHash, isOneOf } from '../utils.js';
import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
import { extractNumber } from '../utils/extract-number.js';
import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js'; import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js';
import * as cheerio from 'cheerio'; import * as cheerio from 'cheerio';
import logger from '../services/logger.js'; import logger from '../services/logger.js';
/** @import { ParsedListing } from '../types/listing.js' */
/** @import { ProviderConfig } from '../types/providerConfig.js' */
let appliedBlackList = []; let appliedBlackList = [];
@@ -48,18 +51,38 @@ async function fetchDetails(listing, browser) {
} }
} }
/**
 * Map a raw crawl record onto the ParsedListing shape.
 *
 * The id is a hash of the raw id and the raw (unparsed) price; price, size and rooms are
 * passed through extractNumber, and a falsy title becomes an empty string.
 *
 * @param {any} o raw crawl record
 * @returns {ParsedListing}
 */
function normalize(o) {
  const hashedId = buildHash(o.id, o.price);
  const [price, size, rooms] = [o.price, o.size, o.rooms].map((raw) => extractNumber(raw));
  const { link, address, image, description } = o;
  return {
    id: hashedId,
    link,
    title: o.title || '',
    price,
    size,
    rooms,
    address,
    image,
    description,
  };
}
/**
 * Keep a listing only when neither its title nor its description hits the applied blacklist.
 *
 * @param {ParsedListing} o normalized listing
 * @returns {boolean} true when the listing passes the blacklist
 */
function applyBlacklist(o) {
  // Both fields are always checked, in title-then-description order.
  const hits = [o.title, o.description].map((text) => isOneOf(text, appliedBlackList));
  return hits.every((hit) => !hit);
}
/** @type {ProviderConfig} */
const config = { const config = {
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
url: null, url: null,
crawlContainer: crawlContainer:
'div[data-testid="serp-core-scrollablelistview-testid"]:not(div[data-testid="serp-enlargementlist-testid"] div[data-testid="serp-card-testid"]) div[data-testid="serp-core-classified-card-testid"]', 'div[data-testid="serp-core-scrollablelistview-testid"]:not(div[data-testid="serp-enlargementlist-testid"] div[data-testid="serp-card-testid"]) div[data-testid="serp-core-classified-card-testid"]',
@@ -68,7 +91,8 @@ const config = {
crawlFields: { crawlFields: {
id: 'a@href', id: 'a@href',
price: 'div[data-testid="cardmfe-price-testid"] | removeNewline | trim', price: 'div[data-testid="cardmfe-price-testid"] | removeNewline | trim',
size: 'div[data-testid="cardmfe-keyfacts-testid"] | removeNewline | trim', size: 'div[data-testid="cardmfe-keyfacts-testid"] div:nth-of-type(3) | removeNewline | trim',
rooms: 'div[data-testid="cardmfe-keyfacts-testid"] div:nth-of-type(1) | removeNewline | trim',
title: 'div[data-testid="cardmfe-description-box-text-test-id"] > div:nth-of-type(2)', title: 'div[data-testid="cardmfe-description-box-text-test-id"] > div:nth-of-type(2)',
link: 'a@href', link: 'a@href',
description: 'div[data-testid="cardmfe-description-text-test-id"] > div:nth-of-type(2) | removeNewline | trim', description: 'div[data-testid="cardmfe-description-text-test-id"] > div:nth-of-type(2) | removeNewline | trim',

View File

@@ -5,6 +5,9 @@
import { buildHash, isOneOf } from '../utils.js'; import { buildHash, isOneOf } from '../utils.js';
import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
import { extractNumber } from '../utils/extract-number.js';
/** @import { ParsedListing } from '../types/listing.js' */
/** @import { ProviderConfig } from '../types/providerConfig.js' */
import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js'; import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js';
import logger from '../services/logger.js'; import logger from '../services/logger.js';
import * as cheerio from 'cheerio'; import * as cheerio from 'cheerio';
@@ -146,13 +149,33 @@ async function fetchDetails(listing, browser) {
return enrichListingFromDetails(listing, browser); return enrichListingFromDetails(listing, browser);
} }
/**
 * Map a raw crawl record onto the ParsedListing shape.
 *
 * `o.tags` is split on '·'; the first trimmed segment containing 'm²' is used as size and the
 * first one containing 'Zi.' as rooms. The link is run through toAbsoluteLink, falling back to
 * the raw link when that yields a falsy value.
 *
 * @param {any} o raw crawl record
 * @returns {ParsedListing}
 */
function normalize(o) {
  const tagList = [];
  for (const rawTag of (o.tags || '').split('·')) {
    tagList.push(rawTag.trim());
  }
  const firstTagWith = (marker) => tagList.find((tag) => tag.includes(marker));
  const sizeTag = firstTagWith('m²');
  const roomsTag = firstTagWith('Zi.');
  const hashedId = buildHash(o.id, o.price);
  return {
    id: hashedId,
    title: o.title,
    link: toAbsoluteLink(o.link) || o.link,
    price: extractNumber(o.price),
    size: extractNumber(sizeTag),
    rooms: extractNumber(roomsTag),
    address: o.address,
    description: o.description,
    image: o.image,
  };
}
/**
* @param {ParsedListing} o
* @returns {boolean}
*/
function applyBlacklist(o) { function applyBlacklist(o) {
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList); const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList); const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
@@ -161,16 +184,18 @@ function applyBlacklist(o) {
return o.title != null && !isBlacklistedDistrict && titleNotBlacklisted && descNotBlacklisted; return o.title != null && !isBlacklistedDistrict && titleNotBlacklisted && descNotBlacklisted;
} }
/** @type {ProviderConfig} */
const config = { const config = {
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
url: null, url: null,
crawlContainer: '#srchrslt-adtable .ad-listitem ', crawlContainer: '#srchrslt-adtable .ad-listitem ',
//sort by date is standard oO //sort by date is standard oO
sortByDateParam: null, sortByDateParam: null,
waitForSelector: 'body', waitForSelector: 'body',
crawlFields: { crawlFields: {
id: '.aditem@data-adid | int', id: '.aditem@data-adid',
price: '.aditem-main--middle--price-shipping--price | removeNewline | trim', price: '.aditem-main--middle--price-shipping--price | removeNewline | trim',
size: '.aditem-main .text-module-end | removeNewline | trim', tags: '.aditem-main--middle--tags | removeNewline | trim',
title: '.aditem-main .text-module-begin a | removeNewline | trim', title: '.aditem-main .text-module-begin a | removeNewline | trim',
link: '.aditem-main .text-module-begin a@href | removeNewline | trim', link: '.aditem-main .text-module-begin a@href | removeNewline | trim',
description: '.aditem-main .aditem-main--middle--description | removeNewline | trim', description: '.aditem-main .aditem-main--middle--description | removeNewline | trim',

View File

@@ -5,23 +5,46 @@
import { isOneOf, buildHash } from '../utils.js'; import { isOneOf, buildHash } from '../utils.js';
import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
import { extractNumber } from '../utils/extract-number.js';
/** @import { ParsedListing } from '../types/listing.js' */
/** @import { ProviderConfig } from '../types/providerConfig.js' */
let appliedBlackList = []; let appliedBlackList = [];
/**
 * Map a raw crawl record onto the ParsedListing shape.
 *
 * The id hashes the last path segment of `o.id` together with the raw price. `o.tags` is split
 * on ' | ': the first segment is used as rooms, the second as size. A missing `tags` value is
 * treated as an empty string so a card without that element no longer throws here.
 *
 * @param {any} o raw crawl record
 * @returns {ParsedListing}
 */
function normalize(o) {
  const originalId = o.id.split('/').pop();
  const id = buildHash(originalId, o.price);
  // A missing link stays null/undefined instead of being turned into a bogus URL.
  const link = o.link != null ? `https://www.mcmakler.de${o.link}` : o.link;
  // Guard against the tags selector not matching: `undefined.split` would abort normalization.
  const [rooms, size] = (o.tags ?? '').split(' | ');
  const address = o.address?.replace(' / ', ' ') || null;
  return {
    id,
    link,
    title: o.title || '',
    price: extractNumber(o.price),
    size: extractNumber(size),
    rooms: extractNumber(rooms),
    address,
    image: o.image,
    description: undefined,
  };
}
/**
 * Blacklist filter: a listing passes when neither title nor description is blacklisted.
 *
 * @param {ParsedListing} o normalized listing
 * @returns {boolean} true when the listing should be kept
 */
function applyBlacklist(o) {
  const titleHit = isOneOf(o.title, appliedBlackList);
  const descriptionHit = isOneOf(o.description, appliedBlackList);
  return !(titleHit || descriptionHit);
}
/** @type {ProviderConfig} */
const config = { const config = {
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
url: null, url: null,
crawlContainer: 'article[data-testid="propertyCard"]', crawlContainer: 'article[data-testid="propertyCard"]',
sortByDateParam: 'sortBy=DATE&sortOn=DESC', sortByDateParam: 'sortBy=DATE&sortOn=DESC',
@@ -30,7 +53,7 @@ const config = {
id: 'h2 a@href', id: 'h2 a@href',
title: 'h2 a | removeNewline | trim', title: 'h2 a | removeNewline | trim',
price: 'footer > p:first-of-type | trim', price: 'footer > p:first-of-type | trim',
size: 'footer > p:nth-of-type(2) | trim', tags: 'footer > p:nth-of-type(2) | trim',
address: 'div > h2 + p | removeNewline | trim', address: 'div > h2 + p | removeNewline | trim',
image: 'img@src', image: 'img@src',
link: 'h2 a@href', link: 'h2 a@href',

View File

@@ -5,6 +5,9 @@
import { isOneOf, buildHash } from '../utils.js'; import { isOneOf, buildHash } from '../utils.js';
import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
import { extractNumber } from '../utils/extract-number.js';
/** @import { ParsedListing } from '../types/listing.js' */
/** @import { ProviderConfig } from '../types/providerConfig.js' */
let appliedBlackList = []; let appliedBlackList = [];
@@ -12,19 +15,39 @@ function nullOrEmpty(val) {
return val == null || val.length === 0; return val == null || val.length === 0;
} }
/**
 * Map a raw crawl record onto the ParsedListing shape.
 *
 * The link is rebuilt on the neubaukompass.de host from the '/neubau' part of the raw link, or
 * set to 'NO LINK' when the raw link is null/empty. The id hashes the raw link and raw price.
 *
 * @param {any} o raw crawl record
 * @returns {ParsedListing}
 */
function normalize(o) {
  let link = 'NO LINK';
  if (!nullOrEmpty(o.link)) {
    const pathStart = o.link.indexOf('/neubau');
    link = `https://www.neubaukompass.de${o.link.substring(pathStart)}`;
  }
  const { address, image, description } = o;
  return {
    id: buildHash(o.link, o.price),
    link,
    title: o.title || '',
    price: extractNumber(o.price),
    size: extractNumber(o.size),
    rooms: extractNumber(o.rooms),
    address,
    image,
    description,
  };
}
/**
 * Blacklist filter that looks at the title only.
 *
 * @param {ParsedListing} o normalized listing
 * @returns {boolean} true when the title is not blacklisted
 */
function applyBlacklist(o) {
  const titleIsBlacklisted = isOneOf(o.title, appliedBlackList);
  return !titleIsBlacklisted;
}
/** @type {ProviderConfig} */
const config = { const config = {
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
url: null, url: null,
crawlContainer: '.col-12.mb-4', crawlContainer: '.col-12.mb-4',
sortByDateParam: 'Sortierung=Id&Richtung=DESC', sortByDateParam: 'Sortierung=Id&Richtung=DESC',
@@ -34,7 +57,9 @@ const config = {
title: 'a@title | removeNewline | trim', title: 'a@title | removeNewline | trim',
link: 'a@href', link: 'a@href',
address: '.nbk-project-card__description | removeNewline | trim', address: '.nbk-project-card__description | removeNewline | trim',
price: '.nbk-project-card__spec-item .nbk-project-card__spec-value | removeNewline | trim', price: '.nbk-project-card__spec-item:nth-child(1) .nbk-project-card__spec-value | removeNewline | trim',
size: '.nbk-project-card__spec-item:nth-child(2) .nbk-project-card__spec-value | removeNewline | trim',
rooms: '.nbk-project-card__spec-item:nth-child(3) .nbk-project-card__spec-value | removeNewline | trim',
image: '.nbk-project-card__image@src', image: '.nbk-project-card__image@src',
}, },
normalize: normalize, normalize: normalize,

View File

@@ -5,19 +5,43 @@
import { isOneOf, buildHash } from '../utils.js'; import { isOneOf, buildHash } from '../utils.js';
import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
import { extractNumber } from '../utils/extract-number.js';
/** @import { ParsedListing } from '../types/listing.js' */
/** @import { ProviderConfig } from '../types/providerConfig.js' */
let appliedBlackList = []; let appliedBlackList = [];
/**
 * Map a raw crawl record onto the ParsedListing shape.
 *
 * The link is prefixed with `metaInformation.baseUrl`; the id hashes the raw title, the raw
 * (unprefixed) link and the raw price.
 *
 * @param {any} o raw crawl record
 * @returns {ParsedListing}
 */
function normalize(o) {
  const absoluteLink = metaInformation.baseUrl + o.link;
  const hashedId = buildHash(o.title, o.link, o.price);
  const numeric = {
    price: extractNumber(o.price),
    size: extractNumber(o.size),
    rooms: extractNumber(o.rooms),
  };
  return {
    id: hashedId,
    link: absoluteLink,
    title: o.title || '',
    ...numeric,
    address: o.address,
    image: o.image,
    description: o.description,
  };
}
/**
 * Blacklist filter over title and description.
 *
 * @param {ParsedListing} o normalized listing
 * @returns {boolean} false as soon as either field is blacklisted, true otherwise
 */
function applyBlacklist(o) {
  const inTitle = isOneOf(o.title, appliedBlackList);
  const inDescription = isOneOf(o.description, appliedBlackList);
  if (inTitle) {
    return false;
  }
  return !inDescription;
}
/** @type {ProviderConfig} */
const config = { const config = {
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
url: null, url: null,
crawlContainer: 'div[data-livecomponent-id*="search/property_list"] .grid > div', crawlContainer: 'div[data-livecomponent-id*="search/property_list"] .grid > div',
sortByDateParam: null, sortByDateParam: null,
@@ -27,6 +51,7 @@ const config = {
title: 'h4 | removeNewline | trim', title: 'h4 | removeNewline | trim',
price: '.text-xl | trim', price: '.text-xl | trim',
size: 'div[title="Wohnfläche"] | trim', size: 'div[title="Wohnfläche"] | trim',
rooms: 'div[title="Zimmer"] | trim',
address: '.text-slate-800 | removeNewline | trim', address: '.text-slate-800 | removeNewline | trim',
image: 'img@src', image: 'img@src',
link: 'a@href', link: 'a@href',

View File

@@ -5,24 +5,47 @@
import { isOneOf, buildHash } from '../utils.js'; import { isOneOf, buildHash } from '../utils.js';
import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
import { extractNumber } from '../utils/extract-number.js';
/** @import { ParsedListing } from '../types/listing.js' */
/** @import { ProviderConfig } from '../types/providerConfig.js' */
let appliedBlackList = []; let appliedBlackList = [];
/**
 * Map a raw crawl record onto the ParsedListing shape.
 *
 * - address: a leading "adresse " prefix (case-insensitive) is removed; null when absent.
 * - link: URI-decoded raw link, or `config.url` when the record has no link.
 * - image: the raw value is a CSS style string; the target of its first `url(...)` is
 *   extracted, with optional surrounding quotes removed. null when there is no image value
 *   or the style contains no `url(...)` (previously this case threw a TypeError).
 *
 * @param {any} o raw crawl record
 * @returns {ParsedListing}
 */
function normalize(o) {
  const id = buildHash(o.id, o.price);
  const address = o.address?.replace(/^adresse /i, '') ?? null;
  const link = o.link != null ? decodeURIComponent(o.link) : config.url;
  // Group 1 is the optional quote character, group 2 the URL itself.
  const image = o.image?.match(/url\(\s*(['"]?)(.*?)\1\s*\)/i)?.[2] ?? null;
  return {
    id,
    link,
    title: o.title || '',
    price: extractNumber(o.price),
    size: extractNumber(o.size),
    rooms: extractNumber(o.rooms),
    address,
    image,
    description: o.description,
  };
}
/**
 * Blacklist filter: reject a listing whose title or description is on the applied blacklist.
 *
 * @param {ParsedListing} o normalized listing
 * @returns {boolean} true when the listing should be kept
 */
function applyBlacklist(o) {
  const blacklisted = {
    title: isOneOf(o.title, appliedBlackList),
    description: isOneOf(o.description, appliedBlackList),
  };
  return !(blacklisted.title || blacklisted.description);
}
/** @type {ProviderConfig} */
const config = { const config = {
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
url: null, url: null,
crawlContainer: '.listentry-content', crawlContainer: '.listentry-content',
sortByDateParam: null, // sort by date is standard sortByDateParam: null, // sort by date is standard
@@ -32,6 +55,7 @@ const config = {
title: 'h2 | trim', title: 'h2 | trim',
price: '.listentry-details-price .listentry-details-v | trim', price: '.listentry-details-price .listentry-details-v | trim',
size: '.listentry-details-size .listentry-details-v | trim', size: '.listentry-details-size .listentry-details-v | trim',
rooms: '.listentry-details-rooms .listentry-details-v | trim',
address: '.listentry-adress | trim', address: '.listentry-adress | trim',
image: '.listentry-img@style', image: '.listentry-img@style',
link: '.shariff@data-url', link: '.shariff@data-url',

View File

@@ -8,6 +8,9 @@ import checkIfListingIsActive from '../services/listings/listingActiveTester.js'
import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js'; import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js';
import * as cheerio from 'cheerio'; import * as cheerio from 'cheerio';
import logger from '../services/logger.js'; import logger from '../services/logger.js';
import { extractNumber } from '../utils/extract-number.js';
/** @import { ParsedListing } from '../types/listing.js' */
/** @import { ProviderConfig } from '../types/providerConfig.js' */
let appliedBlackList = []; let appliedBlackList = [];
@@ -55,20 +58,39 @@ async function fetchDetails(listing, browser) {
} }
} }
/**
 * Map a raw crawl record onto the ParsedListing shape.
 *
 * The id hashes the last path segment of `o.id` (with '.html' removed) together with the raw
 * price. The link is prefixed with the immobilien.sparkasse.de host, or replaced by
 * `config.url` when the record has no link.
 *
 * @param {any} o raw crawl record
 * @returns {ParsedListing}
 */
function normalize(o) {
  const lastSegment = o.id.split('/').pop();
  const hashedId = buildHash(lastSegment.replace('.html', ''), o.price);

  let link;
  if (o.link != null) {
    link = `https://immobilien.sparkasse.de${o.link}`;
  } else {
    link = config.url;
  }

  const { address, image, description } = o;
  return {
    id: hashedId,
    link,
    title: o.title || '',
    price: extractNumber(o.price),
    size: extractNumber(o.size),
    rooms: extractNumber(o.rooms),
    address,
    image,
    description,
  };
}
/**
 * Blacklist filter over title and description.
 *
 * @param {ParsedListing} o normalized listing
 * @returns {boolean} true when no checked field is blacklisted
 */
function applyBlacklist(o) {
  const blacklistedFields = [o.title, o.description]
    .map((text) => isOneOf(text, appliedBlackList))
    .filter((hit) => hit);
  return blacklistedFields.length === 0;
}
/** @type {ProviderConfig} */
const config = { const config = {
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
url: null, url: null,
crawlContainer: 'div[data-testid="estate-link"]', crawlContainer: 'div[data-testid="estate-link"]',
sortByDateParam: 'sortBy=date_desc', sortByDateParam: 'sortBy=date_desc',
@@ -77,7 +99,8 @@ const config = {
id: 'a@href', id: 'a@href',
title: 'h3 | trim', title: 'h3 | trim',
price: '.estate-list-price | trim', price: '.estate-list-price | trim',
size: '.estate-mainfact span | trim', size: '.estate-mainfact:nth-child(1) span | trim',
rooms: '.estate-mainfact:nth-child(2) span | trim',
address: 'h6 | trim', address: 'h6 | trim',
image: 'img@src', image: 'img@src',
link: 'a@href', link: 'a@href',

View File

@@ -5,9 +5,12 @@
import { isOneOf, buildHash } from '../utils.js'; import { isOneOf, buildHash } from '../utils.js';
import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
import { extractNumber } from '../utils/extract-number.js';
import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js'; import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js';
import * as cheerio from 'cheerio'; import * as cheerio from 'cheerio';
import logger from '../services/logger.js'; import logger from '../services/logger.js';
/** @import { ParsedListing } from '../types/listing.js' */
/** @import { ProviderConfig } from '../types/providerConfig.js' */
let appliedBlackList = []; let appliedBlackList = [];
@@ -32,20 +35,39 @@ async function fetchDetails(listing, browser) {
return listing; return listing;
} }
} }
/**
 * Map a raw crawl record onto the ParsedListing shape.
 *
 * `o.details` is split on ' | ' into rooms, city and road (in that order). The address is
 * "city, road" built only from the segments that are actually present; it is null when none
 * are, instead of the literal string "undefined, undefined" that string interpolation would
 * produce. In the image URL the first 'small' is replaced by 'large'.
 *
 * @param {any} o raw crawl record
 * @returns {ParsedListing}
 */
function normalize(o) {
  const id = buildHash(o.id, o.price);
  const link = `https://www.wg-gesucht.de${o.link}`;
  const image = o.image != null ? o.image.replace('small', 'large') : null;
  const [rooms, city, road] = o.details?.split(' | ') ?? [];
  // Drop missing/blank segments so partial details never leak "undefined" into the address.
  const address = [city, road].filter((part) => part != null && part.trim() !== '').join(', ') || null;
  return {
    id,
    link,
    title: o.title || '',
    price: extractNumber(o.price),
    size: extractNumber(o.size),
    rooms: extractNumber(rooms),
    address,
    image,
    description: o.description,
  };
}
/**
 * Filter for normalized listings: requires an id and rejects blacklisted title/description.
 *
 * @param {ParsedListing} o normalized listing
 * @returns {boolean} true when the listing has an id and passes the blacklist
 */
function applyBlacklist(o) {
  const titleHit = isOneOf(o.title, appliedBlackList);
  const descriptionHit = isOneOf(o.description, appliedBlackList);
  if (o.id == null) {
    return false;
  }
  return !titleHit && !descriptionHit;
}
/** @type {ProviderConfig} */
const config = { const config = {
url: null, url: null,
crawlContainer: '#main_column .wgg_card', crawlContainer: '#main_column .wgg_card',
@@ -56,10 +78,13 @@ const config = {
details: '.row .noprint .col-xs-11 |removeNewline |trim', details: '.row .noprint .col-xs-11 |removeNewline |trim',
price: '.middle .col-xs-3 |removeNewline |trim', price: '.middle .col-xs-3 |removeNewline |trim',
size: '.middle .text-right |removeNewline |trim', size: '.middle .text-right |removeNewline |trim',
rooms: '.middle .text-right |removeNewline |trim',
title: '.truncate_title a |removeNewline |trim', title: '.truncate_title a |removeNewline |trim',
link: '.truncate_title a@href', link: '.truncate_title a@href',
image: '.img-responsive@src', image: '.img-responsive@src',
description: '.row .noprint .col-xs-11 |removeNewline |trim',
}, },
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
normalize: normalize, normalize: normalize,
filter: applyBlacklist, filter: applyBlacklist,
fetchDetails, fetchDetails,

View File

@@ -5,26 +5,45 @@
import * as utils from '../utils.js'; import * as utils from '../utils.js';
import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
import { extractNumber } from '../utils/extract-number.js';
/** @import { ParsedListing } from '../types/listing.js' */
/** @import { ProviderConfig } from '../types/providerConfig.js' */
let appliedBlackList = []; let appliedBlackList = [];
/**
 * Map a raw crawl record onto the ParsedListing shape.
 *
 * - id: last path segment of the link; undefined when the record has no link, so the filter's
 *   `o.id != null` check can drop it (previously a missing link threw here first).
 * - address: `o.description` is split on '-'; the first segment is the city and everything
 *   after the first '-' is the district, so hyphenated district names stay intact. The
 *   address is "district, city" built from the non-empty parts, or null when both are empty.
 *
 * @param {any} o raw crawl record
 * @returns {ParsedListing}
 */
function normalize(o) {
  const [rawCity = '', ...rawDistrict] = (o.description || '').split('-');
  const city = rawCity.trim();
  // Re-join the remainder: only the first '-' separates city from district.
  const part = rawDistrict.join('-').trim();
  const address = [part, city].filter((segment) => segment !== '').join(', ') || null;
  return {
    id: o.link?.split('/').pop(),
    link: o.link,
    title: o.title || '',
    price: extractNumber(o.price),
    size: extractNumber(o.size),
    rooms: extractNumber(o.rooms),
    address,
    image: o.image,
    description: o.description,
  };
}
/**
 * Filter for normalized listings: requires id, title and a string link, and rejects listings
 * whose title or description is on the applied blacklist.
 *
 * NOTE(review): the previous check `o.link.startsWith(o.link)` compared the link with itself,
 * so it was always true (or threw on a missing link). It is replaced by an explicit string
 * check; if a comparison against a base URL was intended, that still needs to be added.
 *
 * @param {ParsedListing} o normalized listing
 * @returns {boolean} true when the listing should be kept
 */
function applyBlacklist(o) {
  if (o.id == null || o.title == null || typeof o.link !== 'string') {
    return false;
  }
  const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
  const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
  return titleNotBlacklisted && descNotBlacklisted;
}
/** @type {ProviderConfig} */
const config = { const config = {
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
url: null, url: null,
sortByDateParam: null, sortByDateParam: null,
waitForSelector: 'body', waitForSelector: 'body',
@@ -37,7 +56,7 @@ const config = {
size: 'dl:nth-of-type(3) dd | removeNewline | trim', size: 'dl:nth-of-type(3) dd | removeNewline | trim',
description: 'div.before\\:icon-location_marker | trim', description: 'div.before\\:icon-location_marker | trim',
link: '@href', link: '@href',
imageUrl: 'img@src', image: 'img@src',
}, },
normalize: normalize, normalize: normalize,
filter: applyBlacklist, filter: applyBlacklist,

View File

@@ -178,15 +178,7 @@ export function initJobExecutionService({ providers, settings, intervalMs }) {
browser = await puppeteerExtractor.launchBrowser(matchedProvider.config.url, {}); browser = await puppeteerExtractor.launchBrowser(matchedProvider.config.url, {});
} }
await new FredyPipelineExecutioner( await new FredyPipelineExecutioner(matchedProvider.config, job, prov.id, similarityCache, browser).execute();
matchedProvider.config,
job.notificationAdapter,
job.spatialFilter,
prov.id,
job.id,
similarityCache,
browser,
).execute();
} catch (err) { } catch (err) {
logger.error(err); logger.error(err);
} }

View File

@@ -31,6 +31,7 @@ export const upsertJob = ({
userId, userId,
shareWithUsers = [], shareWithUsers = [],
spatialFilter = null, spatialFilter = null,
specFilter = null,
}) => { }) => {
const id = jobId || nanoid(); const id = jobId || nanoid();
const existing = SqliteConnection.query(`SELECT id, user_id FROM jobs WHERE id = @id LIMIT 1`, { id })[0]; const existing = SqliteConnection.query(`SELECT id, user_id FROM jobs WHERE id = @id LIMIT 1`, { id })[0];
@@ -44,7 +45,8 @@ export const upsertJob = ({
provider = @provider, provider = @provider,
notification_adapter = @notification_adapter, notification_adapter = @notification_adapter,
shared_with_user = @shareWithUsers, shared_with_user = @shareWithUsers,
spatial_filter = @spatialFilter spatial_filter = @spatialFilter,
spec_filter = @specFilter
WHERE id = @id`, WHERE id = @id`,
{ {
id, id,
@@ -55,12 +57,13 @@ export const upsertJob = ({
provider: toJson(provider ?? []), provider: toJson(provider ?? []),
notification_adapter: toJson(notificationAdapter ?? []), notification_adapter: toJson(notificationAdapter ?? []),
spatialFilter: spatialFilter ? toJson(spatialFilter) : null, spatialFilter: spatialFilter ? toJson(spatialFilter) : null,
specFilter: specFilter ? toJson(specFilter) : null,
}, },
); );
} else { } else {
SqliteConnection.execute( SqliteConnection.execute(
`INSERT INTO jobs (id, user_id, enabled, name, blacklist, provider, notification_adapter, shared_with_user, spatial_filter) `INSERT INTO jobs (id, user_id, enabled, name, blacklist, provider, notification_adapter, shared_with_user, spatial_filter, spec_filter)
VALUES (@id, @user_id, @enabled, @name, @blacklist, @provider, @notification_adapter, @shareWithUsers, @spatialFilter)`, VALUES (@id, @user_id, @enabled, @name, @blacklist, @provider, @notification_adapter, @shareWithUsers, @spatialFilter, @specFilter)`,
{ {
id, id,
user_id: ownerId, user_id: ownerId,
@@ -71,6 +74,7 @@ export const upsertJob = ({
shareWithUsers: toJson(shareWithUsers ?? []), shareWithUsers: toJson(shareWithUsers ?? []),
notification_adapter: toJson(notificationAdapter ?? []), notification_adapter: toJson(notificationAdapter ?? []),
spatialFilter: spatialFilter ? toJson(spatialFilter) : null, spatialFilter: spatialFilter ? toJson(spatialFilter) : null,
specFilter: specFilter ? toJson(specFilter) : null,
}, },
); );
} }
@@ -92,6 +96,7 @@ export const getJob = (jobId) => {
j.shared_with_user, j.shared_with_user,
j.notification_adapter AS notificationAdapter, j.notification_adapter AS notificationAdapter,
j.spatial_filter AS spatialFilter, j.spatial_filter AS spatialFilter,
j.spec_filter AS specFilter,
(SELECT COUNT(1) FROM listings l WHERE l.job_id = j.id AND l.is_active = 1 AND l.manually_deleted = 0) AS numberOfFoundListings (SELECT COUNT(1) FROM listings l WHERE l.job_id = j.id AND l.is_active = 1 AND l.manually_deleted = 0) AS numberOfFoundListings
FROM jobs j FROM jobs j
WHERE j.id = @id WHERE j.id = @id
@@ -107,6 +112,7 @@ export const getJob = (jobId) => {
shared_with_user: fromJson(row.shared_with_user, []), shared_with_user: fromJson(row.shared_with_user, []),
notificationAdapter: fromJson(row.notificationAdapter, []), notificationAdapter: fromJson(row.notificationAdapter, []),
spatialFilter: fromJson(row.spatialFilter, null), spatialFilter: fromJson(row.spatialFilter, null),
specFilter: fromJson(row.specFilter, null),
}; };
}; };
@@ -157,6 +163,7 @@ export const getJobs = () => {
j.shared_with_user, j.shared_with_user,
j.notification_adapter AS notificationAdapter, j.notification_adapter AS notificationAdapter,
j.spatial_filter AS spatialFilter, j.spatial_filter AS spatialFilter,
j.spec_filter AS specFilter,
(SELECT COUNT(1) FROM listings l WHERE l.job_id = j.id AND l.is_active = 1 AND l.manually_deleted = 0) AS numberOfFoundListings (SELECT COUNT(1) FROM listings l WHERE l.job_id = j.id AND l.is_active = 1 AND l.manually_deleted = 0) AS numberOfFoundListings
FROM jobs j FROM jobs j
WHERE j.enabled = 1 WHERE j.enabled = 1
@@ -170,6 +177,7 @@ export const getJobs = () => {
shared_with_user: fromJson(row.shared_with_user, []), shared_with_user: fromJson(row.shared_with_user, []),
notificationAdapter: fromJson(row.notificationAdapter, []), notificationAdapter: fromJson(row.notificationAdapter, []),
spatialFilter: fromJson(row.spatialFilter, null), spatialFilter: fromJson(row.spatialFilter, null),
specFilter: fromJson(row.specFilter, null),
})); }));
}; };
@@ -260,6 +268,7 @@ export const queryJobs = ({
j.shared_with_user, j.shared_with_user,
j.notification_adapter AS notificationAdapter, j.notification_adapter AS notificationAdapter,
j.spatial_filter AS spatialFilter, j.spatial_filter AS spatialFilter,
j.spec_filter AS specFilter,
(SELECT COUNT(1) FROM listings l WHERE l.job_id = j.id AND l.is_active = 1 AND l.manually_deleted = 0) AS numberOfFoundListings (SELECT COUNT(1) FROM listings l WHERE l.job_id = j.id AND l.is_active = 1 AND l.manually_deleted = 0) AS numberOfFoundListings
FROM jobs j FROM jobs j
${whereSql} ${whereSql}
@@ -276,6 +285,7 @@ export const queryJobs = ({
shared_with_user: fromJson(row.shared_with_user, []), shared_with_user: fromJson(row.shared_with_user, []),
notificationAdapter: fromJson(row.notificationAdapter, []), notificationAdapter: fromJson(row.notificationAdapter, []),
spatialFilter: fromJson(row.spatialFilter, null), spatialFilter: fromJson(row.spatialFilter, null),
specFilter: fromJson(row.specFilter, null),
})); }));
return { totalNumber, page: safePage, result }; return { totalNumber, page: safePage, result };

View File

@@ -174,9 +174,9 @@ export const storeListings = (jobId, providerId, listings) => {
SqliteConnection.withTransaction((db) => { SqliteConnection.withTransaction((db) => {
const stmt = db.prepare( const stmt = db.prepare(
`INSERT INTO listings (id, hash, provider, job_id, price, size, title, image_url, description, address, `INSERT INTO listings (id, hash, provider, job_id, price, size, rooms, title, image_url, description, address,
link, created_at, is_active, latitude, longitude) link, created_at, is_active, latitude, longitude)
VALUES (@id, @hash, @provider, @job_id, @price, @size, @title, @image_url, @description, @address, @link, VALUES (@id, @hash, @provider, @job_id, @price, @size, @rooms, @title, @image_url, @description, @address, @link,
@created_at, 1, @latitude, @longitude) @created_at, 1, @latitude, @longitude)
ON CONFLICT(job_id, hash) DO NOTHING`, ON CONFLICT(job_id, hash) DO NOTHING`,
); );
@@ -187,8 +187,9 @@ export const storeListings = (jobId, providerId, listings) => {
hash: item.id, hash: item.id,
provider: providerId, provider: providerId,
job_id: jobId, job_id: jobId,
price: extractNumber(item.price), price: item.price,
size: extractNumber(item.size), size: item.size,
rooms: item.rooms,
title: item.title, title: item.title,
image_url: item.image, image_url: item.image,
description: item.description, description: item.description,
@@ -202,19 +203,6 @@ export const storeListings = (jobId, providerId, listings) => {
} }
}); });
/**
* Extract the first number from a string like "1.234 €" or "70 m²".
* Removes dots/commas before parsing. Returns null on invalid input.
* @param {string|undefined|null} str
* @returns {number|null}
*/
function extractNumber(str) {
if (!str) return null;
const cleaned = str.replace(/\./g, '').replace(',', '.');
const num = parseFloat(cleaned);
return isNaN(num) ? null : num;
}
/** /**
* Remove any parentheses segments (including surrounding whitespace) from a string. * Remove any parentheses segments (including surrounding whitespace) from a string.
* Returns null for empty input. * Returns null for empty input.

View File

@@ -0,0 +1,10 @@
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
/**
 * Migration step: adds the `spec_filter` column (default NULL) to the `jobs` table.
 *
 * @param {{ exec: (sql: string) => unknown }} db Database handle; only its `exec` method is used.
 * @returns {void}
 */
export function up(db) {
  const addSpecFilterColumn = `
ALTER TABLE jobs ADD COLUMN spec_filter JSONB DEFAULT NULL;
`;
  db.exec(addSpecFilterColumn);
}

View File

@@ -0,0 +1,10 @@
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
/**
 * Migration step: adds the `rooms` column to the `listings` table.
 *
 * NOTE(review): room counts can be fractional (e.g. 2.5) — confirm that the
 * declared INTEGER type is intended for this column.
 *
 * @param {{ exec: (sql: string) => unknown }} db Database handle; only its `exec` method is used.
 * @returns {void}
 */
export function up(db) {
  const addRoomsColumn = `
ALTER TABLE listings ADD COLUMN rooms INTEGER;
`;
  db.exec(addRoomsColumn);
}

10
lib/types/browser.js Normal file
View File

@@ -0,0 +1,10 @@
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
/**
* @typedef {import('puppeteer').Browser} Browser
*/
export {};

19
lib/types/filter.js Normal file
View File

@@ -0,0 +1,19 @@
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
/**
* @typedef {Object} SpecFilter
* @property {number} [minRooms] Minimum number of rooms.
* @property {number} [minSize] Minimum size in m².
* @property {number} [maxPrice] Maximum price.
*/
/**
* @typedef {Object} SpatialFilter GeoJSON FeatureCollection.
* @property {Array<Object>} [features] GeoJSON features for spatial filtering (typically Polygons).
* @property {string} [type] Type 'FeatureCollection'.
*/
export {};

23
lib/types/job.js Normal file
View File

@@ -0,0 +1,23 @@
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
/** @import { SpecFilter, SpatialFilter } from './filter.js' */
/**
* @typedef {Object} Job
* @property {string} id Job ID.
* @property {string} [userId] Owner user id.
* @property {string} [name] Job display name.
* @property {boolean} [enabled] Whether the job is enabled.
* @property {Array<any>} [blacklist] Blacklist entries.
* @property {Array<any>} [provider] Provider configuration list.
* @property {Object} [notificationAdapter] Notification configuration.
* @property {Array<string>} [shared_with_user] Users this job is shared with.
* @property {SpatialFilter | null} [spatialFilter] Optional spatial filter configuration as GeoJSON FeatureCollection.
* @property {SpecFilter | null} [specFilter] Optional listing specifications.
* @property {number} [numberOfFoundListings] Count of active listings for this job.
*/
export {};

22
lib/types/listing.js Normal file
View File

@@ -0,0 +1,22 @@
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
/**
* @typedef {Object} ParsedListing
* @property {string} id Stable unique identifier (hash) of the listing.
* @property {string} link Link to the listing detail page.
* @property {string} image Link to the listing image.
* @property {string} title Title or headline of the listing.
* @property {string} [description] Description of the listing.
* @property {string} [address] Optional address/location text.
* @property {number} [price] Optional price of the listing.
* @property {number} [size] Optional size of the listing.
* @property {number} [rooms] Optional number of rooms.
* @property {number} [latitude] Optional latitude.
* @property {number} [longitude] Optional longitude.
* @property {number} [distance_to_destination] Optional distance to destination.
*/
export {};

View File

@@ -0,0 +1,25 @@
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
/** @import { ParsedListing } from './listing.js' */
/**
* @typedef {Object} ProviderConfig
* @property {string} [url] Base URL to crawl.
* @property {string} [sortByDateParam] Query parameter used to enforce sorting by date.
* @property {string} [waitForSelector] CSS selector to wait for before parsing content.
* @property {Object.<string, string>} crawlFields Mapping of field names to selectors/paths.
* @property {string[]} fieldNames List of field names that this provider supports.
* @property {string} [crawlContainer] CSS selector for the container holding listing items.
* @property {(raw: any) => ParsedListing} normalize Function to convert raw scraped data into a ParsedListing shape.
* @property {(listing: ParsedListing) => boolean} filter Function to filter out unwanted listings.
* @property {(url: string, waitForSelector?: string) => Promise<any[]>} [getListings] Optional override to fetch listings.
* @property {(listing: ParsedListing, browser: any) => Promise<ParsedListing>} [fetchDetails] Optional per-listing detail enrichment. Called in parallel for each new listing after deduplication. Receives the shared browser instance. Must always resolve (never reject).
* @property {Object} [puppeteerOptions] Puppeteer specific options.
* @property {boolean} [enabled] Whether the provider is enabled.
* @property {(url: string) => Promise<number> | number} [activeTester] Function to check if a listing is still active.
*/
export {};

View File

@@ -0,0 +1,11 @@
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
/**
* @typedef {Object} SimilarityCache
* @property {(params: { title?: string, address?: string, price?: number|string }) => boolean} checkAndAddEntry Checks if a listing is similar and adds it if not.
*/
export {};

View File

@@ -0,0 +1,18 @@
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
/**
 * Extract the leading number from a German-formatted string like "1.234 €" or "70 m²".
 *
 * Dots are treated as thousands separators and removed; the first comma is
 * treated as the decimal separator ("1.234,56 €" -> 1234.56). Only a number at
 * the start of the string is recognised, because parsing is done by `parseFloat`.
 *
 * Values that are already numbers are passed through unchanged, except `NaN`,
 * which is reported as `null` like every other invalid input.
 *
 * @param {string|number|undefined|null} str Raw value to convert.
 * @returns {number|null} The parsed number, or null when the input is missing,
 *   not a string/number, or does not start with a number.
 */
export const extractNumber = (str) => {
  if (typeof str === 'number') return Number.isNaN(str) ? null : str;
  // Anything that is not a string (null, undefined, booleans, objects, ...) has
  // no `.replace`; report it as invalid instead of throwing a TypeError.
  if (typeof str !== 'string') return null;
  const cleaned = str.replace(/\./g, '').replace(',', '.');
  const num = Number.parseFloat(cleaned);
  return Number.isNaN(num) ? null : num;
};

View File

@@ -0,0 +1,29 @@
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
/** @import { ParsedListing } from '../types/listing.js' */
/**
* @typedef {Omit<import('../types/listing.js').ParsedListing, 'price' | 'size' | 'rooms'> & {
* price: string | null,
* size: string | null,
* rooms: string | null,
* }} FormattedListing
*/
/**
 * Formats a listing's numerical fields (price, size, rooms) into display strings
 * with their respective units: "€" for price, "m²" for size and "Zimmer" for rooms.
 *
 * A field that is `null` or `undefined` becomes `null`; a value of `0` is kept
 * and formatted (e.g. "0 €"). All other properties are copied over unchanged and
 * the input object is not mutated.
 *
 * @param {import('../types/listing.js').ParsedListing} listing The original listing object.
 * @returns {FormattedListing} A copy of the listing with formatted strings for price, size, and rooms.
 */
export const formatListing = (listing) => {
  // `!= null` deliberately matches both null and undefined while keeping 0.
  const withUnit = (value, unit) => (value != null ? `${value} ${unit}` : null);
  return {
    ...listing,
    price: withUnit(listing.price, '€'),
    size: withUnit(listing.size, 'm²'),
    rooms: withUnit(listing.rooms, 'Zimmer'),
  };
};

View File

@@ -17,13 +17,22 @@ describe('Issue reproduction: listings filtered by similarity or area should be
const providerConfig = { const providerConfig = {
url: 'http://example.com', url: 'http://example.com',
getListings: () => Promise.resolve([{ id: '1', title: 'test', address: 'addr', price: '100' }]), getListings: () =>
Promise.resolve([{ id: '1', title: 'test', address: 'addr', price: '100', link: 'http://example.com/1' }]),
normalize: (l) => l, normalize: (l) => l,
filter: () => true, filter: () => true,
crawlFields: { id: 'id', title: 'title', address: 'address', price: 'price' }, crawlFields: { id: 'id', title: 'title', address: 'address', price: 'price' },
fieldNames: ['id', 'title', 'address', 'price'],
}; };
const fredy = new Fredy(providerConfig, null, null, 'test-provider', 'test-job', mockSimilarityCache); const mockedJob = {
id: 'test-job',
notificationAdapter: null,
specFilter: null,
spatialFilter: null,
};
const fredy = new Fredy(providerConfig, mockedJob, 'test-provider', mockSimilarityCache, undefined);
// Clear deletedIds before test // Clear deletedIds before test
mockStore.deletedIds.length = 0; mockStore.deletedIds.length = 0;
@@ -64,18 +73,35 @@ describe('Issue reproduction: listings filtered by similarity or area should be
], ],
}; };
const mockedJob = {
id: 'test-job',
notificationAdapter: null,
specFilter: null,
spatialFilter: spatialFilter,
};
const providerConfig = { const providerConfig = {
url: 'http://example.com', url: 'http://example.com',
getListings: () => getListings: () =>
Promise.resolve([{ id: '2', title: 'test', address: 'addr', price: '100', latitude: 2, longitude: 2 }]), // outside polygon Promise.resolve([
{
id: '2',
title: 'test',
address: 'addr',
price: '100',
latitude: 2,
longitude: 2,
link: 'http://example.com/2',
},
]), // outside polygon
normalize: (l) => l, normalize: (l) => l,
filter: () => true, filter: () => true,
crawlFields: { id: 'id', title: 'title', address: 'address', price: 'price' }, crawlFields: { id: 'id', title: 'title', address: 'address', price: 'price' },
fieldNames: ['id', 'title', 'address', 'price'],
}; };
const fredy = new Fredy(providerConfig, null, spatialFilter, 'test-provider', 'test-job', mockSimilarityCache); const fredy = new Fredy(providerConfig, mockedJob, 'test-provider', mockSimilarityCache, undefined);
// Clear deletedIds before test
mockStore.deletedIds.length = 0; mockStore.deletedIds.length = 0;
try { try {

View File

@@ -10,18 +10,17 @@ import { expect } from 'vitest';
import * as provider from '../../lib/provider/einsAImmobilien.js'; import * as provider from '../../lib/provider/einsAImmobilien.js';
describe('#einsAImmobilien testsuite()', () => { describe('#einsAImmobilien testsuite()', () => {
provider.init(providerConfig.einsAImmobilien, [], []); provider.init(providerConfig.einsAImmobilien, []);
it('should test einsAImmobilien provider', async () => { it('should test einsAImmobilien provider', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
const mockedJob = {
id: 'einsAImmobilien',
notificationAdapter: null,
spatialFilter: null,
specFilter: null,
};
return await new Promise((resolve, reject) => { return await new Promise((resolve, reject) => {
const fredy = new Fredy( const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
provider.config,
null,
null,
provider.metaInformation.id,
'einsAImmobilien',
similarityCache,
);
fredy.execute().then((listings) => { fredy.execute().then((listings) => {
if (listings == null || listings.length === 0) { if (listings == null || listings.length === 0) {
reject('Listings is empty!'); reject('Listings is empty!');
@@ -35,12 +34,14 @@ describe('#einsAImmobilien testsuite()', () => {
/** check the actual structure **/ /** check the actual structure **/
expect(notify.id).toBeTypeOf('string'); expect(notify.id).toBeTypeOf('string');
expect(notify.price).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string');
expect(notify.price).toContain('€');
expect(notify.size).toBeTypeOf('string'); expect(notify.size).toBeTypeOf('string');
expect(notify.size).toContain('m²');
expect(notify.title).toBeTypeOf('string'); expect(notify.title).toBeTypeOf('string');
expect(notify.link).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string');
expect(notify.address).toBeTypeOf('string'); expect(notify.address).toBeTypeOf('string');
/** check the values if possible **/ /** check the values if possible **/
expect(notify.size).not.toBe(''); expect(notify.size).toBeTypeOf('string');
expect(notify.title).not.toBe(''); expect(notify.title).not.toBe('');
expect(notify.link).toContain('https://www.1a-immobilienmarkt.de'); expect(notify.link).toContain('https://www.1a-immobilienmarkt.de');
}); });

View File

@@ -13,8 +13,15 @@ import * as mockStore from '../mocks/mockStore.js';
describe('#immobilien.de testsuite()', () => { describe('#immobilien.de testsuite()', () => {
provider.init(providerConfig.immobilienDe, [], []); provider.init(providerConfig.immobilienDe, [], []);
it('should test immobilien.de provider', async () => { it('should test immobilien.de provider', async () => {
const mockedJob = {
id: 'test1',
notificationAdapter: null,
spatialFilter: null,
specFilter: null,
};
const Fredy = await mockFredy(); const Fredy = await mockFredy();
const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'test1', similarityCache); const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
const listing = await fredy.execute(); const listing = await fredy.execute();
if (listing == null || listing.length === 0) { if (listing == null || listing.length === 0) {
@@ -55,9 +62,15 @@ describe('#immobilien.de testsuite()', () => {
it('should enrich listings with details', async () => { it('should enrich listings with details', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
provider.init(providerConfig.immobilienDe, [], []); provider.init(providerConfig.immobilienDe, [], []);
const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'test1', { const mockedJob = { id: 'test1', notificationAdapter: null, specFilter: null, spatialFilter: null };
checkAndAddEntry: () => false,
}); const fredy = new Fredy(
provider.config,
mockedJob,
provider.metaInformation.id,
{ checkAndAddEntry: () => false },
undefined,
);
const listings = await fredy.execute(); const listings = await fredy.execute();
if (listings == null) return; if (listings == null) return;
expect(listings).toBeInstanceOf(Array); expect(listings).toBeInstanceOf(Array);

View File

@@ -14,8 +14,15 @@ describe('#immoscout provider testsuite()', () => {
provider.init(providerConfig.immoscout, [], []); provider.init(providerConfig.immoscout, [], []);
it('should test immoscout provider', async () => { it('should test immoscout provider', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
const mockedJob = {
id: '',
notificationAdapter: null,
spatialFilter: null,
specFilter: null,
};
return await new Promise((resolve, reject) => { return await new Promise((resolve, reject) => {
const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, '', similarityCache); const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
fredy.execute().then((listings) => { fredy.execute().then((listings) => {
if (listings == null || listings.length === 0) { if (listings == null || listings.length === 0) {
reject('Listings is empty!'); reject('Listings is empty!');
@@ -25,20 +32,24 @@ describe('#immoscout provider testsuite()', () => {
expect(listings).toBeInstanceOf(Array); expect(listings).toBeInstanceOf(Array);
const notificationObj = get(); const notificationObj = get();
expect(notificationObj).toBeTypeOf('object'); expect(notificationObj).toBeTypeOf('object');
expect(notificationObj.serviceName).toBe('immoscout');
notificationObj.payload.forEach((notify) => { // check if there is at least one valid notification
/** check the actual structure **/ const hasValidNotification = notificationObj.payload.some((notify) => {
expect(notify.id).toBeTypeOf('string'); return (
expect(notify.price).toBeTypeOf('string'); typeof notify.id === 'string' &&
expect(notify.size).toBeTypeOf('string'); typeof notify.price === 'string' &&
expect(notify.title).toBeTypeOf('string'); notify.price.includes('€') &&
expect(notify.link).toBeTypeOf('string'); typeof notify.size === 'string' &&
expect(notify.address).toBeTypeOf('string'); notify.size.includes('m²') &&
/** check the values if possible **/ typeof notify.title === 'string' &&
expect(notify.size).not.toBe(''); notify.title !== '' &&
expect(notify.title).not.toBe(''); typeof notify.link === 'string' &&
expect(notify.link).toContain('https://www.immobilienscout24.de/'); notify.link.includes('https://www.immobilienscout24.de/') &&
typeof notify.address === 'string'
);
}); });
expect(hasValidNotification).toBe(true);
resolve(); resolve();
}); });
}); });
@@ -57,9 +68,14 @@ describe('#immoscout provider testsuite()', () => {
it('should enrich listings with details', async () => { it('should enrich listings with details', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
provider.init(providerConfig.immoscout, [], []); provider.init(providerConfig.immoscout, [], []);
const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, '', { const mockedJob = { id: '', notificationAdapter: null, specFilter: null, spatialFilter: null };
checkAndAddEntry: () => false, const fredy = new Fredy(
}); provider.config,
mockedJob,
provider.metaInformation.id,
{ checkAndAddEntry: () => false },
undefined,
);
const listings = await fredy.execute(); const listings = await fredy.execute();
expect(listings).toBeInstanceOf(Array); expect(listings).toBeInstanceOf(Array);
listings.forEach((listing) => { listings.forEach((listing) => {

View File

@@ -13,8 +13,16 @@ describe('#immoswp testsuite()', () => {
provider.init(providerConfig.immoswp, [], []); provider.init(providerConfig.immoswp, [], []);
it('should test immoswp provider', async () => { it('should test immoswp provider', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
const mockedJob = {
id: 'immoswp',
notificationAdapter: null,
spatialFilter: null,
specFilter: null,
};
return await new Promise((resolve, reject) => { return await new Promise((resolve, reject) => {
const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'immoswp', similarityCache); const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
fredy.execute().then((listing) => { fredy.execute().then((listing) => {
if (listing == null || listing.length === 0) { if (listing == null || listing.length === 0) {
reject('Listings is empty!'); reject('Listings is empty!');
@@ -29,11 +37,13 @@ describe('#immoswp testsuite()', () => {
/** check the actual structure **/ /** check the actual structure **/
expect(notify.id).toBeTypeOf('string'); expect(notify.id).toBeTypeOf('string');
expect(notify.price).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string');
expect(notify.price).toContain('€');
expect(notify.size).toBeTypeOf('string'); expect(notify.size).toBeTypeOf('string');
expect(notify.size).toContain('m²');
expect(notify.title).toBeTypeOf('string'); expect(notify.title).toBeTypeOf('string');
expect(notify.link).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string');
/** check the values if possible **/ /** check the values if possible **/
expect(notify.price).toContain('€'); expect(notify.size).toBeTypeOf('string');
expect(notify.title).not.toBe(''); expect(notify.title).not.toBe('');
expect(notify.link).toContain('https://immo.swp.de'); expect(notify.link).toContain('https://immo.swp.de');
}); });

View File

@@ -13,9 +13,16 @@ import * as mockStore from '../mocks/mockStore.js';
describe('#immowelt testsuite()', () => { describe('#immowelt testsuite()', () => {
it('should test immowelt provider', async () => { it('should test immowelt provider', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
const mockedJob = {
id: 'immowelt',
notificationAdapter: null,
spatialFilter: null,
specFilter: null,
};
provider.init(providerConfig.immowelt, [], []); provider.init(providerConfig.immowelt, [], []);
const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'immowelt', similarityCache); const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
const listing = await fredy.execute(); const listing = await fredy.execute();
if (listing == null || listing.length === 0) { if (listing == null || listing.length === 0) {
@@ -29,12 +36,16 @@ describe('#immowelt testsuite()', () => {
notificationObj.payload.forEach((notify) => { notificationObj.payload.forEach((notify) => {
/** check the actual structure **/ /** check the actual structure **/
expect(notify.id).toBeTypeOf('string'); expect(notify.id).toBeTypeOf('string');
expect(notify.price).toBeTypeOf('string'); if (notify.price != null) {
expect(notify.price).toBeTypeOf('string');
expect(notify.price).toContain('€');
}
expect(notify.title).toBeTypeOf('string'); expect(notify.title).toBeTypeOf('string');
expect(notify.link).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string');
expect(notify.address).toBeTypeOf('string'); expect(notify.address).toBeTypeOf('string');
/** check the values if possible **/ /** check the values if possible **/
if (notify.size != null && notify.size.trim().toLowerCase() !== 'k.a.') { if (notify.size != null && notify.size.trim().toLowerCase() !== 'k.a.') {
expect(notify.size).toBeTypeOf('string');
expect(notify.size).toContain('m²'); expect(notify.size).toContain('m²');
} }
expect(notify.title).not.toBe(''); expect(notify.title).not.toBe('');
@@ -56,9 +67,15 @@ describe('#immowelt testsuite()', () => {
it('should enrich listings with details', async () => { it('should enrich listings with details', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
provider.init(providerConfig.immowelt, [], []); provider.init(providerConfig.immowelt, [], []);
const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'immowelt', { const mockedJob = { id: 'immowelt', notificationAdapter: null, specFilter: null, spatialFilter: null };
checkAndAddEntry: () => false,
}); const fredy = new Fredy(
provider.config,
mockedJob,
provider.metaInformation.id,
{ checkAndAddEntry: () => false },
undefined,
);
const listings = await fredy.execute(); const listings = await fredy.execute();
expect(listings).toBeInstanceOf(Array); expect(listings).toBeInstanceOf(Array);
listings.forEach((listing) => { listings.forEach((listing) => {

View File

@@ -13,16 +13,16 @@ import * as mockStore from '../mocks/mockStore.js';
describe('#kleinanzeigen testsuite()', () => { describe('#kleinanzeigen testsuite()', () => {
it('should test kleinanzeigen provider', async () => { it('should test kleinanzeigen provider', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
const mockedJob = {
id: 'kleinanzeigen',
notificationAdapter: null,
spatialFilter: null,
specFilter: null,
};
provider.init(providerConfig.kleinanzeigen, [], []); provider.init(providerConfig.kleinanzeigen, [], []);
return await new Promise((resolve, reject) => { return await new Promise((resolve, reject) => {
const fredy = new Fredy( const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
provider.config,
null,
null,
provider.metaInformation.id,
'kleinanzeigen',
similarityCache,
);
fredy.execute().then((listing) => { fredy.execute().then((listing) => {
if (listing == null || listing.length === 0) { if (listing == null || listing.length === 0) {
reject('Listings is empty!'); reject('Listings is empty!');
@@ -62,9 +62,15 @@ describe('#kleinanzeigen testsuite()', () => {
it('should enrich listings with details', async () => { it('should enrich listings with details', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
provider.init(providerConfig.kleinanzeigen, [], []); provider.init(providerConfig.kleinanzeigen, [], []);
const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'kleinanzeigen', { const mockedJob = { id: 'kleinanzeigen', notificationAdapter: null, specFilter: null, spatialFilter: null };
checkAndAddEntry: () => false,
}); const fredy = new Fredy(
provider.config,
mockedJob,
provider.metaInformation.id,
{ checkAndAddEntry: () => false },
undefined,
);
const listings = await fredy.execute(); const listings = await fredy.execute();
expect(listings).toBeInstanceOf(Array); expect(listings).toBeInstanceOf(Array);
listings.forEach((listing) => { listings.forEach((listing) => {

View File

@@ -12,9 +12,16 @@ import * as provider from '../../lib/provider/mcMakler.js';
describe('#mcMakler testsuite()', () => { describe('#mcMakler testsuite()', () => {
it('should test mcMakler provider', async () => { it('should test mcMakler provider', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
const mockedJob = {
id: 'mcMakler',
notificationAdapter: null,
spatialFilter: null,
specFilter: null,
};
provider.init(providerConfig.mcMakler, []); provider.init(providerConfig.mcMakler, []);
const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'mcMakler', similarityCache); const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
const listing = await fredy.execute(); const listing = await fredy.execute();
if (listing == null || listing.length === 0) { if (listing == null || listing.length === 0) {
@@ -29,12 +36,14 @@ describe('#mcMakler testsuite()', () => {
/** check the actual structure **/ /** check the actual structure **/
expect(notify.id).toBeTypeOf('string'); expect(notify.id).toBeTypeOf('string');
expect(notify.price).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string');
expect(notify.price).toContain('€');
expect(notify.size).toBeTypeOf('string'); expect(notify.size).toBeTypeOf('string');
expect(notify.size).toContain('m²');
expect(notify.title).toBeTypeOf('string'); expect(notify.title).toBeTypeOf('string');
expect(notify.link).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string');
expect(notify.address).toBeTypeOf('string'); expect(notify.address).toBeTypeOf('string');
/** check the values if possible **/ /** check the values if possible **/
expect(notify.size).toContain('m²'); expect(notify.size).toBeTypeOf('string');
expect(notify.title).not.toBe(''); expect(notify.title).not.toBe('');
expect(notify.address).not.toBe(''); expect(notify.address).not.toBe('');
}); });

View File

@@ -13,15 +13,16 @@ describe('#neubauKompass testsuite()', () => {
provider.init(providerConfig.neubauKompass, [], []); provider.init(providerConfig.neubauKompass, [], []);
it('should test neubauKompass provider', async () => { it('should test neubauKompass provider', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
const mockedJob = {
id: 'neubauKompass',
notificationAdapter: null,
spatialFilter: null,
specFilter: null,
};
return await new Promise((resolve, reject) => { return await new Promise((resolve, reject) => {
const fredy = new Fredy( const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
provider.config,
null,
null,
provider.metaInformation.id,
'neubauKompass',
similarityCache,
);
fredy.execute().then((listing) => { fredy.execute().then((listing) => {
if (listing == null || listing.length === 0) { if (listing == null || listing.length === 0) {
reject('Listings is empty!'); reject('Listings is empty!');

View File

@@ -12,9 +12,16 @@ import * as provider from '../../lib/provider/ohneMakler.js';
describe('#ohneMakler testsuite()', () => { describe('#ohneMakler testsuite()', () => {
it('should test ohneMakler provider', async () => { it('should test ohneMakler provider', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
const mockedJob = {
id: 'ohneMakler',
notificationAdapter: null,
spatialFilter: null,
specFilter: null,
};
provider.init(providerConfig.ohneMakler, []); provider.init(providerConfig.ohneMakler, []);
const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'ohneMakler', similarityCache); const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
const listing = await fredy.execute(); const listing = await fredy.execute();
if (listing == null || listing.length === 0) { if (listing == null || listing.length === 0) {
@@ -29,12 +36,14 @@ describe('#ohneMakler testsuite()', () => {
/** check the actual structure **/ /** check the actual structure **/
expect(notify.id).toBeTypeOf('string'); expect(notify.id).toBeTypeOf('string');
expect(notify.price).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string');
expect(notify.price).toContain('€');
expect(notify.size).toBeTypeOf('string'); expect(notify.size).toBeTypeOf('string');
expect(notify.size).toContain('m²');
expect(notify.title).toBeTypeOf('string'); expect(notify.title).toBeTypeOf('string');
expect(notify.link).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string');
expect(notify.address).toBeTypeOf('string'); expect(notify.address).toBeTypeOf('string');
/** check the values if possible **/ /** check the values if possible **/
expect(notify.size).toContain('m²'); expect(notify.size).toBeTypeOf('string');
expect(notify.title).not.toBe(''); expect(notify.title).not.toBe('');
expect(notify.address).not.toBe(''); expect(notify.address).not.toBe('');
}); });

View File

@@ -12,16 +12,16 @@ import * as provider from '../../lib/provider/regionalimmobilien24.js';
describe('#regionalimmobilien24 testsuite()', () => { describe('#regionalimmobilien24 testsuite()', () => {
it('should test regionalimmobilien24 provider', async () => { it('should test regionalimmobilien24 provider', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
const mockedJob = {
id: 'regionalimmobilien24',
notificationAdapter: null,
spatialFilter: null,
specFilter: null,
};
provider.init(providerConfig.regionalimmobilien24, []); provider.init(providerConfig.regionalimmobilien24, []);
const fredy = new Fredy( const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
provider.config,
null,
null,
provider.metaInformation.id,
'regionalimmobilien24',
similarityCache,
);
const listing = await fredy.execute(); const listing = await fredy.execute();
if (listing == null || listing.length === 0) { if (listing == null || listing.length === 0) {
@@ -36,12 +36,14 @@ describe('#regionalimmobilien24 testsuite()', () => {
/** check the actual structure **/ /** check the actual structure **/
expect(notify.id).toBeTypeOf('string'); expect(notify.id).toBeTypeOf('string');
expect(notify.price).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string');
expect(notify.price).toContain('€');
expect(notify.size).toBeTypeOf('string'); expect(notify.size).toBeTypeOf('string');
expect(notify.size).toContain('m²');
expect(notify.title).toBeTypeOf('string'); expect(notify.title).toBeTypeOf('string');
expect(notify.link).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string');
expect(notify.address).toBeTypeOf('string'); expect(notify.address).toBeTypeOf('string');
/** check the values if possible **/ /** check the values if possible **/
expect(notify.size).toContain('m²'); expect(notify.size).toBeTypeOf('string');
expect(notify.title).not.toBe(''); expect(notify.title).not.toBe('');
expect(notify.address).not.toBe(''); expect(notify.address).not.toBe('');
}); });

View File

@@ -13,9 +13,16 @@ import * as mockStore from '../mocks/mockStore.js';
describe('#sparkasse testsuite()', () => { describe('#sparkasse testsuite()', () => {
it('should test sparkasse provider', async () => { it('should test sparkasse provider', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
const mockedJob = {
id: 'sparkasse',
notificationAdapter: null,
spatialFilter: null,
specFilter: null,
};
provider.init(providerConfig.sparkasse, []); provider.init(providerConfig.sparkasse, []);
const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'sparkasse', similarityCache); const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
const listing = await fredy.execute(); const listing = await fredy.execute();
if (listing == null || listing.length === 0) { if (listing == null || listing.length === 0) {
@@ -30,11 +37,14 @@ describe('#sparkasse testsuite()', () => {
/** check the actual structure **/ /** check the actual structure **/
expect(notify.id).toBeTypeOf('string'); expect(notify.id).toBeTypeOf('string');
expect(notify.price).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string');
expect(notify.price).toContain('€');
expect(notify.size).toBeTypeOf('string');
expect(notify.size).toContain('m²');
expect(notify.title).toBeTypeOf('string'); expect(notify.title).toBeTypeOf('string');
expect(notify.link).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string');
expect(notify.address).toBeTypeOf('string'); expect(notify.address).toBeTypeOf('string');
/** check the values if possible **/ /** check the values if possible **/
expect(notify.size).toContain('m²'); expect(notify.size).toBeTypeOf('string');
expect(notify.title).not.toBe(''); expect(notify.title).not.toBe('');
expect(notify.address).not.toBe(''); expect(notify.address).not.toBe('');
}); });
@@ -53,9 +63,15 @@ describe('#sparkasse testsuite()', () => {
it('should enrich listings with details', async () => { it('should enrich listings with details', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
provider.init(providerConfig.sparkasse, []); provider.init(providerConfig.sparkasse, []);
const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'sparkasse', { const mockedJob = { id: 'sparkasse', notificationAdapter: null, specFilter: null, spatialFilter: null };
checkAndAddEntry: () => false,
}); const fredy = new Fredy(
provider.config,
mockedJob,
provider.metaInformation.id,
{ checkAndAddEntry: () => false },
undefined,
);
const listings = await fredy.execute(); const listings = await fredy.execute();
expect(listings).toBeInstanceOf(Array); expect(listings).toBeInstanceOf(Array);
listings.forEach((listing) => { listings.forEach((listing) => {

View File

@@ -12,10 +12,18 @@ import * as mockStore from '../mocks/mockStore.js';
describe('#wgGesucht testsuite()', () => { describe('#wgGesucht testsuite()', () => {
provider.init(providerConfig.wgGesucht, [], []); provider.init(providerConfig.wgGesucht, [], []);
it('should test wgGesucht provider', async () => { it('should test wgGesucht provider', { timeout: 120000 }, async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
const mockedJob = {
id: 'wgGesucht',
notificationAdapter: null,
spatialFilter: null,
specFilter: null,
};
return await new Promise((resolve, reject) => { return await new Promise((resolve, reject) => {
const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'wgGesucht', similarityCache); const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
fredy.execute().then((listing) => { fredy.execute().then((listing) => {
if (listing == null || listing.length === 0) { if (listing == null || listing.length === 0) {
reject('Listings is empty!'); reject('Listings is empty!');
@@ -30,8 +38,9 @@ describe('#wgGesucht testsuite()', () => {
/** check the actual structure **/ /** check the actual structure **/
expect(notify.id).toBeTypeOf('string'); expect(notify.id).toBeTypeOf('string');
expect(notify.title).toBeTypeOf('string'); expect(notify.title).toBeTypeOf('string');
expect(notify.details).toBeTypeOf('string'); // expect(notify.details).toBeTypeOf('string');
expect(notify.price).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string');
expect(notify.price).toContain('€');
expect(notify.link).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string');
}); });
resolve(); resolve();
@@ -52,9 +61,15 @@ describe('#wgGesucht testsuite()', () => {
it('should enrich listings with details', async () => { it('should enrich listings with details', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
provider.init(providerConfig.wgGesucht, [], []); provider.init(providerConfig.wgGesucht, [], []);
const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'wgGesucht', { const mockedJob = { id: 'wgGesucht', notificationAdapter: null, specFilter: null, spatialFilter: null };
checkAndAddEntry: () => false,
}); const fredy = new Fredy(
provider.config,
mockedJob,
provider.metaInformation.id,
{ checkAndAddEntry: () => false },
undefined,
);
const listings = await fredy.execute(); const listings = await fredy.execute();
expect(listings).toBeInstanceOf(Array); expect(listings).toBeInstanceOf(Array);
listings.forEach((listing) => { listings.forEach((listing) => {

View File

@@ -13,15 +13,16 @@ describe('#wohnungsboerse testsuite()', () => {
provider.init(providerConfig.wohnungsboerse, [], []); provider.init(providerConfig.wohnungsboerse, [], []);
it('should test wohnungsboerse provider', async () => { it('should test wohnungsboerse provider', async () => {
const Fredy = await mockFredy(); const Fredy = await mockFredy();
const mockedJob = {
id: 'wohnungsboerse',
notificationAdapter: null,
spatialFilter: null,
specFilter: null,
};
return await new Promise((resolve, reject) => { return await new Promise((resolve, reject) => {
const fredy = new Fredy( const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
provider.config,
null,
null,
provider.metaInformation.id,
'wohnungsboerse',
similarityCache,
);
fredy.execute().then((listings) => { fredy.execute().then((listings) => {
if (listings == null || listings.length === 0) { if (listings == null || listings.length === 0) {
reject('Listings is empty!'); reject('Listings is empty!');
@@ -36,12 +37,14 @@ describe('#wohnungsboerse testsuite()', () => {
/** check the actual structure **/ /** check the actual structure **/
expect(notify.id).toBeTypeOf('string'); expect(notify.id).toBeTypeOf('string');
expect(notify.price).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string');
expect(notify.price).toContain('€');
expect(notify.size).toBeTypeOf('string'); expect(notify.size).toBeTypeOf('string');
expect(notify.size).toContain('m²');
expect(notify.title).toBeTypeOf('string'); expect(notify.title).toBeTypeOf('string');
expect(notify.link).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string');
expect(notify.address).toBeTypeOf('string'); expect(notify.address).toBeTypeOf('string');
/** check the values if possible **/ /** check the values if possible **/
expect(notify.size).not.toBe(''); expect(notify.size).toBeTypeOf('string');
expect(notify.title).not.toBe(''); expect(notify.title).not.toBe('');
expect(notify.link).toContain('https://www.wohnungsboerse.net'); expect(notify.link).toContain('https://www.wohnungsboerse.net');
}); });

View File

@@ -8,7 +8,9 @@ import { readFile } from 'fs/promises';
import * as mockStore from './mocks/mockStore.js'; import * as mockStore from './mocks/mockStore.js';
import { send } from './mocks/mockNotification.js'; import { send } from './mocks/mockNotification.js';
export const providerConfig = JSON.parse(await readFile(new URL('./provider/testProvider.json', import.meta.url))); export const providerConfig = JSON.parse(
await readFile(new URL('./provider/testProvider.json', import.meta.url), 'utf-8'),
);
vi.mock('../lib/services/storage/listingsStorage.js', () => mockStore); vi.mock('../lib/services/storage/listingsStorage.js', () => mockStore);
vi.mock('../lib/services/storage/settingsStorage.js', () => mockStore); vi.mock('../lib/services/storage/settingsStorage.js', () => mockStore);
@@ -20,7 +22,10 @@ vi.mock('../lib/services/storage/jobStorage.js', () => ({
})); }));
vi.mock('../lib/notification/notify.js', () => ({ send })); vi.mock('../lib/notification/notify.js', () => ({ send }));
/**
* @returns {Promise<typeof import('../lib/FredyPipelineExecutioner.js').default>}
*/
export const mockFredy = async () => { export const mockFredy = async () => {
const mod = await import('../lib/FredyPipelineExecutioner.js'); const mod = await import('../lib/FredyPipelineExecutioner.js');
return mod.default ?? mod; return mod.default;
}; };

View File

@@ -25,6 +25,7 @@ import {
Empty, Empty,
Radio, Radio,
RadioGroup, RadioGroup,
Space,
} from '@douyinfe/semi-ui-19'; } from '@douyinfe/semi-ui-19';
import { import {
IconBriefcase, IconBriefcase,
@@ -293,12 +294,14 @@ const ListingsGrid = () => {
> >
{item.address || 'No address provided'} {item.address || 'No address provided'}
</Text> </Text>
<Text type="tertiary" size="small" icon={<IconClock />}> <Space spacing={12} wrap>
{timeService.format(item.created_at, false)} <Text type="tertiary" size="small" icon={<IconBriefcase />}>
</Text> {item.provider.charAt(0).toUpperCase() + item.provider.slice(1)}
<Text type="tertiary" size="small" icon={<IconBriefcase />}> </Text>
{item.provider.charAt(0).toUpperCase() + item.provider.slice(1)} <Text type="tertiary" size="small" icon={<IconClock />}>
</Text> {timeService.format(item.created_at, false)}
</Text>
</Space>
{item.distance_to_destination ? ( {item.distance_to_destination ? (
<Text type="tertiary" size="small" icon={<IconActivity />}> <Text type="tertiary" size="small" icon={<IconActivity />}>
{item.distance_to_destination} m to chosen address {item.distance_to_destination} m to chosen address

View File

@@ -69,6 +69,7 @@
} }
&--inactive { &--inactive {
.listingsGrid__imageContainer, .listingsGrid__imageContainer,
.listingsGrid__content { .listingsGrid__content {
opacity: 0.6; opacity: 0.6;
@@ -169,4 +170,16 @@
background: var(--semi-color-primary-hover); background: var(--semi-color-primary-hover);
} }
} }
// Ensure icons and text are vertically aligned
.semi-typography {
display: inline-flex;
align-items: center;
.semi-typography-icon {
display: flex;
align-items: center;
margin-top: 1px; // Minor nudge if needed, but flex should handle most
}
}
} }

View File

@@ -24,9 +24,15 @@ import {
IconPlayCircle, IconPlayCircle,
IconPlusCircle, IconPlusCircle,
IconUser, IconUser,
IconClear, IconFilter,
} from '@douyinfe/semi-icons'; } from '@douyinfe/semi-icons';
const SPEC_FILTERS = [
{ key: 'maxPrice', translation: 'Max Price' },
{ key: 'minSize', translation: 'Min Size (m²)' },
{ key: 'minRooms', translation: 'Min Rooms' },
];
export default function JobMutator() { export default function JobMutator() {
const jobs = useSelector((state) => state.jobsData.jobs); const jobs = useSelector((state) => state.jobsData.jobs);
const shareableUserList = useSelector((state) => state.jobsData.shareableUserList); const shareableUserList = useSelector((state) => state.jobsData.shareableUserList);
@@ -46,6 +52,7 @@ export default function JobMutator() {
const defaultEnabled = sourceJob?.enabled ?? true; const defaultEnabled = sourceJob?.enabled ?? true;
const defaultShareWithUsers = sourceJob?.shared_with_user ?? []; const defaultShareWithUsers = sourceJob?.shared_with_user ?? [];
const defaultSpatialFilter = sourceJob?.spatialFilter || null; const defaultSpatialFilter = sourceJob?.spatialFilter || null;
const defaultSpecFilter = sourceJob?.specFilter || null;
const [providerToEdit, setProviderToEdit] = useState(null); const [providerToEdit, setProviderToEdit] = useState(null);
const [providerCreationVisible, setProviderCreationVisibility] = useState(false); const [providerCreationVisible, setProviderCreationVisibility] = useState(false);
@@ -58,6 +65,7 @@ export default function JobMutator() {
const [shareWithUsers, setShareWithUsers] = useState(defaultShareWithUsers); const [shareWithUsers, setShareWithUsers] = useState(defaultShareWithUsers);
const [enabled, setEnabled] = useState(defaultEnabled); const [enabled, setEnabled] = useState(defaultEnabled);
const [spatialFilter, setSpatialFilter] = useState(defaultSpatialFilter); const [spatialFilter, setSpatialFilter] = useState(defaultSpatialFilter);
const [specFilter, setSpecFilter] = useState(defaultSpecFilter);
const navigate = useNavigate(); const navigate = useNavigate();
const actions = useActions(); const actions = useActions();
@@ -66,6 +74,12 @@ export default function JobMutator() {
setSpatialFilter(data); setSpatialFilter(data);
}, []); }, []);
const handleSpecFilterChange = (key, value) => {
if (!SPEC_FILTERS.map(({ key }) => key).includes(key)) return;
setSpecFilter({ ...specFilter, [key]: value ? parseFloat(value) : null });
};
const isSavingEnabled = () => { const isSavingEnabled = () => {
return Boolean(notificationAdapterData.length && providerData.length && name); return Boolean(notificationAdapterData.length && providerData.length && name);
}; };
@@ -85,6 +99,7 @@ export default function JobMutator() {
name, name,
blacklist, blacklist,
spatialFilter, spatialFilter,
specFilter,
enabled, enabled,
jobId: jobToBeEdit?.id || null, jobId: jobToBeEdit?.id || null,
}); });
@@ -204,7 +219,7 @@ export default function JobMutator() {
</SegmentPart> </SegmentPart>
<Divider margin="1rem" /> <Divider margin="1rem" />
<SegmentPart <SegmentPart
Icon={IconClear} Icon={IconFilter}
name="Blacklist" name="Blacklist"
helpText="If a listing contains one of these words, it will be filtered out. Type in a word, then hit enter." helpText="If a listing contains one of these words, it will be filtered out. Type in a word, then hit enter."
> >
@@ -216,6 +231,27 @@ export default function JobMutator() {
</SegmentPart> </SegmentPart>
<Divider margin="1rem" /> <Divider margin="1rem" />
<SegmentPart <SegmentPart
Icon={IconFilter}
name="Criteria Filter"
helpText="Filter listings by specific criteria. Only numbers are allowed. You can leave fields empty if you don't want to filter by them."
>
<div className="jobMutation__specFilter">
{SPEC_FILTERS.map((filter) => (
<div key={filter.key} className="jobMutation__specFilterItem">
<div className="jobMutation__specFilterLabel">{filter.translation}</div>
<Input
type="number"
placeholder="Add a number"
value={specFilter?.[filter.key]}
onChange={(value) => handleSpecFilterChange(filter.key, value)}
/>
</div>
))}
</div>
</SegmentPart>
<Divider margin="1rem" />
<SegmentPart
Icon={IconFilter}
name="Area Filter" name="Area Filter"
helpText="Define multiple geographic areas on the map to filter listings. Start drawing by clicking on the square symbol in the top left corner of the map. Click on the map to add points of the polygon. Select the first point to close the polygon. After that, click on a free area of the map to apply this polygon (the color will change from yellow to blue). To delete a polygon, select it first and then click on the trash symbol." helpText="Define multiple geographic areas on the map to filter listings. Start drawing by clicking on the square symbol in the top left corner of the map. Click on the map to add points of the polygon. Select the first point to close the polygon. After that, click on a free area of the map to apply this polygon (the color will change from yellow to blue). To delete a polygon, select it first and then click on the trash symbol."
> >

View File

@@ -3,6 +3,24 @@
float: right; float: right;
margin-bottom: 1rem; margin-bottom: 1rem;
} }
&__specFilter {
display: flex;
gap: 1.5rem;
flex-wrap: wrap;
}
&__specFilterItem {
display: flex;
flex-direction: column;
gap: 0.5rem;
flex: 1;
min-width: 150px;
}
&__specFilterLabel {
font-weight: 500;
}
} }
.semi-select-option-list-wrapper { .semi-select-option-list-wrapper {

View File

@@ -31,7 +31,8 @@ import {
IconLink, IconLink,
IconStar, IconStar,
IconStarStroked, IconStarStroked,
IconRealSize, IconExpand,
IconGridView,
} from '@douyinfe/semi-icons'; } from '@douyinfe/semi-icons';
import maplibregl from 'maplibre-gl'; import maplibregl from 'maplibre-gl';
import 'maplibre-gl/dist/maplibre-gl.css'; import 'maplibre-gl/dist/maplibre-gl.css';
@@ -259,6 +260,17 @@ export default function ListingDetail() {
if (!listing) return null; if (!listing) return null;
const data = [ const data = [
{ key: 'Price', value: `${listing.price}`, Icon: <IconCart /> },
{
key: 'Size',
value: listing.size ? `${listing.size}` : 'N/A',
Icon: <IconExpand />,
},
{
key: 'Rooms',
value: listing.rooms ? `${listing.rooms} Rooms` : 'N/A',
Icon: <IconGridView />,
},
{ {
key: 'Job', key: 'Job',
value: listing.job_name, value: listing.job_name,
@@ -269,12 +281,6 @@ export default function ListingDetail() {
value: listing.provider.charAt(0).toUpperCase() + listing.provider.slice(1), value: listing.provider.charAt(0).toUpperCase() + listing.provider.slice(1),
Icon: <IconBriefcase />, Icon: <IconBriefcase />,
}, },
{ key: 'Price', value: `${listing.price}`, Icon: <IconCart /> },
{
key: 'Size',
value: listing.size ? `${listing.size}` : 'N/A',
Icon: <IconRealSize />,
},
{ {
key: 'Added', key: 'Added',
value: timeService.format(listing.created_at), value: timeService.format(listing.created_at),