From 10c94eea0af4226f31795ad9ca1410156212bc5f Mon Sep 17 00:00:00 2001 From: Stephan Date: Sun, 12 Apr 2026 09:17:23 +0200 Subject: [PATCH] Feature/spec filter (#276) * feat(): create map component, add area filtering to the job config * feat(): filter listings by area filter * chore(): cleanup * feat(): solve feedback * feat(): solve most providers * feat(): solve maybe other providers * feat(): add specFilter config, also add rooms to listing * feat(): change tests * feat(): fix kleinanzeigen parser * feat(): add spec filter switch for listing overviiews * feat(): add rooms and size to the overview and detail of a listing * feat(): rem label * feat(): add types, update providers, they now return specs as numbers * feat(): add jsonconfig to enable type checks * feat: add type for prividerConfig, add fieldNames per provider * feat: fix tests, provider, add formatListing * chore: remov duplicates * feat(): fix tests * feat: fix immoscout * chore: geojson typing * feat: solve requested changes --- jsconfig.json | 12 ++ lib/FredyPipelineExecutioner.js | 171 +++++++++++------- lib/api/routes/jobRouter.js | 2 + lib/provider/einsAImmobilien.js | 29 ++- lib/provider/immobilienDe.js | 31 +++- lib/provider/immoscout.js | 30 ++- lib/provider/immoswp.js | 34 +++- lib/provider/immowelt.js | 28 ++- lib/provider/kleinanzeigen.js | 35 +++- lib/provider/mcMakler.js | 33 +++- lib/provider/neubauKompass.js | 29 ++- lib/provider/ohneMakler.js | 27 ++- lib/provider/regionalimmobilien24.js | 28 ++- lib/provider/sparkasse.js | 31 +++- lib/provider/wgGesucht.js | 29 ++- lib/provider/wohnungsboerse.js | 31 +++- lib/services/jobs/jobExecutionService.js | 10 +- lib/services/storage/jobStorage.js | 16 +- lib/services/storage/listingsStorage.js | 22 +-- .../migrations/sql/15.add-listing-specs.js | 10 + .../sql/16.add-rooms-to-listings.js | 10 + lib/types/browser.js | 10 + lib/types/filter.js | 19 ++ lib/types/job.js | 23 +++ lib/types/listing.js | 22 +++ lib/types/providerConfig.js | 25 +++ lib/types/similarityCache.js | 11 ++ lib/utils/extract-number.js | 18 ++ lib/utils/formatListing.js | 29 +++ test/pipeline_filtering.test.js | 36 +++- test/provider/einsAImmobilien.test.js | 21 ++- test/provider/immobilienDe.test.js | 21 ++- test/provider/immoscout.test.js | 50 +++-- test/provider/immoswp.test.js | 14 +- test/provider/immowelt.test.js | 27 ++- test/provider/kleinanzeigen.test.js | 28 +-- test/provider/mcMakler.test.js | 13 +- test/provider/neubauKompass.test.js | 17 +- test/provider/ohneMakler.test.js | 13 +- test/provider/regionalimmobilien24.test.js | 20 +- test/provider/sparkasse.test.js | 26 ++- test/provider/wgGesucht.test.js | 27 ++- test/provider/wohnungsboerse.test.js | 21 ++- test/utils.js | 9 +- .../components/grid/listings/ListingsGrid.jsx | 15 +- .../grid/listings/ListingsGrid.less | 13 ++ ui/src/views/jobs/mutation/JobMutation.jsx | 40 +++- ui/src/views/jobs/mutation/JobMutation.less | 18 ++ ui/src/views/listings/ListingDetail.jsx | 20 +- 49 files changed, 1004 insertions(+), 250 deletions(-) create mode 100644 jsconfig.json create mode 100644 lib/services/storage/migrations/sql/15.add-listing-specs.js create mode 100644 lib/services/storage/migrations/sql/16.add-rooms-to-listings.js create mode 100644 lib/types/browser.js create mode 100644 lib/types/filter.js create mode 100644 lib/types/job.js create mode 100644 lib/types/listing.js create mode 100644 lib/types/providerConfig.js create mode 100644 lib/types/similarityCache.js create mode 100644 lib/utils/extract-number.js create mode 100644 lib/utils/formatListing.js diff --git a/jsconfig.json b/jsconfig.json new file mode 100644 index 0000000..ce5fa56 --- /dev/null +++ b/jsconfig.json @@ -0,0 +1,12 @@ +{ + "compilerOptions": { + "module": "NodeNext", + "moduleResolution": "NodeNext", + "target": "ESNext", + "checkJs": true, + "allowJs": true, + "noEmit": true, + "strict": false + }, + "exclude": ["node_modules", "ui"] +} diff --git a/lib/FredyPipelineExecutioner.js b/lib/FredyPipelineExecutioner.js index f7f2c31..c7e79f1 100755 --- a/lib/FredyPipelineExecutioner.js +++ b/lib/FredyPipelineExecutioner.js @@ -19,22 +19,14 @@ import { distanceMeters } from './services/listings/distanceCalculator.js'; import { getUserSettings } from './services/storage/settingsStorage.js'; import { updateListingDistance } from './services/storage/listingsStorage.js'; import booleanPointInPolygon from '@turf/boolean-point-in-polygon'; +import { formatListing } from './utils/formatListing.js'; -/** - * @typedef {Object} Listing - * @property {string} id Stable unique identifier (hash) of the listing. - * @property {string} title Title or headline of the listing. - * @property {string} [address] Optional address/location text. - * @property {string} [price] Optional price text/value. - * @property {string} [url] Link to the listing detail page. - * @property {any} [meta] Provider-specific additional metadata. - */ - -/** - * @typedef {Object} SimilarityCache - * @property {(title:string, address?:string)=>boolean} hasSimilarEntries Returns true if a similar entry is known. - * @property {(title:string, address?:string)=>void} addCacheEntry Adds a new entry to the similarity cache. - */ +/** @import { ParsedListing } from './types/listing.js' */ +/** @import { Job } from './types/job.js' */ +/** @import { ProviderConfig } from './types/providerConfig.js' */ +/** @import { SpecFilter, SpatialFilter } from './types/filter.js' */ +/** @import { SimilarityCache } from './types/similarityCache.js' */ +/** @import { Browser } from './types/browser.js' */ /** * Runtime orchestrator for fetching, normalizing, filtering, deduplicating, storing, @@ -48,43 +40,43 @@ import booleanPointInPolygon from '@turf/boolean-point-in-polygon'; * 5) Identify new listings (vs. previously stored hashes) * 6) Persist new listings * 7) Filter out entries similar to already seen ones - * 8) Dispatch notifications + * 8) Filter out entries that do not match the job's specFilter + * 9) Filter out entries that do not match the job's spatialFilter + * 10) Dispatch notifications */ class FredyPipelineExecutioner { /** * Create a new runtime instance for a single provider/job execution. * - * @param {Object} providerConfig Provider configuration. - * @param {string} providerConfig.url Base URL to crawl. - * @param {string} [providerConfig.sortByDateParam] Query parameter used to enforce sorting by date (provider-specific). - * @param {string} [providerConfig.waitForSelector] CSS selector to wait for before parsing content. - * @param {Object.} providerConfig.crawlFields Mapping of field names to selectors/paths to extract. - * @param {string} providerConfig.crawlContainer CSS selector for the container holding listing items. - * @param {(raw:any)=>Listing} providerConfig.normalize Function to convert raw scraped data into a Listing shape. - * @param {(listing:Listing)=>boolean} providerConfig.filter Function to filter out unwanted listings. - * @param {(url:string, waitForSelector?:string)=>Promise|Promise} [providerConfig.getListings] Optional override to fetch listings. - * @param {(listing:Listing, browser:any)=>Promise} [providerConfig.fetchDetails] Optional per-listing detail enrichment. Called in parallel for each new listing after deduplication. Receives the shared browser instance. Must always resolve (never reject). - * @param {Object} notificationConfig Notification configuration passed to notification adapters. - * @param {Object} spatialFilter Optional spatial filter configuration. + * @param {ProviderConfig} providerConfig Provider configuration. + * @param {Job} job Job configuration. * @param {string} providerId The ID of the provider currently in use. - * @param {string} jobKey Key of the job that is currently running (from within the config). * @param {SimilarityCache} similarityCache Cache instance for checking similar entries. - * @param browser + * @param {Browser} browser Puppeteer browser instance. */ - constructor(providerConfig, notificationConfig, spatialFilter, providerId, jobKey, similarityCache, browser) { + constructor(providerConfig, job, providerId, similarityCache, browser) { + /** @type {ProviderConfig} */ this._providerConfig = providerConfig; - this._notificationConfig = notificationConfig; - this._spatialFilter = spatialFilter; + /** @type {Object} */ + this._jobNotificationConfig = job.notificationAdapter; + /** @type {string} */ + this._jobKey = job.id; + /** @type {SpecFilter | null} */ + this._jobSpecFilter = job.specFilter; + /** @type {SpatialFilter | null} */ + this._jobSpatialFilter = job.spatialFilter; + /** @type {string} */ this._providerId = providerId; - this._jobKey = jobKey; + /** @type {SimilarityCache} */ this._similarityCache = similarityCache; + /** @type {Browser} */ this._browser = browser; } /** * Execute the end-to-end pipeline for a single provider run. * - * @returns {Promise} Resolves to the list of new (and similarity-filtered) listings + * @returns {Promise} Resolves to the list of new (and similarity-filtered) listings * after notifications have been sent; resolves to void when there are no new listings. */ execute() { @@ -98,6 +90,7 @@ class FredyPipelineExecutioner { .then(this._save.bind(this)) .then(this._calculateDistance.bind(this)) .then(this._filterBySimilarListings.bind(this)) + .then(this._filterBySpecs.bind(this)) .then(this._filterByArea.bind(this)) .then(this._notify.bind(this)) .catch(this._handleError.bind(this)); @@ -132,8 +125,8 @@ class FredyPipelineExecutioner { /** * Geocode new listings. * - * @param {Listing[]} newListings New listings to geocode. - * @returns {Promise} Resolves with the listings (potentially with added coordinates). + * @param {ParsedListing[]} newListings New listings to geocode. + * @returns {Promise} Resolves with the listings (potentially with added coordinates). */ async _geocode(newListings) { for (const listing of newListings) { @@ -152,18 +145,18 @@ class FredyPipelineExecutioner { * Filter listings by area using the provider's area filter if available. * Only filters if areaFilter is set on the provider AND the listing has coordinates. * - * @param {Listing[]} newListings New listings to filter by area. - * @returns {Promise} Resolves with listings that are within the area (or not filtered if no area is set). + * @param {ParsedListing[]} newListings New listings to filter by area. + * @returns {ParsedListing[]} Resolves with listings that are within the area (or not filtered if no area is set). */ _filterByArea(newListings) { - const polygonFeatures = this._spatialFilter?.features?.filter((f) => f.geometry?.type === 'Polygon'); + const polygonFeatures = this._jobSpatialFilter?.features?.filter((f) => f.geometry?.type === 'Polygon'); // If no area filter is set, return all listings if (!polygonFeatures?.length) { return newListings; } - const filteredIds = []; + const toDeleteListingByIds = []; // Filter listings by area - keep only those within the polygon const keptListings = newListings.filter((listing) => { // If listing doesn't have coordinates, keep it (don't filter out) @@ -176,14 +169,48 @@ class FredyPipelineExecutioner { const isInPolygon = polygonFeatures.some((feature) => booleanPointInPolygon(point, feature)); if (!isInPolygon) { - filteredIds.push(listing.id); + toDeleteListingByIds.push(listing.id); } return isInPolygon; }); - if (filteredIds.length > 0) { - deleteListingsById(filteredIds); + if (toDeleteListingByIds.length > 0) { + deleteListingsById(toDeleteListingByIds); + } + + return keptListings; + } + + /** + * Filter listings based on its specifications (minRooms, minSize, maxPrice). + * + * @param {ParsedListing[]} newListings New listings to filter. + * @returns {ParsedListing[]} Resolves with listings that pass the specification filters. + */ + _filterBySpecs(newListings) { + const { minRooms, minSize, maxPrice } = this._jobSpecFilter || {}; + + // If no specs are set, return all listings + if (!minRooms && !minSize && !maxPrice) { + return newListings; + } + + const toDeleteListingByIds = []; + const keptListings = newListings.filter((listing) => { + const filterOut = + (minRooms && listing.rooms && listing.rooms < minRooms) || + (minSize && listing.size && listing.size < minSize) || + (maxPrice && listing.price && listing.price > maxPrice); + + if (filterOut) { + toDeleteListingByIds.push(listing.id); + } + return !filterOut; + }); + + if (toDeleteListingByIds.length > 0) { + deleteListingsById(toDeleteListingByIds); } return keptListings; @@ -194,7 +221,7 @@ class FredyPipelineExecutioner { * a provider-specific getListings override is supplied. * * @param {string} url The provider URL to fetch from. - * @returns {Promise} Resolves with an array of listings (empty when none found). + * @returns {Promise} Resolves with an array of listings (empty when none found). */ _getListings(url) { const extractor = new Extractor({ ...this._providerConfig.puppeteerOptions, browser: this._browser }); @@ -217,33 +244,42 @@ class FredyPipelineExecutioner { } /** - * Normalize raw listings into the provider-specific Listing shape. + * Normalize raw listings into the provider-specific ParsedListing shape. * * @param {any[]} listings Raw listing entries from the extractor or override. - * @returns {Listing[]} Normalized listings. + * @returns {ParsedListing[]} Normalized listings. */ _normalize(listings) { - return listings.map(this._providerConfig.normalize); + return listings.map((listing) => this._providerConfig.normalize(listing)); } /** * Filter out listings that are missing required fields and those rejected by the * provider's blacklist/filter function. * - * @param {Listing[]} listings Listings to filter. - * @returns {Listing[]} Filtered listings that pass validation and provider filter. + * @param {ParsedListing[]} listings Listings to filter. + * @returns {ParsedListing[]} Filtered listings that pass validation and provider filter. */ _filter(listings) { - const keys = Object.keys(this._providerConfig.crawlFields); - const filteredListings = listings.filter((item) => keys.every((key) => key in item)); - return filteredListings.filter(this._providerConfig.filter); + const requiredKeys = this._providerConfig.fieldNames; + const requireValues = ['id', 'link', 'title']; + + const filteredListings = listings + // this should never filter some listings out, because the normalize function should always extract all fields. + .filter((item) => requiredKeys.every((key) => key in item)) + // TODO: move blacklist filter to this file, so it will handle for all providers in same way. + .filter(this._providerConfig.filter) + // filter out listings that are missing required fields + .filter((item) => requireValues.every((key) => item[key] != null)); + + return filteredListings; } /** * Determine which listings are new by comparing their IDs against stored hashes. * - * @param {Listing[]} listings Listings to evaluate for novelty. - * @returns {Listing[]} New listings not seen before. + * @param {ParsedListing[]} listings Listings to evaluate for novelty. + * @returns {ParsedListing[]} New listings not seen before. * @throws {NoNewListingsWarning} When no new listings are found. */ _findNew(listings) { @@ -260,23 +296,30 @@ class FredyPipelineExecutioner { /** * Send notifications for new listings using the configured notification adapter(s). * - * @param {Listing[]} newListings New listings to notify about. - * @returns {Promise} Resolves to the provided listings after notifications complete. + * @param {ParsedListing[]} newListings New listings to notify about. + * @returns {Promise} Resolves to the provided listings after notifications complete. * @throws {NoNewListingsWarning} When there are no listings to notify about. */ _notify(newListings) { if (newListings.length === 0) { throw new NoNewListingsWarning(); } - const sendNotifications = notify.send(this._providerId, newListings, this._notificationConfig, this._jobKey); + // TODO: move this to the notification adapter, so it will handle for all providers in same way. + const formattedListings = newListings.map(formatListing); + const sendNotifications = notify.send( + this._providerId, + formattedListings, + this._jobNotificationConfig, + this._jobKey, + ); return Promise.all(sendNotifications).then(() => newListings); } /** * Persist new listings and pass them through. * - * @param {Listing[]} newListings Listings to store. - * @returns {Listing[]} The same listings, unchanged. + * @param {ParsedListing[]} newListings Listings to store. + * @returns {ParsedListing[]} The same listings, unchanged. */ _save(newListings) { logger.debug(`Storing ${newListings.length} new listings (Provider: '${this._providerId}')`); @@ -287,8 +330,8 @@ class FredyPipelineExecutioner { /** * Calculate distance for new listings. * - * @param {Listing[]} listings - * @returns {Listing[]} + * @param {ParsedListing[]} listings + * @returns {ParsedListing[]} * @private */ _calculateDistance(listings) { @@ -324,8 +367,8 @@ class FredyPipelineExecutioner { * Remove listings that are similar to already known entries according to the similarity cache. * Adds the remaining listings to the cache. * - * @param {Listing[]} listings Listings to filter by similarity. - * @returns {Listing[]} Listings considered unique enough to keep. + * @param {ParsedListing[]} listings Listings to filter by similarity. + * @returns {ParsedListing[]} Listings considered unique enough to keep. */ _filterBySimilarListings(listings) { const filteredIds = []; diff --git a/lib/api/routes/jobRouter.js b/lib/api/routes/jobRouter.js index 1798cf7..f059bc8 100644 --- a/lib/api/routes/jobRouter.js +++ b/lib/api/routes/jobRouter.js @@ -172,6 +172,7 @@ jobRouter.post('/', async (req, res) => { enabled, shareWithUsers = [], spatialFilter = null, + specFilter = null, } = req.body; const settings = await getSettings(); try { @@ -197,6 +198,7 @@ jobRouter.post('/', async (req, res) => { notificationAdapter, shareWithUsers, spatialFilter, + specFilter, }); } catch (error) { res.send(new Error(error)); diff --git a/lib/provider/einsAImmobilien.js b/lib/provider/einsAImmobilien.js index 0c9a592..dbc2d86 100755 --- a/lib/provider/einsAImmobilien.js +++ b/lib/provider/einsAImmobilien.js @@ -5,8 +5,16 @@ import { buildHash, isOneOf } from '../utils.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; +import { extractNumber } from '../utils/extract-number.js'; +/** @import { ParsedListing } from '../types/listing.js' */ +/** @import { ProviderConfig } from '../types/providerConfig.js' */ + let appliedBlackList = []; +/** + * @param {any} o + * @returns {ParsedListing} + */ function normalize(o) { const baseUrl = 'https://www.1a-immobilienmarkt.de'; const link = `${baseUrl}/expose/${o.id}.html`; @@ -14,7 +22,17 @@ function normalize(o) { const id = buildHash(o.id, price); const image = baseUrl + o.image; const address = o.address == null ? null : o.address.trim().replaceAll('/', ','); - return Object.assign(o, { id, price, link, image, address }); + return { + id, + link, + title: o.title || '', + price: extractNumber(price), + size: extractNumber(o.size), + rooms: extractNumber(o.rooms), + address, + image, + description: undefined, + }; } /** @@ -34,13 +52,19 @@ function normalizePrice(price) { } return result[0]; } +/** + * @param {ParsedListing} o + * @returns {boolean} + */ function applyBlacklist(o) { const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList); const descNotBlacklisted = !isOneOf(o.description, appliedBlackList); return titleNotBlacklisted && descNotBlacklisted; } +/** @type {ProviderConfig} */ const config = { + fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'], url: null, crawlContainer: '.tabelle', sortByDateParam: 'sort_type=newest', @@ -48,7 +72,8 @@ const config = { crawlFields: { id: '.inner_object_data input[name="marker_objekt_id"]@value | int', price: '.inner_object_data .single_data_price | removeNewline | trim', - size: '.tabelle .tabelle_inhalt_infos .single_data_box | removeNewline | trim', + size: '.tabelle .tabelle_inhalt_infos .single_data_box:nth-of-type(1) | removeNewline | trim', + rooms: '.tabelle .tabelle_inhalt_infos .single_data_box:nth-of-type(2) | removeNewline | trim', title: '.inner_object_data .tabelle_inhalt_titel_black | removeNewline | trim', image: '.inner_object_pic img@src', address: '.tabelle .tabelle_inhalt_infos .left_information > div:nth-child(2) | removeNewline | trim', diff --git a/lib/provider/immobilienDe.js b/lib/provider/immobilienDe.js index 8076752..2de2ae1 100644 --- a/lib/provider/immobilienDe.js +++ b/lib/provider/immobilienDe.js @@ -5,9 +5,12 @@ import { buildHash, isOneOf } from '../utils.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; +import { extractNumber } from '../utils/extract-number.js'; import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js'; import * as cheerio from 'cheerio'; import logger from '../services/logger.js'; +/** @import { ParsedListing } from '../types/listing.js' */ +/** @import { ProviderConfig } from '../types/providerConfig.js' */ let appliedBlackList = []; @@ -65,27 +68,44 @@ async function fetchDetails(listing, browser) { return listing; } } - +/** + * @param {any} o + * @returns {ParsedListing} + */ function normalize(o) { const baseUrl = 'https://www.immobilien.de'; - const size = o.size || null; - const price = o.price || null; - const title = o.title || 'No title available'; + const title = o.title || ''; const address = o.address || null; const shortLink = shortenLink(o.link); const link = shortLink ? (shortLink.startsWith('http') ? shortLink : baseUrl + shortLink) : baseUrl; const image = o.image ? (o.image.startsWith('http') ? o.image : baseUrl + o.image) : null; const id = buildHash(parseId(shortLink), o.price); - return Object.assign(o, { id, price, size, title, address, link, image }); + return { + id, + link, + title, + price: extractNumber(o.price), + size: extractNumber(o.size), + rooms: extractNumber(o.rooms), + address, + image, + description: o.description, + }; } +/** + * @param {ParsedListing} o + * @returns {boolean} + */ function applyBlacklist(o) { const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList); const descNotBlacklisted = !isOneOf(o.description, appliedBlackList); return titleNotBlacklisted && descNotBlacklisted; } +/** @type {ProviderConfig} */ const config = { + fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'], url: null, crawlContainer: 'a.lr-card', sortByDateParam: 'sort_col=*created_ts&sort_dir=desc', @@ -94,6 +114,7 @@ const config = { id: '@href', //will be transformed later price: '.lr-card__price-amount | trim', size: '.lr-card__fact:has(.lr-card__fact-label:contains("Fläche")) .lr-card__fact-value | trim', + rooms: '.zimmer .label_info', title: '.lr-card__title | trim', description: '.description | trim', link: '@href', diff --git a/lib/provider/immoscout.js b/lib/provider/immoscout.js index 2a52967..9aa0312 100644 --- a/lib/provider/immoscout.js +++ b/lib/provider/immoscout.js @@ -46,6 +46,10 @@ import { convertWebToMobile, } from '../services/immoscout/immoscout-web-translator.js'; import logger from '../services/logger.js'; +import { extractNumber } from '../utils/extract-number.js'; +/** @import { ParsedListing } from '../types/listing.js' */ +/** @import { ProviderConfig } from '../types/providerConfig.js' */ + let appliedBlackList = []; async function getListings(url) { @@ -168,22 +172,44 @@ async function isListingActive(link) { function nullOrEmpty(val) { return val == null || val.length === 0; } + +/** + * @param {any} o + * @returns {ParsedListing} + */ function normalize(o) { - const title = nullOrEmpty(o.title) ? 'NO TITLE FOUND' : o.title.replace('NEU', ''); + const title = (o.title || '').replace('NEU', '').trim(); const address = nullOrEmpty(o.address) ? 'NO ADDRESS FOUND' : (o.address || '').replace(/\(.*\),.*$/, '').trim(); const id = buildHash(o.id, o.price); - return Object.assign(o, { id, title, address }); + return { + id, + link: o.link, + title, + price: extractNumber(o.price), + size: extractNumber(o.size), + rooms: extractNumber(o.rooms), + address, + image: o.image, + description: o.description, + }; } +/** + * @param {ParsedListing} o + * @returns {boolean} + */ function applyBlacklist(o) { return !isOneOf(o.title, appliedBlackList); } +/** @type {ProviderConfig} */ const config = { + fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'], url: null, crawlFields: { id: 'id', title: 'title', price: 'price', size: 'size', + rooms: 'rooms', link: 'link', address: 'address', }, diff --git a/lib/provider/immoswp.js b/lib/provider/immoswp.js index 5126982..4798df6 100755 --- a/lib/provider/immoswp.js +++ b/lib/provider/immoswp.js @@ -5,27 +5,46 @@ import { isOneOf, buildHash } from '../utils.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; +import { extractNumber } from '../utils/extract-number.js'; +/** @import { ParsedListing } from '../types/listing.js' */ +/** @import { ProviderConfig } from '../types/providerConfig.js' */ let appliedBlackList = []; +/** + * @param {any} o + * @returns {ParsedListing} + */ function normalize(o) { - const size = o.size || 'N/A m²'; - const price = (o.price || '--- €').replace('Preis auf Anfrage', '--- €'); - const title = o.title || 'No title available'; const immoId = o.id.substring(o.id.indexOf('-') + 1, o.id.length); const link = `https://immo.swp.de/immobilien/${immoId}`; - const description = o.description; - const id = buildHash(immoId, price); - return Object.assign(o, { id, price, size, title, link, description }); + const id = buildHash(immoId, o.price); + return { + id, + link, + title: o.title || '', + price: extractNumber(o.price), + size: extractNumber(o.size), + rooms: extractNumber(o.rooms), + address: o.address, + image: o.image, + description: undefined, + }; } +/** + * @param {ParsedListing} o + * @returns {boolean} + */ function applyBlacklist(o) { const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList); const descNotBlacklisted = !isOneOf(o.description, appliedBlackList); return titleNotBlacklisted && descNotBlacklisted; } +/** @type {ProviderConfig} */ const config = { + fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'], url: null, crawlContainer: '.js-serp-item', sortByDateParam: 's=most_recently_updated_first', @@ -34,9 +53,10 @@ const config = { id: '.js-bookmark-btn@data-id', price: 'div.align-items-start div:first-child | trim', size: 'div.align-items-start div:nth-child(3) | trim', + rooms: 'div.align-items-start div:nth-child(2) | trim', + address: '.js-bookmark-btn@data-address', title: '.js-item-title-link@title | trim', link: '.ci-search-result__link@href', - description: '.js-show-more-item-sm | removeNewline | trim', image: 'img@src', }, normalize: normalize, diff --git a/lib/provider/immowelt.js b/lib/provider/immowelt.js index ce05c8b..1e99787 100755 --- a/lib/provider/immowelt.js +++ b/lib/provider/immowelt.js @@ -5,9 +5,12 @@ import { buildHash, isOneOf } from '../utils.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; +import { extractNumber } from '../utils/extract-number.js'; import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js'; import * as cheerio from 'cheerio'; import logger from '../services/logger.js'; +/** @import { ParsedListing } from '../types/listing.js' */ +/** @import { ProviderConfig } from '../types/providerConfig.js' */ let appliedBlackList = []; @@ -48,18 +51,38 @@ async function fetchDetails(listing, browser) { } } +/** + * @param {any} o + * @returns {ParsedListing} + */ function normalize(o) { const id = buildHash(o.id, o.price); - return Object.assign(o, { id }); + return { + id, + link: o.link, + title: o.title || '', + price: extractNumber(o.price), + size: extractNumber(o.size), + rooms: extractNumber(o.rooms), + address: o.address, + image: o.image, + description: o.description, + }; } +/** + * @param {ParsedListing} o + * @returns {boolean} + */ function applyBlacklist(o) { const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList); const descNotBlacklisted = !isOneOf(o.description, appliedBlackList); return titleNotBlacklisted && descNotBlacklisted; } +/** @type {ProviderConfig} */ const config = { + fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'], url: null, crawlContainer: 'div[data-testid="serp-core-scrollablelistview-testid"]:not(div[data-testid="serp-enlargementlist-testid"] div[data-testid="serp-card-testid"]) div[data-testid="serp-core-classified-card-testid"]', @@ -68,7 +91,8 @@ const config = { crawlFields: { id: 'a@href', price: 'div[data-testid="cardmfe-price-testid"] | removeNewline | trim', - size: 'div[data-testid="cardmfe-keyfacts-testid"] | removeNewline | trim', + size: 'div[data-testid="cardmfe-keyfacts-testid"] div:nth-of-type(3) | removeNewline | trim', + rooms: 'div[data-testid="cardmfe-keyfacts-testid"] div:nth-of-type(1) | removeNewline | trim', title: 'div[data-testid="cardmfe-description-box-text-test-id"] > div:nth-of-type(2)', link: 'a@href', description: 'div[data-testid="cardmfe-description-text-test-id"] > div:nth-of-type(2) | removeNewline | trim', diff --git a/lib/provider/kleinanzeigen.js b/lib/provider/kleinanzeigen.js index cc5cdaa..a49542f 100755 --- a/lib/provider/kleinanzeigen.js +++ b/lib/provider/kleinanzeigen.js @@ -5,6 +5,9 @@ import { buildHash, isOneOf } from '../utils.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; +import { extractNumber } from '../utils/extract-number.js'; +/** @import { ParsedListing } from '../types/listing.js' */ +/** @import { ProviderConfig } from '../types/providerConfig.js' */ import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js'; import logger from '../services/logger.js'; import * as cheerio from 'cheerio'; @@ -146,13 +149,33 @@ async function fetchDetails(listing, browser) { return enrichListingFromDetails(listing, browser); } +/** + * @param {any} o + * @returns {ParsedListing} + */ function normalize(o) { - const size = o.size || '--- m²'; + const parts = (o.tags || '').split('·').map((p) => p.trim()); + const size = parts.find((p) => p.includes('m²')); + const rooms = parts.find((p) => p.includes('Zi.')); const id = buildHash(o.id, o.price); - const link = toAbsoluteLink(o.link) || o.link; - return Object.assign(o, { id, size, link }); + + return { + id, + title: o.title, + link: toAbsoluteLink(o.link) || o.link, + price: extractNumber(o.price), + size: extractNumber(size), + rooms: extractNumber(rooms), + address: o.address, + description: o.description, + image: o.image, + }; } +/** + * @param {ParsedListing} o + * @returns {boolean} + */ function applyBlacklist(o) { const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList); const descNotBlacklisted = !isOneOf(o.description, appliedBlackList); @@ -161,16 +184,18 @@ function applyBlacklist(o) { return o.title != null && !isBlacklistedDistrict && titleNotBlacklisted && descNotBlacklisted; } +/** @type {ProviderConfig} */ const config = { + fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'], url: null, crawlContainer: '#srchrslt-adtable .ad-listitem ', //sort by date is standard oO sortByDateParam: null, waitForSelector: 'body', crawlFields: { - id: '.aditem@data-adid | int', + id: '.aditem@data-adid', price: '.aditem-main--middle--price-shipping--price | removeNewline | trim', - size: '.aditem-main .text-module-end | removeNewline | trim', + tags: '.aditem-main--middle--tags | removeNewline | trim', title: '.aditem-main .text-module-begin a | removeNewline | trim', link: '.aditem-main .text-module-begin a@href | removeNewline | trim', description: '.aditem-main .aditem-main--middle--description | removeNewline | trim', diff --git a/lib/provider/mcMakler.js b/lib/provider/mcMakler.js index 5a25cd8..9d8ba41 100755 --- a/lib/provider/mcMakler.js +++ b/lib/provider/mcMakler.js @@ -5,23 +5,46 @@ import { isOneOf, buildHash } from '../utils.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; +import { extractNumber } from '../utils/extract-number.js'; +/** @import { ParsedListing } from '../types/listing.js' */ +/** @import { ProviderConfig } from '../types/providerConfig.js' */ + let appliedBlackList = []; +/** + * @param {any} o + * @returns {ParsedListing} + */ function normalize(o) { const originalId = o.id.split('/').pop(); const id = buildHash(originalId, o.price); - const size = o.size ?? 'N/A m²'; - const title = o.title || 'No title available'; + const link = o.link != null ? `https://www.mcmakler.de${o.link}` : o.link; + const [rooms, size] = o.tags.split(' | '); const address = o.address?.replace(' / ', ' ') || null; - const link = o.link != null ? `https://www.mcmakler.de${o.link}` : config.url; - return Object.assign(o, { id, size, title, link, address }); + return { + id, + link, + title: o.title || '', + price: extractNumber(o.price), + size: extractNumber(size), + rooms: extractNumber(rooms), + address, + image: o.image, + description: undefined, + }; } +/** + * @param {ParsedListing} o + * @returns {boolean} + */ function applyBlacklist(o) { const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList); const descNotBlacklisted = !isOneOf(o.description, appliedBlackList); return titleNotBlacklisted && descNotBlacklisted; } +/** @type {ProviderConfig} */ const config = { + fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'], url: null, crawlContainer: 'article[data-testid="propertyCard"]', sortByDateParam: 'sortBy=DATE&sortOn=DESC', @@ -30,7 +53,7 @@ const config = { id: 'h2 a@href', title: 'h2 a | removeNewline | trim', price: 'footer > p:first-of-type | trim', - size: 'footer > p:nth-of-type(2) | trim', + tags: 'footer > p:nth-of-type(2) | trim', address: 'div > h2 + p | removeNewline | trim', image: 'img@src', link: 'h2 a@href', diff --git a/lib/provider/neubauKompass.js b/lib/provider/neubauKompass.js index bde911d..ab05bdf 100755 --- a/lib/provider/neubauKompass.js +++ b/lib/provider/neubauKompass.js @@ -5,6 +5,9 @@ import { isOneOf, buildHash } from '../utils.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; +import { extractNumber } from '../utils/extract-number.js'; +/** @import { ParsedListing } from '../types/listing.js' */ +/** @import { ProviderConfig } from '../types/providerConfig.js' */ let appliedBlackList = []; @@ -12,19 +15,39 @@ function nullOrEmpty(val) { return val == null || val.length === 0; } +/** + * @param {any} o + * @returns {ParsedListing} + */ function normalize(o) { const link = nullOrEmpty(o.link) ? 'NO LINK' : `https://www.neubaukompass.de${o.link.substring(o.link.indexOf('/neubau'))}`; const id = buildHash(o.link, o.price); - return Object.assign(o, { id, link }); + return { + id, + link, + title: o.title || '', + price: extractNumber(o.price), + size: extractNumber(o.size), + rooms: extractNumber(o.rooms), + address: o.address, + image: o.image, + description: o.description, + }; } +/** + * @param {ParsedListing} o + * @returns {boolean} + */ function applyBlacklist(o) { return !isOneOf(o.title, appliedBlackList); } +/** @type {ProviderConfig} */ const config = { + fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'], url: null, crawlContainer: '.col-12.mb-4', sortByDateParam: 'Sortierung=Id&Richtung=DESC', @@ -34,7 +57,9 @@ const config = { title: 'a@title | removeNewline | trim', link: 'a@href', address: '.nbk-project-card__description | removeNewline | trim', - price: '.nbk-project-card__spec-item .nbk-project-card__spec-value | removeNewline | trim', + price: '.nbk-project-card__spec-item:nth-child(1) .nbk-project-card__spec-value | removeNewline | trim', + size: '.nbk-project-card__spec-item:nth-child(2) .nbk-project-card__spec-value | removeNewline | trim', + rooms: '.nbk-project-card__spec-item:nth-child(3) .nbk-project-card__spec-value | removeNewline | trim', image: '.nbk-project-card__image@src', }, normalize: normalize, diff --git a/lib/provider/ohneMakler.js b/lib/provider/ohneMakler.js index ca90363..9eb4603 100755 --- a/lib/provider/ohneMakler.js +++ b/lib/provider/ohneMakler.js @@ -5,19 +5,43 @@ import { isOneOf, buildHash } from '../utils.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; +import { extractNumber } from '../utils/extract-number.js'; +/** @import { ParsedListing } from '../types/listing.js' */ +/** @import { ProviderConfig } from '../types/providerConfig.js' */ + let appliedBlackList = []; +/** + * @param {any} o + * @returns {ParsedListing} + */ function normalize(o) { const link = metaInformation.baseUrl + o.link; const id = buildHash(o.title, o.link, o.price); - return Object.assign(o, { link, id }); + return { + id, + link, + title: o.title || '', + price: extractNumber(o.price), + size: extractNumber(o.size), + rooms: extractNumber(o.rooms), + address: o.address, + image: o.image, + description: o.description, + }; } +/** + * @param {ParsedListing} o + * @returns {boolean} + */ function applyBlacklist(o) { const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList); const descNotBlacklisted = !isOneOf(o.description, appliedBlackList); return titleNotBlacklisted && descNotBlacklisted; } +/** @type {ProviderConfig} */ const config = { + fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'], url: null, crawlContainer: 'div[data-livecomponent-id*="search/property_list"] .grid > div', sortByDateParam: null, @@ -27,6 +51,7 @@ const config = { title: 'h4 | removeNewline | trim', price: '.text-xl | trim', size: 'div[title="Wohnfläche"] | trim', + rooms: 'div[title="Zimmer"] | trim', address: '.text-slate-800 | removeNewline | trim', image: 'img@src', link: 'a@href', diff --git a/lib/provider/regionalimmobilien24.js b/lib/provider/regionalimmobilien24.js index baaae6f..aef3171 100755 --- a/lib/provider/regionalimmobilien24.js +++ b/lib/provider/regionalimmobilien24.js @@ -5,24 +5,47 @@ import { isOneOf, buildHash } from '../utils.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; +import { extractNumber } from '../utils/extract-number.js'; +/** @import { ParsedListing } from '../types/listing.js' */ +/** @import { ProviderConfig } from '../types/providerConfig.js' */ + let appliedBlackList = []; +/** + * @param {any} o + * @returns {ParsedListing} + */ function normalize(o) { const id = buildHash(o.id, o.price); const address = o.address?.replace(/^adresse /i, '') ?? null; - const title = o.title || 'No title available'; const link = o.link != null ? decodeURIComponent(o.link) : config.url; const urlReg = new RegExp(/url\((.*?)\)/gim); const image = o.image != null ? urlReg.exec(o.image)[1] : null; - return Object.assign(o, { id, address, title, link, image }); + return { + id, + link, + title: o.title || '', + price: extractNumber(o.price), + size: extractNumber(o.size), + rooms: extractNumber(o.rooms), + address, + image, + description: o.description, + }; } +/** + * @param {ParsedListing} o + * @returns {boolean} + */ function applyBlacklist(o) { const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList); const descNotBlacklisted = !isOneOf(o.description, appliedBlackList); return titleNotBlacklisted && descNotBlacklisted; } +/** @type {ProviderConfig} */ const config = { + fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'], url: null, crawlContainer: '.listentry-content', sortByDateParam: null, // sort by date is standard @@ -32,6 +55,7 @@ const config = { title: 'h2 | trim', price: '.listentry-details-price .listentry-details-v | trim', size: '.listentry-details-size .listentry-details-v | trim', + rooms: '.listentry-details-rooms .listentry-details-v | trim', address: '.listentry-adress | trim', image: '.listentry-img@style', link: '.shariff@data-url', diff --git a/lib/provider/sparkasse.js b/lib/provider/sparkasse.js index 8f4af8c..75db6e7 100755 --- a/lib/provider/sparkasse.js +++ b/lib/provider/sparkasse.js @@ -8,6 +8,9 @@ import checkIfListingIsActive from '../services/listings/listingActiveTester.js' import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js'; import * as cheerio from 'cheerio'; import logger from '../services/logger.js'; +import { extractNumber } from '../utils/extract-number.js'; +/** @import { ParsedListing } from '../types/listing.js' */ +/** @import { ProviderConfig } from '../types/providerConfig.js' */ let appliedBlackList = []; @@ -55,20 +58,39 @@ async function fetchDetails(listing, browser) { } } +/** + * @param {any} o + * @returns {ParsedListing} + */ function normalize(o) { const originalId = o.id.split('/').pop().replace('.html', ''); const id = buildHash(originalId, o.price); - const size = o.size?.replace(' Wohnfläche', '').replace(' m²', 'm²') ?? null; - const title = o.title || 'No title available'; const link = o.link != null ? `https://immobilien.sparkasse.de${o.link}` : config.url; - return Object.assign(o, { id, size, title, link }); + + return { + id, + link, + title: o.title || '', + price: extractNumber(o.price), + size: extractNumber(o.size), + rooms: extractNumber(o.rooms), + address: o.address, + image: o.image, + description: o.description, + }; } +/** + * @param {ParsedListing} o + * @returns {boolean} + */ function applyBlacklist(o) { const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList); const descNotBlacklisted = !isOneOf(o.description, appliedBlackList); return titleNotBlacklisted && descNotBlacklisted; } +/** @type {ProviderConfig} */ const config = { + fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'], url: null, crawlContainer: 'div[data-testid="estate-link"]', sortByDateParam: 'sortBy=date_desc', @@ -77,7 +99,8 @@ const config = { id: 'a@href', title: 'h3 | trim', price: '.estate-list-price | trim', - size: '.estate-mainfact span | trim', + size: '.estate-mainfact:nth-child(1) span | trim', + rooms: '.estate-mainfact:nth-child(2) span | trim', address: 'h6 | trim', image: 'img@src', link: 'a@href', diff --git a/lib/provider/wgGesucht.js b/lib/provider/wgGesucht.js index 87e8a37..f355627 100755 --- a/lib/provider/wgGesucht.js +++ b/lib/provider/wgGesucht.js @@ -5,9 +5,12 @@ import { isOneOf, buildHash } from '../utils.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; +import { extractNumber } from '../utils/extract-number.js'; import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js'; import * as cheerio from 'cheerio'; import logger from '../services/logger.js'; +/** @import { ParsedListing } from '../types/listing.js' */ +/** @import { ProviderConfig } from '../types/providerConfig.js' */ let appliedBlackList = []; @@ -32,20 +35,39 @@ async function fetchDetails(listing, browser) { return listing; } } - +/** + * @param {any} o + * @returns {ParsedListing} + */ function normalize(o) { const id = buildHash(o.id, o.price); const link = `https://www.wg-gesucht.de${o.link}`; const image = o.image != null ? o.image.replace('small', 'large') : null; - return Object.assign(o, { id, link, image }); + const [rooms, city, road] = o.details?.split(' | ') || []; + return { + id, + link, + title: o.title || '', + price: extractNumber(o.price), + size: extractNumber(o.size), + rooms: extractNumber(rooms), + address: `${city}, ${road}`, + image, + description: o.description, + }; } +/** + * @param {ParsedListing} o + * @returns {boolean} + */ function applyBlacklist(o) { const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList); const descNotBlacklisted = !isOneOf(o.description, appliedBlackList); return o.id != null && titleNotBlacklisted && descNotBlacklisted; } +/** @type {ProviderConfig} */ const config = { url: null, crawlContainer: '#main_column .wgg_card', @@ -56,10 +78,13 @@ const config = { details: '.row .noprint .col-xs-11 |removeNewline |trim', price: '.middle .col-xs-3 |removeNewline |trim', size: '.middle .text-right |removeNewline |trim', + rooms: '.middle .text-right |removeNewline |trim', title: '.truncate_title a |removeNewline |trim', link: '.truncate_title a@href', image: '.img-responsive@src', + description: '.row .noprint .col-xs-11 |removeNewline |trim', }, + fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'], normalize: normalize, filter: applyBlacklist, fetchDetails, diff --git a/lib/provider/wohnungsboerse.js b/lib/provider/wohnungsboerse.js index f6c37c4..4581029 100644 --- a/lib/provider/wohnungsboerse.js +++ b/lib/provider/wohnungsboerse.js @@ -5,26 +5,45 @@ import * as utils from '../utils.js'; import checkIfListingIsActive from '../services/listings/listingActiveTester.js'; +import { extractNumber } from '../utils/extract-number.js'; +/** @import { ParsedListing } from '../types/listing.js' */ +/** @import { ProviderConfig } from '../types/providerConfig.js' */ let appliedBlackList = []; +/** + * @param {any} o + * @returns {ParsedListing} + */ function normalize(o) { - const id = o.link.split('/').pop(); - const price = o.price; - const size = o.size; - const rooms = o.rooms; const [city = '', part = ''] = (o.description || '').split('-').map((v) => v.trim()); const address = `${part}, ${city}`; - return Object.assign(o, { id, price, size, rooms, address }); + return { + id: o.link.split('/').pop(), + link: o.link, + title: o.title || '', + price: extractNumber(o.price), + size: extractNumber(o.size), + rooms: extractNumber(o.rooms), + address, + image: o.image, + description: o.description, + }; } +/** + * @param {ParsedListing} o + * @returns {boolean} + */ function applyBlacklist(o) { const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList); const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList); return o.id != null && o.title != null && titleNotBlacklisted && descNotBlacklisted && o.link.startsWith(o.link); } +/** @type {ProviderConfig} */ const config = { + fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'], url: null, sortByDateParam: null, waitForSelector: 'body', @@ -37,7 +56,7 @@ const config = { size: 'dl:nth-of-type(3) dd | removeNewline | trim', description: 'div.before\\:icon-location_marker | trim', link: '@href', - imageUrl: 'img@src', + image: 'img@src', }, normalize: normalize, filter: applyBlacklist, diff --git a/lib/services/jobs/jobExecutionService.js b/lib/services/jobs/jobExecutionService.js index b8324b8..99408f7 100644 --- a/lib/services/jobs/jobExecutionService.js +++ b/lib/services/jobs/jobExecutionService.js @@ -178,15 +178,7 @@ export function initJobExecutionService({ providers, settings, intervalMs }) { browser = await puppeteerExtractor.launchBrowser(matchedProvider.config.url, {}); } - await new FredyPipelineExecutioner( - matchedProvider.config, - job.notificationAdapter, - job.spatialFilter, - prov.id, - job.id, - similarityCache, - browser, - ).execute(); + await new FredyPipelineExecutioner(matchedProvider.config, job, prov.id, similarityCache, browser).execute(); } catch (err) { logger.error(err); } diff --git a/lib/services/storage/jobStorage.js b/lib/services/storage/jobStorage.js index 4023459..2a34cda 100644 --- a/lib/services/storage/jobStorage.js +++ b/lib/services/storage/jobStorage.js @@ -31,6 +31,7 @@ export const upsertJob = ({ userId, shareWithUsers = [], spatialFilter = null, + specFilter = null, }) => { const id = jobId || nanoid(); const existing = SqliteConnection.query(`SELECT id, user_id FROM jobs WHERE id = @id LIMIT 1`, { id })[0]; @@ -44,7 +45,8 @@ export const upsertJob = ({ provider = @provider, notification_adapter = @notification_adapter, shared_with_user = @shareWithUsers, - spatial_filter = @spatialFilter + spatial_filter = @spatialFilter, + spec_filter = @specFilter WHERE id = @id`, { id, @@ -55,12 +57,13 @@ export const upsertJob = ({ provider: toJson(provider ?? []), notification_adapter: toJson(notificationAdapter ?? []), spatialFilter: spatialFilter ? toJson(spatialFilter) : null, + specFilter: specFilter ? toJson(specFilter) : null, }, ); } else { SqliteConnection.execute( - `INSERT INTO jobs (id, user_id, enabled, name, blacklist, provider, notification_adapter, shared_with_user, spatial_filter) - VALUES (@id, @user_id, @enabled, @name, @blacklist, @provider, @notification_adapter, @shareWithUsers, @spatialFilter)`, + `INSERT INTO jobs (id, user_id, enabled, name, blacklist, provider, notification_adapter, shared_with_user, spatial_filter, spec_filter) + VALUES (@id, @user_id, @enabled, @name, @blacklist, @provider, @notification_adapter, @shareWithUsers, @spatialFilter, @specFilter)`, { id, user_id: ownerId, @@ -71,6 +74,7 @@ export const upsertJob = ({ shareWithUsers: toJson(shareWithUsers ?? []), notification_adapter: toJson(notificationAdapter ?? []), spatialFilter: spatialFilter ? toJson(spatialFilter) : null, + specFilter: specFilter ? toJson(specFilter) : null, }, ); } @@ -92,6 +96,7 @@ export const getJob = (jobId) => { j.shared_with_user, j.notification_adapter AS notificationAdapter, j.spatial_filter AS spatialFilter, + j.spec_filter AS specFilter, (SELECT COUNT(1) FROM listings l WHERE l.job_id = j.id AND l.is_active = 1 AND l.manually_deleted = 0) AS numberOfFoundListings FROM jobs j WHERE j.id = @id @@ -107,6 +112,7 @@ export const getJob = (jobId) => { shared_with_user: fromJson(row.shared_with_user, []), notificationAdapter: fromJson(row.notificationAdapter, []), spatialFilter: fromJson(row.spatialFilter, null), + specFilter: fromJson(row.specFilter, null), }; }; @@ -157,6 +163,7 @@ export const getJobs = () => { j.shared_with_user, j.notification_adapter AS notificationAdapter, j.spatial_filter AS spatialFilter, + j.spec_filter AS specFilter, (SELECT COUNT(1) FROM listings l WHERE l.job_id = j.id AND l.is_active = 1 AND l.manually_deleted = 0) AS numberOfFoundListings FROM jobs j WHERE j.enabled = 1 @@ -170,6 +177,7 @@ export const getJobs = () => { shared_with_user: fromJson(row.shared_with_user, []), notificationAdapter: fromJson(row.notificationAdapter, []), spatialFilter: fromJson(row.spatialFilter, null), + specFilter: fromJson(row.specFilter, null), })); }; @@ -260,6 +268,7 @@ export const queryJobs = ({ j.shared_with_user, j.notification_adapter AS notificationAdapter, j.spatial_filter AS spatialFilter, + j.spec_filter AS specFilter, (SELECT COUNT(1) FROM listings l WHERE l.job_id = j.id AND l.is_active = 1 AND l.manually_deleted = 0) AS numberOfFoundListings FROM jobs j ${whereSql} @@ -276,6 +285,7 @@ export const queryJobs = ({ shared_with_user: fromJson(row.shared_with_user, []), notificationAdapter: fromJson(row.notificationAdapter, []), spatialFilter: fromJson(row.spatialFilter, null), + specFilter: fromJson(row.specFilter, null), })); return { totalNumber, page: safePage, result }; diff --git a/lib/services/storage/listingsStorage.js b/lib/services/storage/listingsStorage.js index fe788ae..831a3b5 100755 --- a/lib/services/storage/listingsStorage.js +++ b/lib/services/storage/listingsStorage.js @@ -174,9 +174,9 @@ export const storeListings = (jobId, providerId, listings) => { SqliteConnection.withTransaction((db) => { const stmt = db.prepare( - `INSERT INTO listings (id, hash, provider, job_id, price, size, title, image_url, description, address, + `INSERT INTO listings (id, hash, provider, job_id, price, size, rooms, title, image_url, description, address, link, created_at, is_active, latitude, longitude) - VALUES (@id, @hash, @provider, @job_id, @price, @size, @title, @image_url, @description, @address, @link, + VALUES (@id, @hash, @provider, @job_id, @price, @size, @rooms, @title, @image_url, @description, @address, @link, @created_at, 1, @latitude, @longitude) ON CONFLICT(job_id, hash) DO NOTHING`, ); @@ -187,8 +187,9 @@ export const storeListings = (jobId, providerId, listings) => { hash: item.id, provider: providerId, job_id: jobId, - price: extractNumber(item.price), - size: extractNumber(item.size), + price: item.price, + size: item.size, + rooms: item.rooms, title: item.title, image_url: item.image, description: item.description, @@ -202,19 +203,6 @@ export const storeListings = (jobId, providerId, listings) => { } }); - /** - * Extract the first number from a string like "1.234 €" or "70 m²". - * Removes dots/commas before parsing. Returns null on invalid input. - * @param {string|undefined|null} str - * @returns {number|null} - */ - function extractNumber(str) { - if (!str) return null; - const cleaned = str.replace(/\./g, '').replace(',', '.'); - const num = parseFloat(cleaned); - return isNaN(num) ? null : num; - } - /** * Remove any parentheses segments (including surrounding whitespace) from a string. * Returns null for empty input. diff --git a/lib/services/storage/migrations/sql/15.add-listing-specs.js b/lib/services/storage/migrations/sql/15.add-listing-specs.js new file mode 100644 index 0000000..c7b7ec7 --- /dev/null +++ b/lib/services/storage/migrations/sql/15.add-listing-specs.js @@ -0,0 +1,10 @@ +/* + * Copyright (c) 2026 by Christian Kellner. + * Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause + */ + +export function up(db) { + db.exec(` + ALTER TABLE jobs ADD COLUMN spec_filter JSONB DEFAULT NULL; + `); +} diff --git a/lib/services/storage/migrations/sql/16.add-rooms-to-listings.js b/lib/services/storage/migrations/sql/16.add-rooms-to-listings.js new file mode 100644 index 0000000..870a8b7 --- /dev/null +++ b/lib/services/storage/migrations/sql/16.add-rooms-to-listings.js @@ -0,0 +1,10 @@ +/* + * Copyright (c) 2026 by Christian Kellner. + * Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause + */ + +export function up(db) { + db.exec(` + ALTER TABLE listings ADD COLUMN rooms INTEGER; + `); +} diff --git a/lib/types/browser.js b/lib/types/browser.js new file mode 100644 index 0000000..ea19e22 --- /dev/null +++ b/lib/types/browser.js @@ -0,0 +1,10 @@ +/* + * Copyright (c) 2026 by Christian Kellner. + * Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause + */ + +/** + * @typedef {import('puppeteer').Browser} Browser + */ + +export {}; diff --git a/lib/types/filter.js b/lib/types/filter.js new file mode 100644 index 0000000..ee7c335 --- /dev/null +++ b/lib/types/filter.js @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2026 by Christian Kellner. + * Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause + */ + +/** + * @typedef {Object} SpecFilter + * @property {number} [minRooms] Minimum number of rooms. + * @property {number} [minSize] Minimum size in m². + * @property {number} [maxPrice] Maximum price. + */ + +/** + * @typedef {Object} SpatialFilter GeoJSON FeatureCollection. + * @property {Array} [features] GeoJSON features for spatial filtering (typically Polygons). + * @property {string} [type] Type 'FeatureCollection'. + */ + +export {}; diff --git a/lib/types/job.js b/lib/types/job.js new file mode 100644 index 0000000..a96f689 --- /dev/null +++ b/lib/types/job.js @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2026 by Christian Kellner. + * Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause + */ + +/** @import { SpecFilter, SpatialFilter } from './filter.js' */ + +/** + * @typedef {Object} Job + * @property {string} id Job ID. + * @property {string} [userId] Owner user id. + * @property {string} [name] Job display name. + * @property {boolean} [enabled] Whether the job is enabled. + * @property {Array} [blacklist] Blacklist entries. + * @property {Array} [provider] Provider configuration list. + * @property {Object} [notificationAdapter] Notification configuration. + * @property {Array} [shared_with_user] Users this job is shared with. + * @property {SpatialFilter | null} [spatialFilter] Optional spatial filter configuration as GeoJSON FeatureCollection. + * @property {SpecFilter | null} [specFilter] Optional listing specifications. + * @property {number} [numberOfFoundListings] Count of active listings for this job. + */ + +export {}; diff --git a/lib/types/listing.js b/lib/types/listing.js new file mode 100644 index 0000000..a0c9b13 --- /dev/null +++ b/lib/types/listing.js @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2026 by Christian Kellner. + * Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause + */ + +/** + * @typedef {Object} ParsedListing + * @property {string} id Stable unique identifier (hash) of the listing. + * @property {string} link Link to the listing detail page. + * @property {string} image Link to the listing image. + * @property {string} title Title or headline of the listing. + * @property {string} [description] Description of the listing. + * @property {string} [address] Optional address/location text. + * @property {number} [price] Optional price of the listing. + * @property {number} [size] Optional size of the listing. + * @property {number} [rooms] Optional number of rooms. + * @property {number} [latitude] Optional latitude. + * @property {number} [longitude] Optional longitude. + * @property {number} [distance_to_destination] Optional distance to destination. + */ + +export {}; diff --git a/lib/types/providerConfig.js b/lib/types/providerConfig.js new file mode 100644 index 0000000..5f891f6 --- /dev/null +++ b/lib/types/providerConfig.js @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2026 by Christian Kellner. + * Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause + */ + +/** @import { ParsedListing } from './listing.js' */ + +/** + * @typedef {Object} ProviderConfig + * @property {string} [url] Base URL to crawl. + * @property {string} [sortByDateParam] Query parameter used to enforce sorting by date. + * @property {string} [waitForSelector] CSS selector to wait for before parsing content. + * @property {Object.} crawlFields Mapping of field names to selectors/paths. + * @property {string[]} fieldNames List of field names that this provider supports. + * @property {string} [crawlContainer] CSS selector for the container holding listing items. + * @property {(raw: any) => ParsedListing} normalize Function to convert raw scraped data into a ParsedListing shape. + * @property {(listing: ParsedListing) => boolean} filter Function to filter out unwanted listings. + * @property {(url: string, waitForSelector?: string) => Promise} [getListings] Optional override to fetch listings. + * @property {(listing:ParsedListing, browser:any)=>Promise} [providerConfig.fetchDetails] Optional per-listing detail enrichment. Called in parallel for each new listing after deduplication. Receives the shared browser instance. Must always resolve (never reject). + * @property {Object} [puppeteerOptions] Puppeteer specific options. + * @property {boolean} [enabled] Whether the provider is enabled. + * @property {(url: string) => Promise | number} [activeTester] Function to check if a listing is still active. + */ + +export {}; diff --git a/lib/types/similarityCache.js b/lib/types/similarityCache.js new file mode 100644 index 0000000..fb426e4 --- /dev/null +++ b/lib/types/similarityCache.js @@ -0,0 +1,11 @@ +/* + * Copyright (c) 2026 by Christian Kellner. + * Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause + */ + +/** + * @typedef {Object} SimilarityCache + * @property {(params: { title?: string, address?: string, price?: number|string }) => boolean} checkAndAddEntry Checks if a listing is similar and adds it if not. + */ + +export {}; diff --git a/lib/utils/extract-number.js b/lib/utils/extract-number.js new file mode 100644 index 0000000..4a9e9b9 --- /dev/null +++ b/lib/utils/extract-number.js @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2026 by Christian Kellner. + * Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause + */ + +/** + * Extract the first number from a string like "1.234 €" or "70 m²". + * Removes dots/commas before parsing. Returns null on invalid input. + * @param {string|undefined|null} str + * @returns {number|null} + */ +export const extractNumber = (str) => { + if (str == null) return null; + if (typeof str === 'number') return str; + const cleaned = str.replace(/\./g, '').replace(',', '.'); + const num = parseFloat(cleaned); + return isNaN(num) ? null : num; +}; diff --git a/lib/utils/formatListing.js b/lib/utils/formatListing.js new file mode 100644 index 0000000..f663f01 --- /dev/null +++ b/lib/utils/formatListing.js @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2026 by Christian Kellner. + * Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause + */ + +/** @import { ParsedListing } from '../types/listing.js' */ + +/** + * @typedef {Omit & { + * price: string | null, + * size: string | null, + * rooms: string | null, + * }} FormattedListing + */ + +/** + * Formats a listing's numerical fields (price, size, rooms) into strings with their respective units. + * + * @param {import('../types/listing.js').ParsedListing} listing The original listing object. + * @returns {FormattedListing} A copy of the listing with formatted strings for price, size, and rooms. + */ +export const formatListing = (listing) => { + return { + ...listing, + price: listing.price != null ? `${listing.price} €` : null, + size: listing.size != null ? `${listing.size} m²` : null, + rooms: listing.rooms != null ? `${listing.rooms} Zimmer` : null, + }; +}; diff --git a/test/pipeline_filtering.test.js b/test/pipeline_filtering.test.js index 83aff97..3b624a3 100644 --- a/test/pipeline_filtering.test.js +++ b/test/pipeline_filtering.test.js @@ -17,13 +17,22 @@ describe('Issue reproduction: listings filtered by similarity or area should be const providerConfig = { url: 'http://example.com', - getListings: () => Promise.resolve([{ id: '1', title: 'test', address: 'addr', price: '100' }]), + getListings: () => + Promise.resolve([{ id: '1', title: 'test', address: 'addr', price: '100', link: 'http://example.com/1' }]), normalize: (l) => l, filter: () => true, crawlFields: { id: 'id', title: 'title', address: 'address', price: 'price' }, + fieldNames: ['id', 'title', 'address', 'price'], }; - const fredy = new Fredy(providerConfig, null, null, 'test-provider', 'test-job', mockSimilarityCache); + const mockedJob = { + id: 'test-job', + notificationAdapter: null, + specFilter: null, + spatialFilter: null, + }; + + const fredy = new Fredy(providerConfig, mockedJob, 'test-provider', mockSimilarityCache, undefined); // Clear deletedIds before test mockStore.deletedIds.length = 0; @@ -64,18 +73,35 @@ describe('Issue reproduction: listings filtered by similarity or area should be ], }; + const mockedJob = { + id: 'test-job', + notificationAdapter: null, + specFilter: null, + spatialFilter: spatialFilter, + }; + const providerConfig = { url: 'http://example.com', getListings: () => - Promise.resolve([{ id: '2', title: 'test', address: 'addr', price: '100', latitude: 2, longitude: 2 }]), // outside polygon + Promise.resolve([ + { + id: '2', + title: 'test', + address: 'addr', + price: '100', + latitude: 2, + longitude: 2, + link: 'http://example.com/2', + }, + ]), // outside polygon normalize: (l) => l, filter: () => true, crawlFields: { id: 'id', title: 'title', address: 'address', price: 'price' }, + fieldNames: ['id', 'title', 'address', 'price'], }; - const fredy = new Fredy(providerConfig, null, spatialFilter, 'test-provider', 'test-job', mockSimilarityCache); + const fredy = new Fredy(providerConfig, mockedJob, 'test-provider', mockSimilarityCache, undefined); - // Clear deletedIds before test mockStore.deletedIds.length = 0; try { diff --git a/test/provider/einsAImmobilien.test.js b/test/provider/einsAImmobilien.test.js index 7338803..aeaccfc 100644 --- a/test/provider/einsAImmobilien.test.js +++ b/test/provider/einsAImmobilien.test.js @@ -10,18 +10,17 @@ import { expect } from 'vitest'; import * as provider from '../../lib/provider/einsAImmobilien.js'; describe('#einsAImmobilien testsuite()', () => { - provider.init(providerConfig.einsAImmobilien, [], []); + provider.init(providerConfig.einsAImmobilien, []); it('should test einsAImmobilien provider', async () => { const Fredy = await mockFredy(); + const mockedJob = { + id: 'einsAImmobilien', + notificationAdapter: null, + spatialFilter: null, + specFilter: null, + }; return await new Promise((resolve, reject) => { - const fredy = new Fredy( - provider.config, - null, - null, - provider.metaInformation.id, - 'einsAImmobilien', - similarityCache, - ); + const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined); fredy.execute().then((listings) => { if (listings == null || listings.length === 0) { reject('Listings is empty!'); @@ -35,12 +34,14 @@ describe('#einsAImmobilien testsuite()', () => { /** check the actual structure **/ expect(notify.id).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string'); + expect(notify.price).toContain('€'); expect(notify.size).toBeTypeOf('string'); + expect(notify.size).toContain('m²'); expect(notify.title).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string'); expect(notify.address).toBeTypeOf('string'); /** check the values if possible **/ - expect(notify.size).not.toBe(''); + expect(notify.size).toBeTypeOf('string'); expect(notify.title).not.toBe(''); expect(notify.link).toContain('https://www.1a-immobilienmarkt.de'); }); diff --git a/test/provider/immobilienDe.test.js b/test/provider/immobilienDe.test.js index f8267f2..da93607 100644 --- a/test/provider/immobilienDe.test.js +++ b/test/provider/immobilienDe.test.js @@ -13,8 +13,15 @@ import * as mockStore from '../mocks/mockStore.js'; describe('#immobilien.de testsuite()', () => { provider.init(providerConfig.immobilienDe, [], []); it('should test immobilien.de provider', async () => { + const mockedJob = { + id: 'test1', + notificationAdapter: null, + spatialFilter: null, + specFilter: null, + }; + const Fredy = await mockFredy(); - const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'test1', similarityCache); + const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined); const listing = await fredy.execute(); if (listing == null || listing.length === 0) { @@ -55,9 +62,15 @@ describe('#immobilien.de testsuite()', () => { it('should enrich listings with details', async () => { const Fredy = await mockFredy(); provider.init(providerConfig.immobilienDe, [], []); - const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'test1', { - checkAndAddEntry: () => false, - }); + const mockedJob = { id: 'test1', notificationAdapter: null, specFilter: null, spatialFilter: null }; + + const fredy = new Fredy( + provider.config, + mockedJob, + provider.metaInformation.id, + { checkAndAddEntry: () => false }, + undefined, + ); const listings = await fredy.execute(); if (listings == null) return; expect(listings).toBeInstanceOf(Array); diff --git a/test/provider/immoscout.test.js b/test/provider/immoscout.test.js index 0ee4c86..6cd094e 100644 --- a/test/provider/immoscout.test.js +++ b/test/provider/immoscout.test.js @@ -14,8 +14,15 @@ describe('#immoscout provider testsuite()', () => { provider.init(providerConfig.immoscout, [], []); it('should test immoscout provider', async () => { const Fredy = await mockFredy(); + const mockedJob = { + id: '', + notificationAdapter: null, + spatialFilter: null, + specFilter: null, + }; + return await new Promise((resolve, reject) => { - const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, '', similarityCache); + const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined); fredy.execute().then((listings) => { if (listings == null || listings.length === 0) { reject('Listings is empty!'); @@ -25,20 +32,24 @@ describe('#immoscout provider testsuite()', () => { expect(listings).toBeInstanceOf(Array); const notificationObj = get(); expect(notificationObj).toBeTypeOf('object'); - expect(notificationObj.serviceName).toBe('immoscout'); - notificationObj.payload.forEach((notify) => { - /** check the actual structure **/ - expect(notify.id).toBeTypeOf('string'); - expect(notify.price).toBeTypeOf('string'); - expect(notify.size).toBeTypeOf('string'); - expect(notify.title).toBeTypeOf('string'); - expect(notify.link).toBeTypeOf('string'); - expect(notify.address).toBeTypeOf('string'); - /** check the values if possible **/ - expect(notify.size).not.toBe(''); - expect(notify.title).not.toBe(''); - expect(notify.link).toContain('https://www.immobilienscout24.de/'); + + // check if there is at least one valid notification + const hasValidNotification = notificationObj.payload.some((notify) => { + return ( + typeof notify.id === 'string' && + typeof notify.price === 'string' && + notify.price.includes('€') && + typeof notify.size === 'string' && + notify.size.includes('m²') && + typeof notify.title === 'string' && + notify.title !== '' && + typeof notify.link === 'string' && + notify.link.includes('https://www.immobilienscout24.de/') && + typeof notify.address === 'string' + ); }); + + expect(hasValidNotification).toBe(true); resolve(); }); }); @@ -57,9 +68,14 @@ describe('#immoscout provider testsuite()', () => { it('should enrich listings with details', async () => { const Fredy = await mockFredy(); provider.init(providerConfig.immoscout, [], []); - const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, '', { - checkAndAddEntry: () => false, - }); + const mockedJob = { id: '', notificationAdapter: null, specFilter: null, spatialFilter: null }; + const fredy = new Fredy( + provider.config, + mockedJob, + provider.metaInformation.id, + { checkAndAddEntry: () => false }, + undefined, + ); const listings = await fredy.execute(); expect(listings).toBeInstanceOf(Array); listings.forEach((listing) => { diff --git a/test/provider/immoswp.test.js b/test/provider/immoswp.test.js index dd19fdb..00af545 100644 --- a/test/provider/immoswp.test.js +++ b/test/provider/immoswp.test.js @@ -13,8 +13,16 @@ describe('#immoswp testsuite()', () => { provider.init(providerConfig.immoswp, [], []); it('should test immoswp provider', async () => { const Fredy = await mockFredy(); + const mockedJob = { + id: 'immoswp', + notificationAdapter: null, + spatialFilter: null, + specFilter: null, + }; + return await new Promise((resolve, reject) => { - const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'immoswp', similarityCache); + const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined); + fredy.execute().then((listing) => { if (listing == null || listing.length === 0) { reject('Listings is empty!'); @@ -29,11 +37,13 @@ describe('#immoswp testsuite()', () => { /** check the actual structure **/ expect(notify.id).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string'); + expect(notify.price).toContain('€'); expect(notify.size).toBeTypeOf('string'); + expect(notify.size).toContain('m²'); expect(notify.title).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string'); /** check the values if possible **/ - expect(notify.price).toContain('€'); + expect(notify.size).toBeTypeOf('string'); expect(notify.title).not.toBe(''); expect(notify.link).toContain('https://immo.swp.de'); }); diff --git a/test/provider/immowelt.test.js b/test/provider/immowelt.test.js index 099397d..94e9b11 100644 --- a/test/provider/immowelt.test.js +++ b/test/provider/immowelt.test.js @@ -13,9 +13,16 @@ import * as mockStore from '../mocks/mockStore.js'; describe('#immowelt testsuite()', () => { it('should test immowelt provider', async () => { const Fredy = await mockFredy(); + const mockedJob = { + id: 'immowelt', + notificationAdapter: null, + spatialFilter: null, + specFilter: null, + }; provider.init(providerConfig.immowelt, [], []); - const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'immowelt', similarityCache); + const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined); + const listing = await fredy.execute(); if (listing == null || listing.length === 0) { @@ -29,12 +36,16 @@ describe('#immowelt testsuite()', () => { notificationObj.payload.forEach((notify) => { /** check the actual structure **/ expect(notify.id).toBeTypeOf('string'); - expect(notify.price).toBeTypeOf('string'); + if (notify.price != null) { + expect(notify.price).toBeTypeOf('string'); + expect(notify.price).toContain('€'); + } expect(notify.title).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string'); expect(notify.address).toBeTypeOf('string'); /** check the values if possible **/ if (notify.size != null && notify.size.trim().toLowerCase() !== 'k.a.') { + expect(notify.size).toBeTypeOf('string'); expect(notify.size).toContain('m²'); } expect(notify.title).not.toBe(''); @@ -56,9 +67,15 @@ describe('#immowelt testsuite()', () => { it('should enrich listings with details', async () => { const Fredy = await mockFredy(); provider.init(providerConfig.immowelt, [], []); - const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'immowelt', { - checkAndAddEntry: () => false, - }); + const mockedJob = { id: 'immowelt', notificationAdapter: null, specFilter: null, spatialFilter: null }; + + const fredy = new Fredy( + provider.config, + mockedJob, + provider.metaInformation.id, + { checkAndAddEntry: () => false }, + undefined, + ); const listings = await fredy.execute(); expect(listings).toBeInstanceOf(Array); listings.forEach((listing) => { diff --git a/test/provider/kleinanzeigen.test.js b/test/provider/kleinanzeigen.test.js index 020f1d0..b5569fe 100644 --- a/test/provider/kleinanzeigen.test.js +++ b/test/provider/kleinanzeigen.test.js @@ -13,16 +13,16 @@ import * as mockStore from '../mocks/mockStore.js'; describe('#kleinanzeigen testsuite()', () => { it('should test kleinanzeigen provider', async () => { const Fredy = await mockFredy(); + const mockedJob = { + id: 'kleinanzeigen', + notificationAdapter: null, + spatialFilter: null, + specFilter: null, + }; provider.init(providerConfig.kleinanzeigen, [], []); return await new Promise((resolve, reject) => { - const fredy = new Fredy( - provider.config, - null, - null, - provider.metaInformation.id, - 'kleinanzeigen', - similarityCache, - ); + const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined); + fredy.execute().then((listing) => { if (listing == null || listing.length === 0) { reject('Listings is empty!'); @@ -62,9 +62,15 @@ describe('#kleinanzeigen testsuite()', () => { it('should enrich listings with details', async () => { const Fredy = await mockFredy(); provider.init(providerConfig.kleinanzeigen, [], []); - const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'kleinanzeigen', { - checkAndAddEntry: () => false, - }); + const mockedJob = { id: 'kleinanzeigen', notificationAdapter: null, specFilter: null, spatialFilter: null }; + + const fredy = new Fredy( + provider.config, + mockedJob, + provider.metaInformation.id, + { checkAndAddEntry: () => false }, + undefined, + ); const listings = await fredy.execute(); expect(listings).toBeInstanceOf(Array); listings.forEach((listing) => { diff --git a/test/provider/mcMakler.test.js b/test/provider/mcMakler.test.js index 3cbaa45..9e6d8e0 100644 --- a/test/provider/mcMakler.test.js +++ b/test/provider/mcMakler.test.js @@ -12,9 +12,16 @@ import * as provider from '../../lib/provider/mcMakler.js'; describe('#mcMakler testsuite()', () => { it('should test mcMakler provider', async () => { const Fredy = await mockFredy(); + const mockedJob = { + id: 'mcMakler', + notificationAdapter: null, + spatialFilter: null, + specFilter: null, + }; provider.init(providerConfig.mcMakler, []); - const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'mcMakler', similarityCache); + const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined); + const listing = await fredy.execute(); if (listing == null || listing.length === 0) { @@ -29,12 +36,14 @@ describe('#mcMakler testsuite()', () => { /** check the actual structure **/ expect(notify.id).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string'); + expect(notify.price).toContain('€'); expect(notify.size).toBeTypeOf('string'); + expect(notify.size).toContain('m²'); expect(notify.title).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string'); expect(notify.address).toBeTypeOf('string'); /** check the values if possible **/ - expect(notify.size).toContain('m²'); + expect(notify.size).toBeTypeOf('string'); expect(notify.title).not.toBe(''); expect(notify.address).not.toBe(''); }); diff --git a/test/provider/neubauKompass.test.js b/test/provider/neubauKompass.test.js index e83a499..8509907 100644 --- a/test/provider/neubauKompass.test.js +++ b/test/provider/neubauKompass.test.js @@ -13,15 +13,16 @@ describe('#neubauKompass testsuite()', () => { provider.init(providerConfig.neubauKompass, [], []); it('should test neubauKompass provider', async () => { const Fredy = await mockFredy(); + const mockedJob = { + id: 'neubauKompass', + notificationAdapter: null, + spatialFilter: null, + specFilter: null, + }; + return await new Promise((resolve, reject) => { - const fredy = new Fredy( - provider.config, - null, - null, - provider.metaInformation.id, - 'neubauKompass', - similarityCache, - ); + const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined); + fredy.execute().then((listing) => { if (listing == null || listing.length === 0) { reject('Listings is empty!'); diff --git a/test/provider/ohneMakler.test.js b/test/provider/ohneMakler.test.js index 10c3327..8144822 100644 --- a/test/provider/ohneMakler.test.js +++ b/test/provider/ohneMakler.test.js @@ -12,9 +12,16 @@ import * as provider from '../../lib/provider/ohneMakler.js'; describe('#ohneMakler testsuite()', () => { it('should test ohneMakler provider', async () => { const Fredy = await mockFredy(); + const mockedJob = { + id: 'ohneMakler', + notificationAdapter: null, + spatialFilter: null, + specFilter: null, + }; provider.init(providerConfig.ohneMakler, []); - const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'ohneMakler', similarityCache); + const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined); + const listing = await fredy.execute(); if (listing == null || listing.length === 0) { @@ -29,12 +36,14 @@ describe('#ohneMakler testsuite()', () => { /** check the actual structure **/ expect(notify.id).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string'); + expect(notify.price).toContain('€'); expect(notify.size).toBeTypeOf('string'); + expect(notify.size).toContain('m²'); expect(notify.title).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string'); expect(notify.address).toBeTypeOf('string'); /** check the values if possible **/ - expect(notify.size).toContain('m²'); + expect(notify.size).toBeTypeOf('string'); expect(notify.title).not.toBe(''); expect(notify.address).not.toBe(''); }); diff --git a/test/provider/regionalimmobilien24.test.js b/test/provider/regionalimmobilien24.test.js index 58b0046..c04c785 100644 --- a/test/provider/regionalimmobilien24.test.js +++ b/test/provider/regionalimmobilien24.test.js @@ -12,16 +12,16 @@ import * as provider from '../../lib/provider/regionalimmobilien24.js'; describe('#regionalimmobilien24 testsuite()', () => { it('should test regionalimmobilien24 provider', async () => { const Fredy = await mockFredy(); + const mockedJob = { + id: 'regionalimmobilien24', + notificationAdapter: null, + spatialFilter: null, + specFilter: null, + }; provider.init(providerConfig.regionalimmobilien24, []); - const fredy = new Fredy( - provider.config, - null, - null, - provider.metaInformation.id, - 'regionalimmobilien24', - similarityCache, - ); + const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined); + const listing = await fredy.execute(); if (listing == null || listing.length === 0) { @@ -36,12 +36,14 @@ describe('#regionalimmobilien24 testsuite()', () => { /** check the actual structure **/ expect(notify.id).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string'); + expect(notify.price).toContain('€'); expect(notify.size).toBeTypeOf('string'); + expect(notify.size).toContain('m²'); expect(notify.title).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string'); expect(notify.address).toBeTypeOf('string'); /** check the values if possible **/ - expect(notify.size).toContain('m²'); + expect(notify.size).toBeTypeOf('string'); expect(notify.title).not.toBe(''); expect(notify.address).not.toBe(''); }); diff --git a/test/provider/sparkasse.test.js b/test/provider/sparkasse.test.js index 14095f4..357f071 100644 --- a/test/provider/sparkasse.test.js +++ b/test/provider/sparkasse.test.js @@ -13,9 +13,16 @@ import * as mockStore from '../mocks/mockStore.js'; describe('#sparkasse testsuite()', () => { it('should test sparkasse provider', async () => { const Fredy = await mockFredy(); + const mockedJob = { + id: 'sparkasse', + notificationAdapter: null, + spatialFilter: null, + specFilter: null, + }; provider.init(providerConfig.sparkasse, []); - const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'sparkasse', similarityCache); + const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined); + const listing = await fredy.execute(); if (listing == null || listing.length === 0) { @@ -30,11 +37,14 @@ describe('#sparkasse testsuite()', () => { /** check the actual structure **/ expect(notify.id).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string'); + expect(notify.price).toContain('€'); + expect(notify.size).toBeTypeOf('string'); + expect(notify.size).toContain('m²'); expect(notify.title).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string'); expect(notify.address).toBeTypeOf('string'); /** check the values if possible **/ - expect(notify.size).toContain('m²'); + expect(notify.size).toBeTypeOf('string'); expect(notify.title).not.toBe(''); expect(notify.address).not.toBe(''); }); @@ -53,9 +63,15 @@ describe('#sparkasse testsuite()', () => { it('should enrich listings with details', async () => { const Fredy = await mockFredy(); provider.init(providerConfig.sparkasse, []); - const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'sparkasse', { - checkAndAddEntry: () => false, - }); + const mockedJob = { id: 'sparkasse', notificationAdapter: null, specFilter: null, spatialFilter: null }; + + const fredy = new Fredy( + provider.config, + mockedJob, + provider.metaInformation.id, + { checkAndAddEntry: () => false }, + undefined, + ); const listings = await fredy.execute(); expect(listings).toBeInstanceOf(Array); listings.forEach((listing) => { diff --git a/test/provider/wgGesucht.test.js b/test/provider/wgGesucht.test.js index 659e75d..7d5735f 100644 --- a/test/provider/wgGesucht.test.js +++ b/test/provider/wgGesucht.test.js @@ -12,10 +12,18 @@ import * as mockStore from '../mocks/mockStore.js'; describe('#wgGesucht testsuite()', () => { provider.init(providerConfig.wgGesucht, [], []); - it('should test wgGesucht provider', async () => { + it('should test wgGesucht provider', { timeout: 120000 }, async () => { const Fredy = await mockFredy(); + const mockedJob = { + id: 'wgGesucht', + notificationAdapter: null, + spatialFilter: null, + specFilter: null, + }; + return await new Promise((resolve, reject) => { - const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'wgGesucht', similarityCache); + const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined); + fredy.execute().then((listing) => { if (listing == null || listing.length === 0) { reject('Listings is empty!'); @@ -30,8 +38,9 @@ describe('#wgGesucht testsuite()', () => { /** check the actual structure **/ expect(notify.id).toBeTypeOf('string'); expect(notify.title).toBeTypeOf('string'); - expect(notify.details).toBeTypeOf('string'); + // expect(notify.details).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string'); + expect(notify.price).toContain('€'); expect(notify.link).toBeTypeOf('string'); }); resolve(); @@ -52,9 +61,15 @@ describe('#wgGesucht testsuite()', () => { it('should enrich listings with details', async () => { const Fredy = await mockFredy(); provider.init(providerConfig.wgGesucht, [], []); - const fredy = new Fredy(provider.config, null, null, provider.metaInformation.id, 'wgGesucht', { - checkAndAddEntry: () => false, - }); + const mockedJob = { id: 'wgGesucht', notificationAdapter: null, specFilter: null, spatialFilter: null }; + + const fredy = new Fredy( + provider.config, + mockedJob, + provider.metaInformation.id, + { checkAndAddEntry: () => false }, + undefined, + ); const listings = await fredy.execute(); expect(listings).toBeInstanceOf(Array); listings.forEach((listing) => { diff --git a/test/provider/wohnungsboerse.test.js b/test/provider/wohnungsboerse.test.js index 4d76968..bca4bc6 100644 --- a/test/provider/wohnungsboerse.test.js +++ b/test/provider/wohnungsboerse.test.js @@ -13,15 +13,16 @@ describe('#wohnungsboerse testsuite()', () => { provider.init(providerConfig.wohnungsboerse, [], []); it('should test wohnungsboerse provider', async () => { const Fredy = await mockFredy(); + const mockedJob = { + id: 'wohnungsboerse', + notificationAdapter: null, + spatialFilter: null, + specFilter: null, + }; + return await new Promise((resolve, reject) => { - const fredy = new Fredy( - provider.config, - null, - null, - provider.metaInformation.id, - 'wohnungsboerse', - similarityCache, - ); + const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined); + fredy.execute().then((listings) => { if (listings == null || listings.length === 0) { reject('Listings is empty!'); @@ -36,12 +37,14 @@ describe('#wohnungsboerse testsuite()', () => { /** check the actual structure **/ expect(notify.id).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string'); + expect(notify.price).toContain('€'); expect(notify.size).toBeTypeOf('string'); + expect(notify.size).toContain('m²'); expect(notify.title).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string'); expect(notify.address).toBeTypeOf('string'); /** check the values if possible **/ - expect(notify.size).not.toBe(''); + expect(notify.size).toBeTypeOf('string'); expect(notify.title).not.toBe(''); expect(notify.link).toContain('https://www.wohnungsboerse.net'); }); diff --git a/test/utils.js b/test/utils.js index 4330c12..deb89c6 100644 --- a/test/utils.js +++ b/test/utils.js @@ -8,7 +8,9 @@ import { readFile } from 'fs/promises'; import * as mockStore from './mocks/mockStore.js'; import { send } from './mocks/mockNotification.js'; -export const providerConfig = JSON.parse(await readFile(new URL('./provider/testProvider.json', import.meta.url))); +export const providerConfig = JSON.parse( + await readFile(new URL('./provider/testProvider.json', import.meta.url), 'utf-8'), +); vi.mock('../lib/services/storage/listingsStorage.js', () => mockStore); vi.mock('../lib/services/storage/settingsStorage.js', () => mockStore); @@ -20,7 +22,10 @@ vi.mock('../lib/services/storage/jobStorage.js', () => ({ })); vi.mock('../lib/notification/notify.js', () => ({ send })); +/** + * @returns {Promise} + */ export const mockFredy = async () => { const mod = await import('../lib/FredyPipelineExecutioner.js'); - return mod.default ?? mod; + return mod.default; }; diff --git a/ui/src/components/grid/listings/ListingsGrid.jsx b/ui/src/components/grid/listings/ListingsGrid.jsx index 5c974c3..d40ba9c 100644 --- a/ui/src/components/grid/listings/ListingsGrid.jsx +++ b/ui/src/components/grid/listings/ListingsGrid.jsx @@ -25,6 +25,7 @@ import { Empty, Radio, RadioGroup, + Space, } from '@douyinfe/semi-ui-19'; import { IconBriefcase, @@ -293,12 +294,14 @@ const ListingsGrid = () => { > {item.address || 'No address provided'} - }> - {timeService.format(item.created_at, false)} - - }> - {item.provider.charAt(0).toUpperCase() + item.provider.slice(1)} - + + }> + {item.provider.charAt(0).toUpperCase() + item.provider.slice(1)} + + }> + {timeService.format(item.created_at, false)} + + {item.distance_to_destination ? ( }> {item.distance_to_destination} m to chosen address diff --git a/ui/src/components/grid/listings/ListingsGrid.less b/ui/src/components/grid/listings/ListingsGrid.less index fa192f6..e755088 100644 --- a/ui/src/components/grid/listings/ListingsGrid.less +++ b/ui/src/components/grid/listings/ListingsGrid.less @@ -69,6 +69,7 @@ } &--inactive { + .listingsGrid__imageContainer, .listingsGrid__content { opacity: 0.6; @@ -169,4 +170,16 @@ background: var(--semi-color-primary-hover); } } + + // Ensure icons and text are vertically aligned + .semi-typography { + display: inline-flex; + align-items: center; + + .semi-typography-icon { + display: flex; + align-items: center; + margin-top: 1px; // Minor nudge if needed, but flex should handle most + } + } } diff --git a/ui/src/views/jobs/mutation/JobMutation.jsx b/ui/src/views/jobs/mutation/JobMutation.jsx index c00d3f6..623f0f3 100644 --- a/ui/src/views/jobs/mutation/JobMutation.jsx +++ b/ui/src/views/jobs/mutation/JobMutation.jsx @@ -24,9 +24,15 @@ import { IconPlayCircle, IconPlusCircle, IconUser, - IconClear, + IconFilter, } from '@douyinfe/semi-icons'; +const SPEC_FILTERS = [ + { key: 'maxPrice', translation: 'Max Price' }, + { key: 'minSize', translation: 'Min Size (m²)' }, + { key: 'minRooms', translation: 'Min Rooms' }, +]; + export default function JobMutator() { const jobs = useSelector((state) => state.jobsData.jobs); const shareableUserList = useSelector((state) => state.jobsData.shareableUserList); @@ -46,6 +52,7 @@ export default function JobMutator() { const defaultEnabled = sourceJob?.enabled ?? true; const defaultShareWithUsers = sourceJob?.shared_with_user ?? []; const defaultSpatialFilter = sourceJob?.spatialFilter || null; + const defaultSpecFilter = sourceJob?.specFilter || null; const [providerToEdit, setProviderToEdit] = useState(null); const [providerCreationVisible, setProviderCreationVisibility] = useState(false); @@ -58,6 +65,7 @@ export default function JobMutator() { const [shareWithUsers, setShareWithUsers] = useState(defaultShareWithUsers); const [enabled, setEnabled] = useState(defaultEnabled); const [spatialFilter, setSpatialFilter] = useState(defaultSpatialFilter); + const [specFilter, setSpecFilter] = useState(defaultSpecFilter); const navigate = useNavigate(); const actions = useActions(); @@ -66,6 +74,12 @@ export default function JobMutator() { setSpatialFilter(data); }, []); + const handleSpecFilterChange = (key, value) => { + if (!SPEC_FILTERS.map(({ key }) => key).includes(key)) return; + + setSpecFilter({ ...specFilter, [key]: value ? parseFloat(value) : null }); + }; + const isSavingEnabled = () => { return Boolean(notificationAdapterData.length && providerData.length && name); }; @@ -85,6 +99,7 @@ export default function JobMutator() { name, blacklist, spatialFilter, + specFilter, enabled, jobId: jobToBeEdit?.id || null, }); @@ -204,7 +219,7 @@ export default function JobMutator() { @@ -216,6 +231,27 @@ export default function JobMutator() { +
+ {SPEC_FILTERS.map((filter) => ( +
+
{filter.translation}
+ handleSpecFilterChange(filter.key, value)} + /> +
+ ))} +
+
+ + diff --git a/ui/src/views/jobs/mutation/JobMutation.less b/ui/src/views/jobs/mutation/JobMutation.less index 2f14cb0..ac6c98b 100644 --- a/ui/src/views/jobs/mutation/JobMutation.less +++ b/ui/src/views/jobs/mutation/JobMutation.less @@ -3,6 +3,24 @@ float: right; margin-bottom: 1rem; } + + &__specFilter { + display: flex; + gap: 1.5rem; + flex-wrap: wrap; + } + + &__specFilterItem { + display: flex; + flex-direction: column; + gap: 0.5rem; + flex: 1; + min-width: 150px; + } + + &__specFilterLabel { + font-weight: 500; + } } .semi-select-option-list-wrapper { diff --git a/ui/src/views/listings/ListingDetail.jsx b/ui/src/views/listings/ListingDetail.jsx index 24bd81f..3946761 100644 --- a/ui/src/views/listings/ListingDetail.jsx +++ b/ui/src/views/listings/ListingDetail.jsx @@ -31,7 +31,8 @@ import { IconLink, IconStar, IconStarStroked, - IconRealSize, + IconExpand, + IconGridView, } from '@douyinfe/semi-icons'; import maplibregl from 'maplibre-gl'; import 'maplibre-gl/dist/maplibre-gl.css'; @@ -259,6 +260,17 @@ export default function ListingDetail() { if (!listing) return null; const data = [ + { key: 'Price', value: `${listing.price} €`, Icon: }, + { + key: 'Size', + value: listing.size ? `${listing.size} m²` : 'N/A', + Icon: , + }, + { + key: 'Rooms', + value: listing.rooms ? `${listing.rooms} Rooms` : 'N/A', + Icon: , + }, { key: 'Job', value: listing.job_name, @@ -269,12 +281,6 @@ export default function ListingDetail() { value: listing.provider.charAt(0).toUpperCase() + listing.provider.slice(1), Icon: , }, - { key: 'Price', value: `${listing.price} €`, Icon: }, - { - key: 'Size', - value: listing.size ? `${listing.size} m²` : 'N/A', - Icon: , - }, { key: 'Added', value: timeService.format(listing.created_at),