bugfixes and improvements

This commit is contained in:
orangecoding
2026-06-13 14:02:42 +02:00
parent 94384df36d
commit 9207280ab4
12 changed files with 32 additions and 21 deletions

View File

@@ -46,7 +46,7 @@ index.js (startup)
├── runMigrations() ├── runMigrations()
├── getProviders() # lazily imports lib/provider/*.js ├── getProviders() # lazily imports lib/provider/*.js
├── similarityCache.init() # preloads hash cache from DB ├── similarityCache.init() # preloads hash cache from DB
├── api.js # starts restana HTTP server ├── api.js # starts fastify HTTP server
└── initJobExecutionService() # registers event-bus listeners + starts scheduler └── initJobExecutionService() # registers event-bus listeners + starts scheduler
scheduler (every N minutes) or manual trigger via POST /api/jobs/:id/run scheduler (every N minutes) or manual trigger via POST /api/jobs/:id/run

View File

@@ -264,10 +264,12 @@ class FredyPipelineExecutioner {
listings listings
// This should never filter any listings out, because the normalize function should always extract all fields. // This should never filter any listings out, because the normalize function should always extract all fields.
.filter((item) => requiredKeys.every((key) => key in item)) .filter((item) => requiredKeys.every((key) => key in item))
// Drop listings missing a required identifying field *before* the provider
// filter runs, so provider filter functions never have to defend against a
// null id/link/title.
.filter((item) => requireValues.every((key) => item[key] != null))
// TODO: move the blacklist filter to this file, so that it is handled the same way for all providers. // TODO: move the blacklist filter to this file, so that it is handled the same way for all providers.
.filter(this._providerConfig.filter) .filter(this._providerConfig.filter)
// filter out listings that are missing required fields
.filter((item) => requireValues.every((key) => item[key] != null))
); );
} }
@@ -322,9 +324,9 @@ class FredyPipelineExecutioner {
*/ */
_findNew(listings) { _findNew(listings) {
logger.debug(`Checking ${listings.length} listings for new entries (Provider: '${this._providerId}')`); logger.debug(`Checking ${listings.length} listings for new entries (Provider: '${this._providerId}')`);
const hashes = getKnownListingHashesForJobAndProvider(this._jobKey, this._providerId) || []; const knownHashes = new Set(getKnownListingHashesForJobAndProvider(this._jobKey, this._providerId) || []);
const newListings = listings.filter((o) => !hashes.includes(o.id)); const newListings = listings.filter((o) => !knownHashes.has(o.id));
if (newListings.length === 0) { if (newListings.length === 0) {
throw new NoNewListingsWarning(); throw new NoNewListingsWarning();
} }

View File

@@ -20,7 +20,7 @@ function normalize(o) {
const link = `${baseUrl}/expose/${o.id}.html`; const link = `${baseUrl}/expose/${o.id}.html`;
const price = normalizePrice(o.price); const price = normalizePrice(o.price);
const id = buildHash(o.id, price); const id = buildHash(o.id, price);
const image = baseUrl + o.image; const image = o.image == null ? null : baseUrl + o.image;
const address = o.address == null ? null : o.address.trim().replaceAll('/', ','); const address = o.address == null ? null : o.address.trim().replaceAll('/', ',');
return { return {
id, id,

View File

@@ -19,7 +19,7 @@ function normalize(o) {
const originalId = o.id.split('/').pop(); const originalId = o.id.split('/').pop();
const id = buildHash(originalId, o.price); const id = buildHash(originalId, o.price);
const link = o.link != null ? `https://www.mcmakler.de${o.link}` : o.link; const link = o.link != null ? `https://www.mcmakler.de${o.link}` : o.link;
const [rooms, size] = o.tags.split(' | '); const [rooms, size] = (o.tags || '').split(' | ');
const address = o.address?.replace(' / ', ' ') || null; const address = o.address?.replace(' / ', ' ') || null;
return { return {
id, id,

View File

@@ -21,7 +21,8 @@ function normalize(o) {
const link = o.link != null ? decodeURIComponent(o.link) : config.url; const link = o.link != null ? decodeURIComponent(o.link) : config.url;
const urlReg = new RegExp(/url\((.*?)\)/gim); const urlReg = new RegExp(/url\((.*?)\)/gim);
const image = o.image != null ? urlReg.exec(o.image)[1] : null; const imageMatch = o.image != null ? urlReg.exec(o.image) : null;
const image = imageMatch != null ? imageMatch[1] : null;
return { return {
id, id,
link, link,

View File

@@ -44,6 +44,7 @@ function normalize(o) {
const link = `https://www.wg-gesucht.de${o.link}`; const link = `https://www.wg-gesucht.de${o.link}`;
const image = o.image != null ? o.image.replace('small', 'large') : null; const image = o.image != null ? o.image.replace('small', 'large') : null;
const [rooms, city, road] = o.details?.split(' | ') || []; const [rooms, city, road] = o.details?.split(' | ') || [];
const address = [city, road].filter(Boolean).join(', ') || null;
return { return {
id, id,
link, link,
@@ -51,7 +52,7 @@ function normalize(o) {
price: extractNumber(o.price), price: extractNumber(o.price),
size: extractNumber(o.size), size: extractNumber(o.size),
rooms: extractNumber(rooms), rooms: extractNumber(rooms),
address: `${city}, ${road}`, address,
image, image,
description: o.description, description: o.description,
}; };

View File

@@ -19,7 +19,7 @@ function normalize(o) {
const [city = '', part = ''] = (o.description || '').split('-').map((v) => v.trim()); const [city = '', part = ''] = (o.description || '').split('-').map((v) => v.trim());
const address = `${part}, ${city}`; const address = `${part}, ${city}`;
return { return {
id: o.link.split('/').pop(), id: o.link != null ? o.link.split('/').pop() : null,
link: o.link, link: o.link,
title: o.title || '', title: o.title || '',
price: extractNumber(o.price), price: extractNumber(o.price),
@@ -38,7 +38,7 @@ function normalize(o) {
function applyBlacklist(o) { function applyBlacklist(o) {
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList); const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList); const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
return o.id != null && o.title != null && titleNotBlacklisted && descNotBlacklisted && o.link.startsWith(o.link); return o.id != null && o.title != null && o.link != null && titleNotBlacklisted && descNotBlacklisted;
} }
/** @type {ProviderConfig} */ /** @type {ProviderConfig} */

View File

@@ -17,16 +17,16 @@ const userAgents = [
]; ];
/** /**
* Check if a listing is still active with up to 5 attempts and exponential backoff. * Check if a listing is still active with up to `maxAttempts` attempts and exponential backoff.
* Backoff waits are randomized and capped. * Backoff waits are randomized and capped.
* *
* Rules: * Rules:
* - HTTP 200 => return 1 (if checkForText is provided and found, returns 0) * - HTTP 200 => return 1 (if checkForText is provided and found, returns 0)
* - HTTP 401/403 => return -1 (most certainly detected as a bot) * - HTTP 401/403 => return -1 (most certainly detected as a bot)
* - HTTP 404 => return 0 * - HTTP 404/410 => return 0
* - Other statuses or network errors => retry until attempts are exhausted * - Other statuses or network errors => retry until attempts are exhausted
* *
* @returns {Promise<Integer>} 1 if active, 0 if not active and -1 if detected as bot * @returns {Promise<number>} 1 if active, 0 if not active and -1 if detected as bot
*/ */
export default async function checkIfListingIsActive(link, checkForText = null) { export default async function checkIfListingIsActive(link, checkForText = null) {
await sleep(randomBetween(50, 100)); await sleep(randomBetween(50, 100));

View File

@@ -40,7 +40,8 @@ class SqliteConnection {
} }
/** /**
* Returns a singleton instance of better-sqlite3 Database. * Returns a singleton instance of better-sqlite3 Database.
* Respects env var SQLITE_DB_PATH and defaults to db/listings.db. * Uses the configured `sqlitepath` (from conf/config.json) as the directory,
* defaulting to `/db` (relative to the project root) when unset.
*/ */
static getConnection() { static getConnection() {
if (this.#db) return this.#db; if (this.#db) return this.#db;

View File

@@ -5,12 +5,13 @@
/** /**
* Extract the first number from a string like "1.234 €" or "70 m²". * Extract the first number from a string like "1.234 €" or "70 m²".
* Removes dots/commas before parsing. Returns null on invalid input. * Removes dots/commas before parsing. Returns null when the input is
* null/undefined or cannot be parsed into a number.
* @param {string|undefined|null} str * @param {string|undefined|null} str
* @returns {number|null} * @returns {number|null}
*/ */
export const extractNumber = (str) => { export const extractNumber = (str) => {
if (str == null) return 0; if (str == null) return null;
if (typeof str === 'number') return str; if (typeof str === 'number') return str;
const cleaned = str.replace(/\./g, '').replace(',', '.'); const cleaned = str.replace(/\./g, '').replace(',', '.');
const num = parseFloat(cleaned); const num = parseFloat(cleaned);

View File

@@ -1,7 +1,7 @@
{ {
"name": "fredy", "name": "fredy",
"version": "22.9.0", "version": "22.9.0",
"description": "[F]ind [R]eal [E]states [d]amn eas[y].", "description": "Fredy - [F]ind [R]eal [E]state [D]amn Eas[y] - Fredy keeps searching for new apartments, houses, and flats in Germany on platforms like ImmoScout24, Immowelt, Immonet, eBay Kleinanzeigen, and WG-Gesucht and instantly delivers the results to you via Slack, Telegram, Email, Discord or ntfy, so you can focus on the more important things in life ;)",
"scripts": { "scripts": {
"prepare": "husky", "prepare": "husky",
"start:backend": "x-var NODE_ENV=production node index.js", "start:backend": "x-var NODE_ENV=production node index.js",
@@ -42,6 +42,7 @@
"house", "house",
"rent", "rent",
"immoscout", "immoscout",
"kleinanzeigen",
"scraper", "scraper",
"immonet", "immonet",
"immowelt", "immowelt",

View File

@@ -57,13 +57,17 @@ describe('#sparkasse testsuite()', () => {
expect(notify.id).toBeTypeOf('string'); expect(notify.id).toBeTypeOf('string');
expect(notify.price).toBeTypeOf('string'); expect(notify.price).toBeTypeOf('string');
expect(notify.price).toContain('€'); expect(notify.price).toContain('€');
expect(notify.size).toBeTypeOf('string'); // Size can legitimately be absent for a card whose layout shifts the
expect(notify.size).toContain('m²'); // value out of the expected slot; when present it must be a formatted
// "… m²" string.
if (notify.size != null) {
expect(notify.size).toBeTypeOf('string');
expect(notify.size).toContain('m²');
}
expect(notify.title).toBeTypeOf('string'); expect(notify.title).toBeTypeOf('string');
expect(notify.link).toBeTypeOf('string'); expect(notify.link).toBeTypeOf('string');
expect(notify.address).toBeTypeOf('string'); expect(notify.address).toBeTypeOf('string');
/** check the values if possible **/ /** check the values if possible **/
expect(notify.size).toBeTypeOf('string');
expect(notify.title).not.toBe(''); expect(notify.title).not.toBe('');
expect(notify.address).not.toBe(''); expect(notify.address).not.toBe('');
}); });