bugfixes and improvements

This commit is contained in:
orangecoding
2026-06-13 14:02:42 +02:00
parent 94384df36d
commit 9207280ab4
12 changed files with 32 additions and 21 deletions

View File

@@ -46,7 +46,7 @@ index.js (startup)
├── runMigrations()
├── getProviders() # lazily imports lib/provider/*.js
├── similarityCache.init() # preloads hash cache from DB
├── api.js # starts restana HTTP server
├── api.js # starts fastify HTTP server
└── initJobExecutionService() # registers event-bus listeners + starts scheduler
scheduler (every N minutes) or manual trigger via POST /api/jobs/:id/run

View File

@@ -264,10 +264,12 @@ class FredyPipelineExecutioner {
listings
// this should never filter some listings out, because the normalize function should always extract all fields.
.filter((item) => requiredKeys.every((key) => key in item))
// Drop listings missing a required identifying field *before* the provider
// filter runs, so provider filter functions never have to defend against a
// null id/link/title.
.filter((item) => requireValues.every((key) => item[key] != null))
// TODO: move blacklist filter to this file, so it will handle for all providers in same way.
.filter(this._providerConfig.filter)
// filter out listings that are missing required fields
.filter((item) => requireValues.every((key) => item[key] != null))
);
}
@@ -322,9 +324,9 @@ class FredyPipelineExecutioner {
*/
_findNew(listings) {
logger.debug(`Checking ${listings.length} listings for new entries (Provider: '${this._providerId}')`);
const hashes = getKnownListingHashesForJobAndProvider(this._jobKey, this._providerId) || [];
const knownHashes = new Set(getKnownListingHashesForJobAndProvider(this._jobKey, this._providerId) || []);
const newListings = listings.filter((o) => !hashes.includes(o.id));
const newListings = listings.filter((o) => !knownHashes.has(o.id));
if (newListings.length === 0) {
throw new NoNewListingsWarning();
}

View File

@@ -20,7 +20,7 @@ function normalize(o) {
const link = `${baseUrl}/expose/${o.id}.html`;
const price = normalizePrice(o.price);
const id = buildHash(o.id, price);
const image = baseUrl + o.image;
const image = o.image == null ? null : baseUrl + o.image;
const address = o.address == null ? null : o.address.trim().replaceAll('/', ',');
return {
id,

View File

@@ -19,7 +19,7 @@ function normalize(o) {
const originalId = o.id.split('/').pop();
const id = buildHash(originalId, o.price);
const link = o.link != null ? `https://www.mcmakler.de${o.link}` : o.link;
const [rooms, size] = o.tags.split(' | ');
const [rooms, size] = (o.tags || '').split(' | ');
const address = o.address?.replace(' / ', ' ') || null;
return {
id,

View File

@@ -21,7 +21,8 @@ function normalize(o) {
const link = o.link != null ? decodeURIComponent(o.link) : config.url;
const urlReg = new RegExp(/url\((.*?)\)/gim);
const image = o.image != null ? urlReg.exec(o.image)[1] : null;
const imageMatch = o.image != null ? urlReg.exec(o.image) : null;
const image = imageMatch != null ? imageMatch[1] : null;
return {
id,
link,

View File

@@ -44,6 +44,7 @@ function normalize(o) {
const link = `https://www.wg-gesucht.de${o.link}`;
const image = o.image != null ? o.image.replace('small', 'large') : null;
const [rooms, city, road] = o.details?.split(' | ') || [];
const address = [city, road].filter(Boolean).join(', ') || null;
return {
id,
link,
@@ -51,7 +52,7 @@ function normalize(o) {
price: extractNumber(o.price),
size: extractNumber(o.size),
rooms: extractNumber(rooms),
address: `${city}, ${road}`,
address,
image,
description: o.description,
};

View File

@@ -19,7 +19,7 @@ function normalize(o) {
const [city = '', part = ''] = (o.description || '').split('-').map((v) => v.trim());
const address = `${part}, ${city}`;
return {
id: o.link.split('/').pop(),
id: o.link != null ? o.link.split('/').pop() : null,
link: o.link,
title: o.title || '',
price: extractNumber(o.price),
@@ -38,7 +38,7 @@ function normalize(o) {
function applyBlacklist(o) {
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
return o.id != null && o.title != null && titleNotBlacklisted && descNotBlacklisted && o.link.startsWith(o.link);
return o.id != null && o.title != null && o.link != null && titleNotBlacklisted && descNotBlacklisted;
}
/** @type {ProviderConfig} */

View File

@@ -17,16 +17,16 @@ const userAgents = [
];
/**
* Check if a listing is still active with up to 5 attempts and exponential backoff.
* Check if a listing is still active with up to `maxAttempts` attempts and exponential backoff.
* Backoff waits are randomized and capped.
*
* Rules:
* - HTTP 200 => return 1 (if checkForText is provided and found, returns 0)
* - HTTP 401/403 => return -1 (most certainly detected as a bot)
* - HTTP 404 => return 0
* - HTTP 404/410 => return 0
* - Other statuses or network errors => retry until attempts are exhausted
*
* @returns {Promise<Integer>} 1 if active, 0 if not active and -1 if detected as bot
* @returns {Promise<number>} 1 if active, 0 if not active and -1 if detected as bot
*/
export default async function checkIfListingIsActive(link, checkForText = null) {
await sleep(randomBetween(50, 100));

View File

@@ -40,7 +40,8 @@ class SqliteConnection {
}
/**
* Returns a singleton instance of better-sqlite3 Database.
* Respects env var SQLITE_DB_PATH and defaults to db/listings.db.
* Uses the configured `sqlitepath` (from conf/config.json) as the directory,
* defaulting to `/db` (relative to the project root) when unset.
*/
static getConnection() {
if (this.#db) return this.#db;

View File

@@ -5,12 +5,13 @@
/**
* Extract the first number from a string like "1.234 €" or "70 m²".
* Removes dots/commas before parsing. Returns null on invalid input.
* Removes dots/commas before parsing. Returns null when the input is
* null/undefined or cannot be parsed into a number.
* @param {string|undefined|null} str
* @returns {number|null}
*/
export const extractNumber = (str) => {
if (str == null) return 0;
if (str == null) return null;
if (typeof str === 'number') return str;
const cleaned = str.replace(/\./g, '').replace(',', '.');
const num = parseFloat(cleaned);

View File

@@ -1,7 +1,7 @@
{
"name": "fredy",
"version": "22.9.0",
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
"description": "Fredy - [F]ind [R]eal [E]state [D]amn Eas[y] - Fredy keeps searching for new apartments, houses, and flats in Germany on platforms like ImmoScout24, Immowelt, Immonet, eBay Kleinanzeigen, and WG-Gesucht and instantly delivers the results to you via Slack, Telegram, Email, Discord or ntfy, so you can focus on the more important things in life ;)",
"scripts": {
"prepare": "husky",
"start:backend": "x-var NODE_ENV=production node index.js",
@@ -42,6 +42,7 @@
"house",
"rent",
"immoscout",
"kleinanzeigen",
"scraper",
"immonet",
"immowelt",

View File

@@ -57,13 +57,17 @@ describe('#sparkasse testsuite()', () => {
expect(notify.id).toBeTypeOf('string');
expect(notify.price).toBeTypeOf('string');
expect(notify.price).toContain('€');
expect(notify.size).toBeTypeOf('string');
expect(notify.size).toContain('m²');
// Size can legitimately be absent for a card whose layout shifts the
// value out of the expected slot; when present it must be a formatted
// "… m²" string.
if (notify.size != null) {
expect(notify.size).toBeTypeOf('string');
expect(notify.size).toContain('m²');
}
expect(notify.title).toBeTypeOf('string');
expect(notify.link).toBeTypeOf('string');
expect(notify.address).toBeTypeOf('string');
/** check the values if possible **/
expect(notify.size).toBeTypeOf('string');
expect(notify.title).not.toBe('');
expect(notify.address).not.toBe('');
});