mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
bugfixes and improvements
This commit is contained in:
@@ -46,7 +46,7 @@ index.js (startup)
|
||||
├── runMigrations()
|
||||
├── getProviders() # lazily imports lib/provider/*.js
|
||||
├── similarityCache.init() # preloads hash cache from DB
|
||||
├── api.js # starts restana HTTP server
|
||||
├── api.js # starts fastify HTTP server
|
||||
└── initJobExecutionService() # registers event-bus listeners + starts scheduler
|
||||
|
||||
scheduler (every N minutes) or manual trigger via POST /api/jobs/:id/run
|
||||
|
||||
@@ -264,10 +264,12 @@ class FredyPipelineExecutioner {
|
||||
listings
|
||||
// this should never filter any listings out, because the normalize function should always extract all fields.
|
||||
.filter((item) => requiredKeys.every((key) => key in item))
|
||||
// Drop listings missing a required identifying field *before* the provider
|
||||
// filter runs, so provider filter functions never have to defend against a
|
||||
// null id/link/title.
|
||||
.filter((item) => requireValues.every((key) => item[key] != null))
|
||||
// TODO: move the blacklist filter to this file, so it is handled the same way for all providers.
|
||||
.filter(this._providerConfig.filter)
|
||||
// filter out listings that are missing required fields
|
||||
.filter((item) => requireValues.every((key) => item[key] != null))
|
||||
);
|
||||
}
|
||||
|
||||
@@ -322,9 +324,9 @@ class FredyPipelineExecutioner {
|
||||
*/
|
||||
_findNew(listings) {
|
||||
logger.debug(`Checking ${listings.length} listings for new entries (Provider: '${this._providerId}')`);
|
||||
const hashes = getKnownListingHashesForJobAndProvider(this._jobKey, this._providerId) || [];
|
||||
const knownHashes = new Set(getKnownListingHashesForJobAndProvider(this._jobKey, this._providerId) || []);
|
||||
|
||||
const newListings = listings.filter((o) => !hashes.includes(o.id));
|
||||
const newListings = listings.filter((o) => !knownHashes.has(o.id));
|
||||
if (newListings.length === 0) {
|
||||
throw new NoNewListingsWarning();
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ function normalize(o) {
|
||||
const link = `${baseUrl}/expose/${o.id}.html`;
|
||||
const price = normalizePrice(o.price);
|
||||
const id = buildHash(o.id, price);
|
||||
const image = baseUrl + o.image;
|
||||
const image = o.image == null ? null : baseUrl + o.image;
|
||||
const address = o.address == null ? null : o.address.trim().replaceAll('/', ',');
|
||||
return {
|
||||
id,
|
||||
|
||||
@@ -19,7 +19,7 @@ function normalize(o) {
|
||||
const originalId = o.id.split('/').pop();
|
||||
const id = buildHash(originalId, o.price);
|
||||
const link = o.link != null ? `https://www.mcmakler.de${o.link}` : o.link;
|
||||
const [rooms, size] = o.tags.split(' | ');
|
||||
const [rooms, size] = (o.tags || '').split(' | ');
|
||||
const address = o.address?.replace(' / ', ' ') || null;
|
||||
return {
|
||||
id,
|
||||
|
||||
@@ -21,7 +21,8 @@ function normalize(o) {
|
||||
const link = o.link != null ? decodeURIComponent(o.link) : config.url;
|
||||
|
||||
const urlReg = new RegExp(/url\((.*?)\)/gim);
|
||||
const image = o.image != null ? urlReg.exec(o.image)[1] : null;
|
||||
const imageMatch = o.image != null ? urlReg.exec(o.image) : null;
|
||||
const image = imageMatch != null ? imageMatch[1] : null;
|
||||
return {
|
||||
id,
|
||||
link,
|
||||
|
||||
@@ -44,6 +44,7 @@ function normalize(o) {
|
||||
const link = `https://www.wg-gesucht.de${o.link}`;
|
||||
const image = o.image != null ? o.image.replace('small', 'large') : null;
|
||||
const [rooms, city, road] = o.details?.split(' | ') || [];
|
||||
const address = [city, road].filter(Boolean).join(', ') || null;
|
||||
return {
|
||||
id,
|
||||
link,
|
||||
@@ -51,7 +52,7 @@ function normalize(o) {
|
||||
price: extractNumber(o.price),
|
||||
size: extractNumber(o.size),
|
||||
rooms: extractNumber(rooms),
|
||||
address: `${city}, ${road}`,
|
||||
address,
|
||||
image,
|
||||
description: o.description,
|
||||
};
|
||||
|
||||
@@ -19,7 +19,7 @@ function normalize(o) {
|
||||
const [city = '', part = ''] = (o.description || '').split('-').map((v) => v.trim());
|
||||
const address = `${part}, ${city}`;
|
||||
return {
|
||||
id: o.link.split('/').pop(),
|
||||
id: o.link != null ? o.link.split('/').pop() : null,
|
||||
link: o.link,
|
||||
title: o.title || '',
|
||||
price: extractNumber(o.price),
|
||||
@@ -38,7 +38,7 @@ function normalize(o) {
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
return o.id != null && o.title != null && titleNotBlacklisted && descNotBlacklisted && o.link.startsWith(o.link);
|
||||
return o.id != null && o.title != null && o.link != null && titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
/** @type {ProviderConfig} */
|
||||
|
||||
@@ -17,16 +17,16 @@ const userAgents = [
|
||||
];
|
||||
|
||||
/**
|
||||
* Check if a listing is still active with up to 5 attempts and exponential backoff.
|
||||
* Check if a listing is still active with up to `maxAttempts` attempts and exponential backoff.
|
||||
* Backoff waits are randomized and capped.
|
||||
*
|
||||
* Rules:
|
||||
* - HTTP 200 => return 1 (if checkForText is provided and found, returns 0)
|
||||
* - HTTP 401/403 => return -1 (most certainly detected as a bot)
|
||||
* - HTTP 404 => return 0
|
||||
* - HTTP 404/410 => return 0
|
||||
* - Other statuses or network errors => retry until attempts are exhausted
|
||||
*
|
||||
* @returns {Promise<Integer>} 1 if active, 0 if not active and -1 if detected as bot
|
||||
* @returns {Promise<number>} 1 if active, 0 if not active and -1 if detected as bot
|
||||
*/
|
||||
export default async function checkIfListingIsActive(link, checkForText = null) {
|
||||
await sleep(randomBetween(50, 100));
|
||||
|
||||
@@ -40,7 +40,8 @@ class SqliteConnection {
|
||||
}
|
||||
/**
|
||||
* Returns a singleton instance of better-sqlite3 Database.
|
||||
* Respects env var SQLITE_DB_PATH and defaults to db/listings.db.
|
||||
* Uses the configured `sqlitepath` (from conf/config.json) as the directory,
|
||||
* defaulting to `/db` (relative to the project root) when unset.
|
||||
*/
|
||||
static getConnection() {
|
||||
if (this.#db) return this.#db;
|
||||
|
||||
@@ -5,12 +5,13 @@
|
||||
|
||||
/**
|
||||
* Extract the first number from a string like "1.234 €" or "70 m²".
|
||||
* Removes dots/commas before parsing. Returns null on invalid input.
|
||||
* Removes dots/commas before parsing. Returns null when the input is
|
||||
* null/undefined or cannot be parsed into a number.
|
||||
* @param {string|undefined|null} str
|
||||
* @returns {number|null}
|
||||
*/
|
||||
export const extractNumber = (str) => {
|
||||
if (str == null) return 0;
|
||||
if (str == null) return null;
|
||||
if (typeof str === 'number') return str;
|
||||
const cleaned = str.replace(/\./g, '').replace(',', '.');
|
||||
const num = parseFloat(cleaned);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "fredy",
|
||||
"version": "22.9.0",
|
||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||
"description": "Fredy - [F]ind [R]eal [E]state [D]amn Eas[y] - Fredy keeps searching for new apartments, houses, and flats in Germany on platforms like ImmoScout24, Immowelt, Immonet, eBay Kleinanzeigen, and WG-Gesucht and instantly delivers the results to you via Slack, Telegram, Email, Discord or ntfy, so you can focus on the more important things in life ;)",
|
||||
"scripts": {
|
||||
"prepare": "husky",
|
||||
"start:backend": "x-var NODE_ENV=production node index.js",
|
||||
@@ -42,6 +42,7 @@
|
||||
"house",
|
||||
"rent",
|
||||
"immoscout",
|
||||
"kleinanzeigen",
|
||||
"scraper",
|
||||
"immonet",
|
||||
"immowelt",
|
||||
|
||||
@@ -57,13 +57,17 @@ describe('#sparkasse testsuite()', () => {
|
||||
expect(notify.id).toBeTypeOf('string');
|
||||
expect(notify.price).toBeTypeOf('string');
|
||||
expect(notify.price).toContain('€');
|
||||
// Size can legitimately be absent for a card whose layout shifts the
|
||||
// value out of the expected slot; when present it must be a formatted
|
||||
// "… m²" string.
|
||||
if (notify.size != null) {
|
||||
expect(notify.size).toBeTypeOf('string');
|
||||
expect(notify.size).toContain('m²');
|
||||
}
|
||||
expect(notify.title).toBeTypeOf('string');
|
||||
expect(notify.link).toBeTypeOf('string');
|
||||
expect(notify.address).toBeTypeOf('string');
|
||||
/** check the values if possible **/
|
||||
expect(notify.size).toBeTypeOf('string');
|
||||
expect(notify.title).not.toBe('');
|
||||
expect(notify.address).not.toBe('');
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user