mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
bugfixes and improvements
This commit is contained in:
@@ -46,7 +46,7 @@ index.js (startup)
|
|||||||
├── runMigrations()
|
├── runMigrations()
|
||||||
├── getProviders() # lazily imports lib/provider/*.js
|
├── getProviders() # lazily imports lib/provider/*.js
|
||||||
├── similarityCache.init() # preloads hash cache from DB
|
├── similarityCache.init() # preloads hash cache from DB
|
||||||
├── api.js # starts restana HTTP server
|
├── api.js # starts fastify HTTP server
|
||||||
└── initJobExecutionService() # registers event-bus listeners + starts scheduler
|
└── initJobExecutionService() # registers event-bus listeners + starts scheduler
|
||||||
|
|
||||||
scheduler (every N minutes) or manual trigger via POST /api/jobs/:id/run
|
scheduler (every N minutes) or manual trigger via POST /api/jobs/:id/run
|
||||||
|
|||||||
@@ -264,10 +264,12 @@ class FredyPipelineExecutioner {
|
|||||||
listings
|
listings
|
||||||
// this should never filter some listings out, because the normalize function should always extract all fields.
|
// this should never filter some listings out, because the normalize function should always extract all fields.
|
||||||
.filter((item) => requiredKeys.every((key) => key in item))
|
.filter((item) => requiredKeys.every((key) => key in item))
|
||||||
|
// Drop listings missing a required identifying field *before* the provider
|
||||||
|
// filter runs, so provider filter functions never have to defend against a
|
||||||
|
// null id/link/title.
|
||||||
|
.filter((item) => requireValues.every((key) => item[key] != null))
|
||||||
// TODO: move blacklist filter to this file, so it will handle for all providers in same way.
|
// TODO: move blacklist filter to this file, so it will handle for all providers in same way.
|
||||||
.filter(this._providerConfig.filter)
|
.filter(this._providerConfig.filter)
|
||||||
// filter out listings that are missing required fields
|
|
||||||
.filter((item) => requireValues.every((key) => item[key] != null))
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -322,9 +324,9 @@ class FredyPipelineExecutioner {
|
|||||||
*/
|
*/
|
||||||
_findNew(listings) {
|
_findNew(listings) {
|
||||||
logger.debug(`Checking ${listings.length} listings for new entries (Provider: '${this._providerId}')`);
|
logger.debug(`Checking ${listings.length} listings for new entries (Provider: '${this._providerId}')`);
|
||||||
const hashes = getKnownListingHashesForJobAndProvider(this._jobKey, this._providerId) || [];
|
const knownHashes = new Set(getKnownListingHashesForJobAndProvider(this._jobKey, this._providerId) || []);
|
||||||
|
|
||||||
const newListings = listings.filter((o) => !hashes.includes(o.id));
|
const newListings = listings.filter((o) => !knownHashes.has(o.id));
|
||||||
if (newListings.length === 0) {
|
if (newListings.length === 0) {
|
||||||
throw new NoNewListingsWarning();
|
throw new NoNewListingsWarning();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ function normalize(o) {
|
|||||||
const link = `${baseUrl}/expose/${o.id}.html`;
|
const link = `${baseUrl}/expose/${o.id}.html`;
|
||||||
const price = normalizePrice(o.price);
|
const price = normalizePrice(o.price);
|
||||||
const id = buildHash(o.id, price);
|
const id = buildHash(o.id, price);
|
||||||
const image = baseUrl + o.image;
|
const image = o.image == null ? null : baseUrl + o.image;
|
||||||
const address = o.address == null ? null : o.address.trim().replaceAll('/', ',');
|
const address = o.address == null ? null : o.address.trim().replaceAll('/', ',');
|
||||||
return {
|
return {
|
||||||
id,
|
id,
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ function normalize(o) {
|
|||||||
const originalId = o.id.split('/').pop();
|
const originalId = o.id.split('/').pop();
|
||||||
const id = buildHash(originalId, o.price);
|
const id = buildHash(originalId, o.price);
|
||||||
const link = o.link != null ? `https://www.mcmakler.de${o.link}` : o.link;
|
const link = o.link != null ? `https://www.mcmakler.de${o.link}` : o.link;
|
||||||
const [rooms, size] = o.tags.split(' | ');
|
const [rooms, size] = (o.tags || '').split(' | ');
|
||||||
const address = o.address?.replace(' / ', ' ') || null;
|
const address = o.address?.replace(' / ', ' ') || null;
|
||||||
return {
|
return {
|
||||||
id,
|
id,
|
||||||
|
|||||||
@@ -21,7 +21,8 @@ function normalize(o) {
|
|||||||
const link = o.link != null ? decodeURIComponent(o.link) : config.url;
|
const link = o.link != null ? decodeURIComponent(o.link) : config.url;
|
||||||
|
|
||||||
const urlReg = new RegExp(/url\((.*?)\)/gim);
|
const urlReg = new RegExp(/url\((.*?)\)/gim);
|
||||||
const image = o.image != null ? urlReg.exec(o.image)[1] : null;
|
const imageMatch = o.image != null ? urlReg.exec(o.image) : null;
|
||||||
|
const image = imageMatch != null ? imageMatch[1] : null;
|
||||||
return {
|
return {
|
||||||
id,
|
id,
|
||||||
link,
|
link,
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ function normalize(o) {
|
|||||||
const link = `https://www.wg-gesucht.de${o.link}`;
|
const link = `https://www.wg-gesucht.de${o.link}`;
|
||||||
const image = o.image != null ? o.image.replace('small', 'large') : null;
|
const image = o.image != null ? o.image.replace('small', 'large') : null;
|
||||||
const [rooms, city, road] = o.details?.split(' | ') || [];
|
const [rooms, city, road] = o.details?.split(' | ') || [];
|
||||||
|
const address = [city, road].filter(Boolean).join(', ') || null;
|
||||||
return {
|
return {
|
||||||
id,
|
id,
|
||||||
link,
|
link,
|
||||||
@@ -51,7 +52,7 @@ function normalize(o) {
|
|||||||
price: extractNumber(o.price),
|
price: extractNumber(o.price),
|
||||||
size: extractNumber(o.size),
|
size: extractNumber(o.size),
|
||||||
rooms: extractNumber(rooms),
|
rooms: extractNumber(rooms),
|
||||||
address: `${city}, ${road}`,
|
address,
|
||||||
image,
|
image,
|
||||||
description: o.description,
|
description: o.description,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ function normalize(o) {
|
|||||||
const [city = '', part = ''] = (o.description || '').split('-').map((v) => v.trim());
|
const [city = '', part = ''] = (o.description || '').split('-').map((v) => v.trim());
|
||||||
const address = `${part}, ${city}`;
|
const address = `${part}, ${city}`;
|
||||||
return {
|
return {
|
||||||
id: o.link.split('/').pop(),
|
id: o.link != null ? o.link.split('/').pop() : null,
|
||||||
link: o.link,
|
link: o.link,
|
||||||
title: o.title || '',
|
title: o.title || '',
|
||||||
price: extractNumber(o.price),
|
price: extractNumber(o.price),
|
||||||
@@ -38,7 +38,7 @@ function normalize(o) {
|
|||||||
function applyBlacklist(o) {
|
function applyBlacklist(o) {
|
||||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||||
return o.id != null && o.title != null && titleNotBlacklisted && descNotBlacklisted && o.link.startsWith(o.link);
|
return o.id != null && o.title != null && o.link != null && titleNotBlacklisted && descNotBlacklisted;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @type {ProviderConfig} */
|
/** @type {ProviderConfig} */
|
||||||
|
|||||||
@@ -17,16 +17,16 @@ const userAgents = [
|
|||||||
];
|
];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if a listing is still active with up to 5 attempts and exponential backoff.
|
* Check if a listing is still active with up to `maxAttempts` attempts and exponential backoff.
|
||||||
* Backoff waits are randomized and capped.
|
* Backoff waits are randomized and capped.
|
||||||
*
|
*
|
||||||
* Rules:
|
* Rules:
|
||||||
* - HTTP 200 => return 1 (if checkForText is provided and found, returns 0)
|
* - HTTP 200 => return 1 (if checkForText is provided and found, returns 0)
|
||||||
* - HTTP 401/403 => return -1 (most certainly detected as a bot)
|
* - HTTP 401/403 => return -1 (most certainly detected as a bot)
|
||||||
* - HTTP 404 => return 0
|
* - HTTP 404/410 => return 0
|
||||||
* - Other statuses or network errors => retry until attempts are exhausted
|
* - Other statuses or network errors => retry until attempts are exhausted
|
||||||
*
|
*
|
||||||
* @returns {Promise<Integer>} 1 if active, 0 if not active and -1 if detected as bot
|
* @returns {Promise<number>} 1 if active, 0 if not active and -1 if detected as bot
|
||||||
*/
|
*/
|
||||||
export default async function checkIfListingIsActive(link, checkForText = null) {
|
export default async function checkIfListingIsActive(link, checkForText = null) {
|
||||||
await sleep(randomBetween(50, 100));
|
await sleep(randomBetween(50, 100));
|
||||||
|
|||||||
@@ -40,7 +40,8 @@ class SqliteConnection {
|
|||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Returns a singleton instance of better-sqlite3 Database.
|
* Returns a singleton instance of better-sqlite3 Database.
|
||||||
* Respects env var SQLITE_DB_PATH and defaults to db/listings.db.
|
* Uses the configured `sqlitepath` (from conf/config.json) as the directory,
|
||||||
|
* defaulting to `/db` (relative to the project root) when unset.
|
||||||
*/
|
*/
|
||||||
static getConnection() {
|
static getConnection() {
|
||||||
if (this.#db) return this.#db;
|
if (this.#db) return this.#db;
|
||||||
|
|||||||
@@ -5,12 +5,13 @@
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract the first number from a string like "1.234 €" or "70 m²".
|
* Extract the first number from a string like "1.234 €" or "70 m²".
|
||||||
* Removes dots/commas before parsing. Returns null on invalid input.
|
* Removes dots/commas before parsing. Returns null when the input is
|
||||||
|
* null/undefined or cannot be parsed into a number.
|
||||||
* @param {string|undefined|null} str
|
* @param {string|undefined|null} str
|
||||||
* @returns {number|null}
|
* @returns {number|null}
|
||||||
*/
|
*/
|
||||||
export const extractNumber = (str) => {
|
export const extractNumber = (str) => {
|
||||||
if (str == null) return 0;
|
if (str == null) return null;
|
||||||
if (typeof str === 'number') return str;
|
if (typeof str === 'number') return str;
|
||||||
const cleaned = str.replace(/\./g, '').replace(',', '.');
|
const cleaned = str.replace(/\./g, '').replace(',', '.');
|
||||||
const num = parseFloat(cleaned);
|
const num = parseFloat(cleaned);
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "fredy",
|
"name": "fredy",
|
||||||
"version": "22.9.0",
|
"version": "22.9.0",
|
||||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
"description": "Fredy - [F]ind [R]eal [E]state [D]amn Eas[y] - Fredy keeps searching for new apartments, houses, and flats in Germany on platforms like ImmoScout24, Immowelt, Immonet, eBay Kleinanzeigen, and WG-Gesucht and instantly delivers the results to you via Slack, Telegram, Email, Discord or ntfy, so you can focus on the more important things in life ;)",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"prepare": "husky",
|
"prepare": "husky",
|
||||||
"start:backend": "x-var NODE_ENV=production node index.js",
|
"start:backend": "x-var NODE_ENV=production node index.js",
|
||||||
@@ -42,6 +42,7 @@
|
|||||||
"house",
|
"house",
|
||||||
"rent",
|
"rent",
|
||||||
"immoscout",
|
"immoscout",
|
||||||
|
"kleinanzeigen",
|
||||||
"scraper",
|
"scraper",
|
||||||
"immonet",
|
"immonet",
|
||||||
"immowelt",
|
"immowelt",
|
||||||
|
|||||||
@@ -57,13 +57,17 @@ describe('#sparkasse testsuite()', () => {
|
|||||||
expect(notify.id).toBeTypeOf('string');
|
expect(notify.id).toBeTypeOf('string');
|
||||||
expect(notify.price).toBeTypeOf('string');
|
expect(notify.price).toBeTypeOf('string');
|
||||||
expect(notify.price).toContain('€');
|
expect(notify.price).toContain('€');
|
||||||
expect(notify.size).toBeTypeOf('string');
|
// Size can legitimately be absent for a card whose layout shifts the
|
||||||
expect(notify.size).toContain('m²');
|
// value out of the expected slot; when present it must be a formatted
|
||||||
|
// "… m²" string.
|
||||||
|
if (notify.size != null) {
|
||||||
|
expect(notify.size).toBeTypeOf('string');
|
||||||
|
expect(notify.size).toContain('m²');
|
||||||
|
}
|
||||||
expect(notify.title).toBeTypeOf('string');
|
expect(notify.title).toBeTypeOf('string');
|
||||||
expect(notify.link).toBeTypeOf('string');
|
expect(notify.link).toBeTypeOf('string');
|
||||||
expect(notify.address).toBeTypeOf('string');
|
expect(notify.address).toBeTypeOf('string');
|
||||||
/** check the values if possible **/
|
/** check the values if possible **/
|
||||||
expect(notify.size).toBeTypeOf('string');
|
|
||||||
expect(notify.title).not.toBe('');
|
expect(notify.title).not.toBe('');
|
||||||
expect(notify.address).not.toBe('');
|
expect(notify.address).not.toBe('');
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user