mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3d0fa87d19 | ||
|
|
8b012ef2f1 | ||
|
|
6816b0aded | ||
|
|
ac02817d4e | ||
|
|
fe0a09fe1c |
13
Dockerfile
13
Dockerfile
@@ -1,16 +1,15 @@
|
|||||||
FROM node:22-slim
|
FROM node:22-slim
|
||||||
|
|
||||||
ARG TARGETARCH
|
# System deps for CloakBrowser + build tools for native modules (better-sqlite3)
|
||||||
|
# fonts-noto-color-emoji and fonts-freefont-ttf are required so canvas fingerprint
|
||||||
# System deps for Chrome for Testing + build tools for native modules (better-sqlite3)
|
# hashes match real browsers; missing emoji fonts cause bot detection on Kasada/Akamai.
|
||||||
# On ARM64 we also install system Chromium (Chrome for Testing has no ARM64 binary)
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
curl ca-certificates fonts-liberation libasound2 \
|
curl ca-certificates fonts-liberation libasound2 \
|
||||||
libatk-bridge2.0-0 libatk1.0-0 libcups2 libdbus-1-3 \
|
libatk-bridge2.0-0 libatk1.0-0 libcups2 libdbus-1-3 \
|
||||||
libdrm2 libgbm1 libgtk-3-0 libnspr4 libnss3 \
|
libdrm2 libgbm1 libgtk-3-0 libnspr4 libnss3 \
|
||||||
libx11-xcb1 libxcomposite1 libxdamage1 libxrandr2 xdg-utils \
|
libx11-xcb1 libxcomposite1 libxdamage1 libxrandr2 xdg-utils \
|
||||||
|
fonts-noto-color-emoji fonts-freefont-ttf \
|
||||||
python3 make g++ \
|
python3 make g++ \
|
||||||
&& if [ "$TARGETARCH" = "arm64" ]; then apt-get install -y --no-install-recommends chromium; fi \
|
|
||||||
&& rm -rf /var/lib/apt/lists/* \
|
&& rm -rf /var/lib/apt/lists/* \
|
||||||
&& mkdir -p /db /conf /fredy
|
&& mkdir -p /db /conf /fredy
|
||||||
|
|
||||||
@@ -26,8 +25,8 @@ RUN yarn config set network-timeout 600000 \
|
|||||||
&& yarn --frozen-lockfile \
|
&& yarn --frozen-lockfile \
|
||||||
&& yarn cache clean
|
&& yarn cache clean
|
||||||
|
|
||||||
# on arm64 use the system Chromium installed above
|
# Pre-download the CloakBrowser stealth Chromium binary (supports x86_64 and arm64)
|
||||||
RUN if [ "$TARGETARCH" != "arm64" ]; then npx puppeteer browsers install chrome; fi
|
RUN node -e "import('cloakbrowser').then(({ensureBinary}) => ensureBinary())"
|
||||||
|
|
||||||
# Purge build tools now that native modules are compiled
|
# Purge build tools now that native modules are compiled
|
||||||
RUN apt-get purge -y python3 make g++ \
|
RUN apt-get purge -y python3 make g++ \
|
||||||
|
|||||||
9
index.js
9
index.js
@@ -15,6 +15,15 @@ import { initGeocodingCron } from './lib/services/crons/geocoding-cron.js';
|
|||||||
import { getSettings } from './lib/services/storage/settingsStorage.js';
|
import { getSettings } from './lib/services/storage/settingsStorage.js';
|
||||||
import SqliteConnection, { computeDbPath } from './lib/services/storage/SqliteConnection.js';
|
import SqliteConnection, { computeDbPath } from './lib/services/storage/SqliteConnection.js';
|
||||||
import { initJobExecutionService } from './lib/services/jobs/jobExecutionService.js';
|
import { initJobExecutionService } from './lib/services/jobs/jobExecutionService.js';
|
||||||
|
import { ensureValidBinary } from './lib/services/ensureValidBinary.js';
|
||||||
|
|
||||||
|
// Ensure the CloakBrowser stealth Chromium binary is present and complete before
|
||||||
|
// jobs run. ensureValidBinary() also detects and auto-heals partial extractions
|
||||||
|
// (e.g. a newer version that was downloaded but only the chrome executable was
|
||||||
|
// written) so Chrome never crashes with "Invalid file descriptor to ICU data".
|
||||||
|
logger.info('Checking CloakBrowser binary...');
|
||||||
|
await ensureValidBinary();
|
||||||
|
logger.info('CloakBrowser binary ready.');
|
||||||
|
|
||||||
//in the config, we store the path of the sqlite file, thus we must check if it is available
|
//in the config, we store the path of the sqlite file, thus we must check if it is available
|
||||||
const isConfigAccessible = await checkIfConfigIsAccessible();
|
const isConfigAccessible = await checkIfConfigIsAccessible();
|
||||||
|
|||||||
@@ -7,4 +7,8 @@ export const TRACKING_POIS = {
|
|||||||
DISTANCE_ADDRESS_ENTERED: 'DISTANCE_ADDRESS_ENTERED',
|
DISTANCE_ADDRESS_ENTERED: 'DISTANCE_ADDRESS_ENTERED',
|
||||||
WELCOME_FINISHED: 'WELCOME_FINISHED',
|
WELCOME_FINISHED: 'WELCOME_FINISHED',
|
||||||
WELCOME_SKIPPED: 'WELCOME_SKIPPED',
|
WELCOME_SKIPPED: 'WELCOME_SKIPPED',
|
||||||
|
JOBS_TABLE_VIEW: 'JOBS_TABLE_VIEW',
|
||||||
|
LISTING_TABLE_VIEW: 'LISTING_TABLE_VIEW',
|
||||||
|
BASE_URL_SETTING: 'BASE_URL_SETTING',
|
||||||
|
DETECTED_AS_BOT: 'DETECTED_AS_BOT',
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -9,6 +9,8 @@ import { ensureDemoUserExists } from '../../services/storage/userStorage.js';
|
|||||||
import logger from '../../services/logger.js';
|
import logger from '../../services/logger.js';
|
||||||
import { getSettings, upsertSettings } from '../../services/storage/settingsStorage.js';
|
import { getSettings, upsertSettings } from '../../services/storage/settingsStorage.js';
|
||||||
import { isAdmin } from '../security.js';
|
import { isAdmin } from '../security.js';
|
||||||
|
import { trackPoi } from '../../services/tracking/Tracker.js';
|
||||||
|
import { TRACKING_POIS } from '../../TRACKING_POIS.js';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param {import('fastify').FastifyInstance} fastify
|
* @param {import('fastify').FastifyInstance} fastify
|
||||||
@@ -33,8 +35,12 @@ export default async function generalSettingsPlugin(fastify) {
|
|||||||
if (typeof sqlitepath !== 'undefined') {
|
if (typeof sqlitepath !== 'undefined') {
|
||||||
fs.writeFileSync(`${getDirName()}/../conf/config.json`, JSON.stringify({ sqlitepath }));
|
fs.writeFileSync(`${getDirName()}/../conf/config.json`, JSON.stringify({ sqlitepath }));
|
||||||
}
|
}
|
||||||
|
|
||||||
upsertSettings(appSettings);
|
upsertSettings(appSettings);
|
||||||
ensureDemoUserExists();
|
ensureDemoUserExists();
|
||||||
|
if (appSettings.baseUrl != null) {
|
||||||
|
await trackPoi(TRACKING_POIS.BASE_URL_SETTING);
|
||||||
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
logger.error(err);
|
logger.error(err);
|
||||||
return reply.code(500).send({ error: 'Error while trying to write settings.' });
|
return reply.code(500).send({ error: 'Error while trying to write settings.' });
|
||||||
|
|||||||
@@ -118,6 +118,10 @@ export default async function userSettingsPlugin(fastify) {
|
|||||||
return reply.code(400).send({ error: 'listings_view_mode must be "grid" or "table".' });
|
return reply.code(400).send({ error: 'listings_view_mode must be "grid" or "table".' });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (listings_view_mode === 'table') {
|
||||||
|
await trackPoi(TRACKING_POIS.LISTING_TABLE_VIEW);
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
upsertSettings({ listings_view_mode }, userId);
|
upsertSettings({ listings_view_mode }, userId);
|
||||||
return { success: true };
|
return { success: true };
|
||||||
@@ -135,6 +139,10 @@ export default async function userSettingsPlugin(fastify) {
|
|||||||
return reply.code(400).send({ error: 'jobs_view_mode must be "grid" or "table".' });
|
return reply.code(400).send({ error: 'jobs_view_mode must be "grid" or "table".' });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (jobs_view_mode === 'table') {
|
||||||
|
await trackPoi(TRACKING_POIS.JOBS_TABLE_VIEW);
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
upsertSettings({ jobs_view_mode }, userId);
|
upsertSettings({ jobs_view_mode }, userId);
|
||||||
return { success: true };
|
return { success: true };
|
||||||
|
|||||||
@@ -87,7 +87,19 @@ const config = {
|
|||||||
crawlContainer:
|
crawlContainer:
|
||||||
'div[data-testid="serp-core-scrollablelistview-testid"]:not(div[data-testid="serp-enlargementlist-testid"] div[data-testid="serp-card-testid"]) div[data-testid="serp-core-classified-card-testid"]',
|
'div[data-testid="serp-core-scrollablelistview-testid"]:not(div[data-testid="serp-enlargementlist-testid"] div[data-testid="serp-card-testid"]) div[data-testid="serp-core-classified-card-testid"]',
|
||||||
sortByDateParam: 'order=DateDesc',
|
sortByDateParam: 'order=DateDesc',
|
||||||
waitForSelector: 'div[data-testid="serp-gridcontainer-testid"]',
|
// waitForSelector is null: extract the full page via page.content() so the
|
||||||
|
// Cheerio crawler can search anywhere in the rendered document.
|
||||||
|
// preNavigateUrl visits the homepage first to establish a trusted session
|
||||||
|
// before hitting the search URL; this prevents CDN-level bot challenges that
|
||||||
|
// fire on cold sessions. waitForNetworkIdle (phase 2) then catches React's
|
||||||
|
// listing API round-trip that fires well after domcontentloaded.
|
||||||
|
waitForSelector: null,
|
||||||
|
puppeteerOptions: {
|
||||||
|
puppeteerTimeout: 60_000,
|
||||||
|
preNavigateUrl: 'https://www.immowelt.de/',
|
||||||
|
waitForNetworkIdle: true,
|
||||||
|
waitForNetworkIdleTimeout: 60_000,
|
||||||
|
},
|
||||||
crawlFields: {
|
crawlFields: {
|
||||||
id: 'a@href',
|
id: 'a@href',
|
||||||
price: 'div[data-testid="cardmfe-price-testid"] | removeNewline | trim',
|
price: 'div[data-testid="cardmfe-price-testid"] | removeNewline | trim',
|
||||||
|
|||||||
147
lib/services/ensureValidBinary.js
Normal file
147
lib/services/ensureValidBinary.js
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2026 by Christian Kellner.
|
||||||
|
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { ensureBinary } from 'cloakbrowser';
|
||||||
|
import fs from 'fs';
|
||||||
|
import path from 'path';
|
||||||
|
import os from 'os';
|
||||||
|
|
||||||
|
/**
 * File names that a Linux or Windows installation must contain in the same
 * directory as the chrome executable.
 *
 * macOS is deliberately not covered by this list: there the resources are
 * packaged inside the .app bundle's Frameworks directory, so a different
 * completeness check is used (see isBinaryComplete).
 */
const LINUX_WIN_REQUIRED_FILES = ['icudtl.dat', 'resources.pak'];
|
||||||
|
|
||||||
|
/**
 * Resolve the top-level versioned installation directory that a binary belongs to.
 *
 * - Linux/Windows: the binary sits directly in the versioned directory, e.g.
 *   ~/.cloakbrowser/chromium-X.Y.Z/chrome, so its parent directory is returned.
 * - macOS: the binary is nested in an .app bundle, e.g.
 *   ~/.cloakbrowser/chromium-X.Y.Z/Chromium.app/Contents/MacOS/Chromium, so the
 *   directory three levels above the binary's own directory is returned.
 *
 * @param {string} binaryPath - Path to the chrome/Chromium executable.
 * @returns {string} The versioned installation directory.
 */
function getVersionedDir(binaryPath) {
  const binaryDir = path.dirname(binaryPath);
  const isMac = process.platform === 'darwin';
  // MacOS/ -> Contents/ -> Chromium.app/ -> chromium-X.Y.Z/
  return isMac ? path.resolve(binaryDir, '..', '..', '..') : binaryDir;
}
|
||||||
|
|
||||||
|
/**
 * Tell whether the binary at binaryPath is part of a complete installation.
 *
 * Linux/Windows: every file listed in LINUX_WIN_REQUIRED_FILES must exist in
 * the same directory as the binary.
 *
 * macOS: the binary lives at Chromium.app/Contents/MacOS/Chromium while the
 * resource files are located deeper inside Chromium.app/Contents/Frameworks/,
 * so looking next to the binary would be wrong. Instead, the presence of
 * Info.plist and of the Frameworks directory inside Contents/ is checked.
 *
 * Only existence is checked; file contents are not inspected.
 *
 * @param {string} binaryPath - Path to the chrome/Chromium executable.
 * @returns {boolean} true when all required entries exist.
 */
function isBinaryComplete(binaryPath) {
  const binaryDir = path.dirname(binaryPath);

  if (process.platform !== 'darwin') {
    for (const fileName of LINUX_WIN_REQUIRED_FILES) {
      if (!fs.existsSync(path.join(binaryDir, fileName))) {
        return false;
      }
    }
    return true;
  }

  // Contents/ is the parent of the MacOS/ directory that holds the binary.
  const contentsDir = path.resolve(binaryDir, '..');
  const hasInfoPlist = fs.existsSync(path.join(contentsDir, 'Info.plist'));
  return hasInfoPlist && fs.existsSync(path.join(contentsDir, 'Frameworks'));
}
|
||||||
|
|
||||||
|
/**
 * Build a human-readable, comma-separated list of the required files or
 * directories that are absent from the installation the binary belongs to.
 *
 * The same entries are inspected as in isBinaryComplete: Info.plist and
 * Frameworks inside Contents/ on macOS, LINUX_WIN_REQUIRED_FILES next to the
 * binary on every other platform. Returns an empty string when nothing is missing.
 *
 * @param {string} binaryPath - Path to the chrome/Chromium executable.
 * @returns {string} Missing entry names joined by ", ".
 */
function missingDescription(binaryPath) {
  const onMac = process.platform === 'darwin';
  const binaryDir = path.dirname(binaryPath);
  const baseDir = onMac ? path.resolve(binaryDir, '..') : binaryDir;
  const expectedEntries = onMac ? ['Info.plist', 'Frameworks'] : LINUX_WIN_REQUIRED_FILES;

  const absent = [];
  for (const entryName of expectedEntries) {
    if (!fs.existsSync(path.join(baseDir, entryName))) {
      absent.push(entryName);
    }
  }
  return absent.join(', ');
}
|
||||||
|
|
||||||
|
/**
 * Remove a corrupt binary installation and all `latest_version*` markers from
 * the CloakBrowser cache so the next `ensureBinary()` call falls back to the
 * package-bundled version.
 *
 * The full versioned directory (e.g. chromium-X.Y.Z/) is removed on all
 * platforms, not just the subdirectory that contains the binary.
 *
 * Safety: the directory is only deleted when it lies strictly inside the
 * CloakBrowser cache directory. If the binary path points somewhere else
 * (e.g. a binary pinned via CLOAKBROWSER_BINARY_PATH such as
 * /usr/bin/chromium), a recursive delete of its parent directory would wipe
 * unrelated files, so the removal is skipped and a warning is printed instead.
 * The cache directory itself is never removed either.
 *
 * @param {string} binaryPath - Path to the (corrupt) chrome/Chromium binary.
 * @returns {void}
 */
function removeCorruptInstallation(binaryPath) {
  const versionedDir = path.resolve(getVersionedDir(binaryPath));
  const cacheDir = path.resolve(process.env.CLOAKBROWSER_CACHE_DIR || path.join(os.homedir(), '.cloakbrowser'));

  // A path is inside the cache when the relative path from the cache dir is
  // non-empty, does not climb upwards and is not absolute (different drive on Windows).
  const relativeToCache = path.relative(cacheDir, versionedDir);
  const isInsideCache =
    relativeToCache !== '' &&
    relativeToCache !== '..' &&
    !relativeToCache.startsWith(`..${path.sep}`) &&
    !path.isAbsolute(relativeToCache);

  if (isInsideCache) {
    fs.rmSync(versionedDir, { recursive: true, force: true });
  } else {
    console.warn(
      `[fredy] Refusing to remove ${versionedDir}: it is not located inside the CloakBrowser cache directory ${cacheDir}.`,
    );
  }

  try {
    for (const entry of fs.readdirSync(cacheDir)) {
      if (entry.startsWith('latest_version')) {
        fs.rmSync(path.join(cacheDir, entry), { force: true });
      }
    }
  } catch {
    // Best effort: the cache dir may not exist (or may not be readable) — ignore.
  }
}
|
||||||
|
|
||||||
|
/**
 * Make sure the CloakBrowser stealth Chromium binary exists **and** that its
 * installation is complete.
 *
 * `cloakbrowser`'s own `ensureBinary()` only verifies that the chrome/Chromium
 * file is there. An incomplete extraction (e.g. interrupted download, disk
 * full) can leave the executable in place without its essential resource
 * files, in which case Chrome crashes immediately on launch.
 *
 * Flow:
 *  1. Ask `ensureBinary()` for the binary path and validate the installation.
 *  2. If it is incomplete, log a warning, remove the corrupt installation and
 *     the version markers, then call `ensureBinary()` a second time so it
 *     falls back to (or re-downloads) a complete build.
 *  3. Pin the validated path via `CLOAKBROWSER_BINARY_PATH` so that the
 *     `ensureBinary()` call CloakBrowser performs inside `launch()` picks up
 *     the same, verified binary.
 *
 * @returns {Promise<string>} Absolute path to the validated binary.
 * @throws {Error} When even the fallback binary is incomplete.
 */
export async function ensureValidBinary() {
  // Record the verified path in the environment and hand it back to the caller.
  const pin = (validatedPath) => {
    process.env.CLOAKBROWSER_BINARY_PATH = validatedPath;
    return validatedPath;
  };

  const firstAttempt = await ensureBinary();
  if (!isBinaryComplete(firstAttempt)) {
    const brokenDir = getVersionedDir(firstAttempt);
    const missingEntries = missingDescription(firstAttempt);
    console.warn(
      `[fredy] CloakBrowser installation at ${brokenDir} is missing: ${missingEntries}. Removing and retrying.`,
    );

    removeCorruptInstallation(firstAttempt);

    const secondAttempt = await ensureBinary();
    if (isBinaryComplete(secondAttempt)) {
      return pin(secondAttempt);
    }

    const stillBrokenDir = getVersionedDir(secondAttempt);
    const stillMissing = missingDescription(secondAttempt);
    throw new Error(
      `CloakBrowser binary at ${stillBrokenDir} is still missing required files after re-download: ${stillMissing}`,
    );
  }

  return pin(firstAttempt);
}
|
||||||
@@ -3,121 +3,135 @@
|
|||||||
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
|
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import puppeteer from 'puppeteer-extra';
|
import { launch } from 'cloakbrowser/puppeteer';
|
||||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
||||||
import { debug, botDetected } from './utils.js';
|
import { debug, botDetected } from './utils.js';
|
||||||
import {
|
import { getPreLaunchConfig } from './botPrevention.js';
|
||||||
getPreLaunchConfig,
|
|
||||||
applyBotPreventionToPage,
|
|
||||||
applyLanguagePersistence,
|
|
||||||
applyPostNavigationHumanSignals,
|
|
||||||
} from './botPrevention.js';
|
|
||||||
import logger from '../logger.js';
|
import logger from '../logger.js';
|
||||||
import fs from 'fs';
|
import { trackPoi } from '../tracking/Tracker.js';
|
||||||
import os from 'os';
|
import { TRACKING_POIS } from '../../TRACKING_POIS.js';
|
||||||
import path from 'path';
|
|
||||||
|
|
||||||
puppeteer.use(StealthPlugin());
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Launch a CloakBrowser/Puppeteer browser instance with stealth and humanizer enabled.
|
||||||
|
*
|
||||||
|
* CloakBrowser applies 49 C++ source-level patches (canvas, WebGL, audio, WebRTC,
|
||||||
|
* navigator.*, automation signals) that are indistinguishable from a real browser.
|
||||||
|
* All fingerprinting and human-behaviour simulation is handled natively; no CDP
|
||||||
|
* overrides (setUserAgent, setExtraHTTPHeaders, evaluateOnNewDocument) are applied
|
||||||
|
* here because they would create detectable inconsistencies on top of the C++ patches.
|
||||||
|
*
|
||||||
|
* @param {string} url - Initial URL (used to derive locale/timezone hints).
|
||||||
|
* @param {object} [options]
|
||||||
|
* @param {boolean} [options.puppeteerHeadless]
|
||||||
|
* @param {number} [options.puppeteerTimeout]
|
||||||
|
* @param {string} [options.proxyUrl]
|
||||||
|
* @param {string} [options.timezone]
|
||||||
|
* @param {string} [options.acceptLanguage]
|
||||||
|
* @param {object} [options.viewport]
|
||||||
|
* @returns {Promise<import('puppeteer-core').Browser>}
|
||||||
|
*/
|
||||||
export async function launchBrowser(url, options) {
|
export async function launchBrowser(url, options) {
|
||||||
const preCfg = getPreLaunchConfig(url, options || {});
|
const preCfg = getPreLaunchConfig(url, options || {});
|
||||||
const launchArgs = [
|
|
||||||
|
// Docker requires --no-sandbox; CloakBrowser handles all stealth args internally.
|
||||||
|
// --ignore-certificate-errors is needed because CloakBrowser ships its own Chromium
|
||||||
|
// binary with an independent CA bundle that may not trust proxies or interceptors
|
||||||
|
// present in the host environment.
|
||||||
|
const args = [
|
||||||
'--no-sandbox',
|
'--no-sandbox',
|
||||||
'--disable-gpu',
|
|
||||||
'--disable-setuid-sandbox',
|
'--disable-setuid-sandbox',
|
||||||
'--disable-dev-shm-usage',
|
'--disable-dev-shm-usage',
|
||||||
'--disable-crash-reporter',
|
|
||||||
'--no-first-run',
|
'--no-first-run',
|
||||||
'--no-default-browser-check',
|
'--no-default-browser-check',
|
||||||
preCfg.langArg,
|
'--ignore-certificate-errors',
|
||||||
|
// Disables the zygote process model. Required in some container environments
|
||||||
|
// (e.g. limited kernel namespaces) where the zygote cannot acquire the
|
||||||
|
// locks it needs and exits with "Invalid file descriptor to ICU data received".
|
||||||
|
'--no-zygote',
|
||||||
preCfg.windowSizeArg,
|
preCfg.windowSizeArg,
|
||||||
...preCfg.extraArgs,
|
|
||||||
];
|
];
|
||||||
if (options?.proxyUrl) {
|
|
||||||
launchArgs.push(`--proxy-server=${options.proxyUrl}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
let userDataDir;
|
const browser = await launch({
|
||||||
let removeUserDataDir = false;
|
|
||||||
if (options && options.userDataDir) {
|
|
||||||
userDataDir = options.userDataDir;
|
|
||||||
} else {
|
|
||||||
const prefix = path.join(os.tmpdir(), 'puppeteer-fredy-');
|
|
||||||
userDataDir = fs.mkdtempSync(prefix);
|
|
||||||
removeUserDataDir = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// On ARM64 Docker, Chrome for Testing has no native binary - use system Chromium instead.
|
|
||||||
const executablePath =
|
|
||||||
options?.executablePath ||
|
|
||||||
(process.arch === 'arm64' && process.env.IS_DOCKER === 'true' ? '/usr/bin/chromium' : undefined);
|
|
||||||
|
|
||||||
const browser = await puppeteer.launch({
|
|
||||||
headless: options?.puppeteerHeadless ?? true,
|
headless: options?.puppeteerHeadless ?? true,
|
||||||
args: launchArgs,
|
humanize: true,
|
||||||
timeout: options?.puppeteerTimeout || 45_000,
|
args,
|
||||||
userDataDir,
|
// locale sets Accept-Language headers and JS navigator.language consistently
|
||||||
executablePath,
|
locale: preCfg.langForFlag,
|
||||||
|
...(options?.proxyUrl ? { proxy: options.proxyUrl } : {}),
|
||||||
|
...(preCfg.timezone ? { timezone: preCfg.timezone } : {}),
|
||||||
});
|
});
|
||||||
|
|
||||||
browser.__fredy_userDataDir = userDataDir;
|
|
||||||
browser.__fredy_removeUserDataDir = removeUserDataDir;
|
|
||||||
|
|
||||||
return browser;
|
return browser;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close a browser instance returned by {@link launchBrowser}.
|
||||||
|
*
|
||||||
|
* @param {import('puppeteer-core').Browser | null} browser
|
||||||
|
*/
|
||||||
export async function closeBrowser(browser) {
|
export async function closeBrowser(browser) {
|
||||||
if (!browser) return;
|
if (!browser) return;
|
||||||
const userDataDir = browser.__fredy_userDataDir;
|
|
||||||
const removeUserDataDir = browser.__fredy_removeUserDataDir;
|
|
||||||
try {
|
try {
|
||||||
await browser.close();
|
await browser.close();
|
||||||
} catch {
|
} catch {
|
||||||
// ignore
|
// ignore
|
||||||
}
|
}
|
||||||
if (removeUserDataDir && userDataDir) {
|
|
||||||
try {
|
|
||||||
await fs.promises.rm(userDataDir, { recursive: true, force: true });
|
|
||||||
} catch {
|
|
||||||
// ignore
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open a page in a (possibly reused) browser, navigate to `url`, and return the HTML source.
|
||||||
|
* Returns `null` when a bot-detection page is encountered or on timeout.
|
||||||
|
*
|
||||||
|
* @param {string} url
|
||||||
|
* @param {string | null} waitForSelector
|
||||||
|
* @param {object} [options]
|
||||||
|
* @returns {Promise<string | null>}
|
||||||
|
*/
|
||||||
export default async function execute(url, waitForSelector, options) {
|
export default async function execute(url, waitForSelector, options) {
|
||||||
let browser = options?.browser;
|
let browser = options?.browser;
|
||||||
let isExternalBrowser = !!browser;
|
let isExternalBrowser = !!browser;
|
||||||
let page;
|
let page;
|
||||||
let result;
|
let result;
|
||||||
try {
|
try {
|
||||||
debug(`Sending request to ${url} using Puppeteer.`);
|
debug(`Sending request to ${url} using CloakBrowser.`);
|
||||||
|
|
||||||
if (!isExternalBrowser) {
|
if (!isExternalBrowser) {
|
||||||
browser = await launchBrowser(url, options);
|
browser = await launchBrowser(url, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
page = await browser.newPage();
|
page = await browser.newPage();
|
||||||
const preCfg = getPreLaunchConfig(url, options || {});
|
|
||||||
await applyBotPreventionToPage(page, preCfg);
|
|
||||||
// Provide languages value before navigation
|
|
||||||
await applyLanguagePersistence(page, preCfg);
|
|
||||||
|
|
||||||
// Optional cookies
|
|
||||||
if (Array.isArray(options?.cookies) && options.cookies.length > 0) {
|
if (Array.isArray(options?.cookies) && options.cookies.length > 0) {
|
||||||
await page.setCookie(...options.cookies);
|
await page.setCookie(...options.cookies);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Navigation
|
// Warm-up navigation: visit a trusted page first so the site sees an
|
||||||
|
// established session before the actual target URL. Silently ignored on
|
||||||
|
// failure so it never blocks the main request.
|
||||||
|
if (options?.preNavigateUrl) {
|
||||||
|
try {
|
||||||
|
await page.goto(options.preNavigateUrl, { waitUntil: 'domcontentloaded', timeout: 30_000 });
|
||||||
|
await new Promise((r) => setTimeout(r, 1500 + Math.random() * 2000));
|
||||||
|
} catch {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const response = await page.goto(url, {
|
const response = await page.goto(url, {
|
||||||
waitUntil: options?.waitUntil || 'domcontentloaded',
|
waitUntil: options?.waitUntil || 'domcontentloaded',
|
||||||
timeout: options?.puppeteerTimeout || 60000,
|
timeout: options?.puppeteerTimeout || 60000,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Optionally wait and add subtle human-like interactions
|
// Optional second idle wait: useful for React SPAs that trigger API calls
|
||||||
await applyPostNavigationHumanSignals(page, preCfg);
|
// after domcontentloaded. Times out silently so we use whatever is rendered.
|
||||||
|
if (options?.waitForNetworkIdle) {
|
||||||
|
try {
|
||||||
|
await page.waitForNetworkIdle({ timeout: options?.waitForNetworkIdleTimeout ?? 60_000 });
|
||||||
|
} catch {
|
||||||
|
// ignore — we proceed with whatever the DOM contains at this point
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let pageSource;
|
let pageSource;
|
||||||
// if we're extracting data from a SPA, we must wait for the selector
|
|
||||||
if (waitForSelector != null) {
|
if (waitForSelector != null) {
|
||||||
const selectorTimeout = options?.puppeteerSelectorTimeout ?? options?.puppeteerTimeout ?? 30_000;
|
const selectorTimeout = options?.puppeteerSelectorTimeout ?? options?.puppeteerTimeout ?? 30_000;
|
||||||
await page.waitForSelector(waitForSelector, { timeout: selectorTimeout });
|
await page.waitForSelector(waitForSelector, { timeout: selectorTimeout });
|
||||||
@@ -133,15 +147,18 @@ export default async function execute(url, waitForSelector, options) {
|
|||||||
|
|
||||||
if (botDetected(pageSource, statusCode)) {
|
if (botDetected(pageSource, statusCode)) {
|
||||||
logger.warn('We have been detected as a bot :-/ Tried url: => ', url);
|
logger.warn('We have been detected as a bot :-/ Tried url: => ', url);
|
||||||
|
|
||||||
|
await trackPoi(TRACKING_POIS.DETECTED_AS_BOT);
|
||||||
|
|
||||||
result = null;
|
result = null;
|
||||||
} else {
|
} else {
|
||||||
result = pageSource || (await page.content());
|
result = pageSource || (await page.content());
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
if (error?.name?.includes('Timeout')) {
|
if (error?.name?.includes('Timeout')) {
|
||||||
logger.debug('Error executing with puppeteer executor', error);
|
logger.debug('Error executing with CloakBrowser executor', error);
|
||||||
} else {
|
} else {
|
||||||
logger.warn('Error executing with puppeteer executor', error);
|
logger.warn('Error executing with CloakBrowser executor', error);
|
||||||
}
|
}
|
||||||
result = null;
|
result = null;
|
||||||
} finally {
|
} finally {
|
||||||
|
|||||||
21
package.json
21
package.json
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "fredy",
|
"name": "fredy",
|
||||||
"version": "21.3.1",
|
"version": "22.0.3",
|
||||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"prepare": "husky",
|
"prepare": "husky",
|
||||||
@@ -62,9 +62,9 @@
|
|||||||
"Firefox ESR"
|
"Firefox ESR"
|
||||||
],
|
],
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@douyinfe/semi-icons": "^2.96.1",
|
"@douyinfe/semi-icons": "^2.97.0",
|
||||||
"@douyinfe/semi-ui": "2.96.1",
|
"@douyinfe/semi-ui": "2.97.0",
|
||||||
"@douyinfe/semi-ui-19": "^2.96.1",
|
"@douyinfe/semi-ui-19": "^2.97.0",
|
||||||
"@fastify/cookie": "^11.0.2",
|
"@fastify/cookie": "^11.0.2",
|
||||||
"@fastify/helmet": "^13.0.2",
|
"@fastify/helmet": "^13.0.2",
|
||||||
"@fastify/session": "^11.1.1",
|
"@fastify/session": "^11.1.1",
|
||||||
@@ -78,6 +78,7 @@
|
|||||||
"better-sqlite3": "^12.9.0",
|
"better-sqlite3": "^12.9.0",
|
||||||
"chart.js": "^4.5.1",
|
"chart.js": "^4.5.1",
|
||||||
"cheerio": "^1.2.0",
|
"cheerio": "^1.2.0",
|
||||||
|
"cloakbrowser": "^0.3.28",
|
||||||
"fastify": "^5.8.5",
|
"fastify": "^5.8.5",
|
||||||
"handlebars": "4.7.9",
|
"handlebars": "4.7.9",
|
||||||
"maplibre-gl": "^5.24.0",
|
"maplibre-gl": "^5.24.0",
|
||||||
@@ -88,9 +89,7 @@
|
|||||||
"nodemailer": "^8.0.7",
|
"nodemailer": "^8.0.7",
|
||||||
"p-throttle": "^8.1.0",
|
"p-throttle": "^8.1.0",
|
||||||
"package-up": "^5.0.0",
|
"package-up": "^5.0.0",
|
||||||
"puppeteer": "^24.43.0",
|
"puppeteer-core": "^24.43.1",
|
||||||
"puppeteer-extra": "^3.3.6",
|
|
||||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
|
||||||
"query-string": "9.3.1",
|
"query-string": "9.3.1",
|
||||||
"react": "19.2.6",
|
"react": "19.2.6",
|
||||||
"react-chartjs-2": "^5.3.1",
|
"react-chartjs-2": "^5.3.1",
|
||||||
@@ -99,9 +98,9 @@
|
|||||||
"react-router": "7.15.0",
|
"react-router": "7.15.0",
|
||||||
"react-router-dom": "7.15.0",
|
"react-router-dom": "7.15.0",
|
||||||
"resend": "^6.12.3",
|
"resend": "^6.12.3",
|
||||||
"semver": "^7.7.4",
|
"semver": "^7.8.0",
|
||||||
"slack": "11.0.2",
|
"slack": "11.0.2",
|
||||||
"vite": "8.0.11",
|
"vite": "8.0.12",
|
||||||
"x-var": "^3.0.1",
|
"x-var": "^3.0.1",
|
||||||
"zustand": "^5.0.13"
|
"zustand": "^5.0.13"
|
||||||
},
|
},
|
||||||
@@ -119,9 +118,9 @@
|
|||||||
"history": "5.3.0",
|
"history": "5.3.0",
|
||||||
"husky": "9.1.7",
|
"husky": "9.1.7",
|
||||||
"less": "4.6.4",
|
"less": "4.6.4",
|
||||||
"lint-staged": "16.4.0",
|
"lint-staged": "17.0.4",
|
||||||
"nodemon": "^3.1.14",
|
"nodemon": "^3.1.14",
|
||||||
"prettier": "3.8.3",
|
"prettier": "3.8.3",
|
||||||
"vitest": "^4.1.5"
|
"vitest": "^4.1.6"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
18
test/globalSetup.js
Normal file
18
test/globalSetup.js
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2026 by Christian Kellner.
|
||||||
|
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { ensureValidBinary } from '../lib/services/ensureValidBinary.js';
|
||||||
|
|
||||||
|
/**
 * Vitest global setup hook — executed once in the main process before any
 * worker is started.
 *
 * Unless the suite runs in offline mode (TEST_MODE === 'offline', where the
 * browser is fully mocked), it awaits ensureValidBinary() so the CloakBrowser
 * stealth Chromium binary is downloaded and validated up front. Partial or
 * corrupt installations are removed and re-downloaded by that call, so tests
 * never fail with "Invalid file descriptor to ICU data received".
 *
 * @returns {Promise<void>}
 */
export async function setup() {
  const isOfflineRun = process.env.TEST_MODE === 'offline';
  if (!isOfflineRun) {
    await ensureValidBinary();
  }
}
|
||||||
@@ -6,83 +6,89 @@
|
|||||||
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
||||||
import { get } from '../mocks/mockNotification.js';
|
import { get } from '../mocks/mockNotification.js';
|
||||||
import { providerConfig, mockFredy } from '../utils.js';
|
import { providerConfig, mockFredy } from '../utils.js';
|
||||||
import { expect, vi } from 'vitest';
|
import { expect } from 'vitest';
|
||||||
import * as provider from '../../lib/provider/immobilienDe.js';
|
import * as provider from '../../lib/provider/immobilienDe.js';
|
||||||
import * as mockStore from '../mocks/mockStore.js';
|
import { launchBrowser, closeBrowser } from '../../lib/services/extractor/puppeteerExtractor.js';
|
||||||
|
|
||||||
|
// One browser shared across the whole suite so both requests (search + detail)
|
||||||
|
// come from the same warm session, avoiding double cold-start bot detection.
|
||||||
|
const TEST_TIMEOUT = 120_000;
|
||||||
|
|
||||||
describe('#immobilien.de testsuite()', () => {
|
describe('#immobilien.de testsuite()', () => {
|
||||||
provider.init(providerConfig.immobilienDe, [], []);
|
provider.init(providerConfig.immobilienDe, [], []);
|
||||||
it('should test immobilien.de provider', async () => {
|
|
||||||
const mockedJob = {
|
|
||||||
id: 'test1',
|
|
||||||
notificationAdapter: null,
|
|
||||||
spatialFilter: null,
|
|
||||||
specFilter: null,
|
|
||||||
};
|
|
||||||
|
|
||||||
const Fredy = await mockFredy();
|
let browser;
|
||||||
const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
|
let liveListings;
|
||||||
const listing = await fredy.execute();
|
|
||||||
|
|
||||||
if (listing == null || listing.length === 0) {
|
beforeAll(async () => {
|
||||||
throw new Error('Listings is empty!');
|
browser = await launchBrowser(providerConfig.immobilienDe.url);
|
||||||
}
|
}, TEST_TIMEOUT);
|
||||||
|
|
||||||
expect(listing).toBeInstanceOf(Array);
|
afterAll(async () => {
|
||||||
const notificationObj = get();
|
await closeBrowser(browser);
|
||||||
expect(notificationObj).toBeTypeOf('object');
|
|
||||||
expect(notificationObj.serviceName).toBe('immobilienDe');
|
|
||||||
notificationObj.payload.forEach((notify) => {
|
|
||||||
/** check the actual structure **/
|
|
||||||
expect(notify.id).toBeTypeOf('string');
|
|
||||||
expect(notify.price).toBeTypeOf('string');
|
|
||||||
expect(notify.size).toBeTypeOf('string');
|
|
||||||
expect(notify.title).toBeTypeOf('string');
|
|
||||||
expect(notify.link).toBeTypeOf('string');
|
|
||||||
expect(notify.address).toBeTypeOf('string');
|
|
||||||
/** check the values if possible **/
|
|
||||||
expect(notify.price).toContain('€');
|
|
||||||
expect(notify.size).toContain('m²');
|
|
||||||
expect(notify.title).not.toBe('');
|
|
||||||
expect(notify.link).toContain('https://www.immobilien.de');
|
|
||||||
expect(notify.address).not.toBe('');
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it(
|
||||||
|
'should test immobilien.de provider',
|
||||||
|
async () => {
|
||||||
|
const mockedJob = {
|
||||||
|
id: 'test1',
|
||||||
|
notificationAdapter: null,
|
||||||
|
spatialFilter: null,
|
||||||
|
specFilter: null,
|
||||||
|
};
|
||||||
|
|
||||||
|
const Fredy = await mockFredy();
|
||||||
|
const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, browser);
|
||||||
|
liveListings = await fredy.execute();
|
||||||
|
|
||||||
|
if (liveListings == null || liveListings.length === 0) {
|
||||||
|
throw new Error('Listings is empty!');
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(liveListings).toBeInstanceOf(Array);
|
||||||
|
const notificationObj = get();
|
||||||
|
expect(notificationObj).toBeTypeOf('object');
|
||||||
|
expect(notificationObj.serviceName).toBe('immobilienDe');
|
||||||
|
notificationObj.payload.forEach((notify) => {
|
||||||
|
/** check the actual structure **/
|
||||||
|
expect(notify.id).toBeTypeOf('string');
|
||||||
|
expect(notify.price).toBeTypeOf('string');
|
||||||
|
expect(notify.size).toBeTypeOf('string');
|
||||||
|
expect(notify.title).toBeTypeOf('string');
|
||||||
|
expect(notify.link).toBeTypeOf('string');
|
||||||
|
expect(notify.address).toBeTypeOf('string');
|
||||||
|
/** check the values if possible **/
|
||||||
|
expect(notify.price).toContain('€');
|
||||||
|
expect(notify.size).toContain('m²');
|
||||||
|
expect(notify.title).not.toBe('');
|
||||||
|
expect(notify.link).toContain('https://www.immobilien.de');
|
||||||
|
expect(notify.address).not.toBe('');
|
||||||
|
});
|
||||||
|
},
|
||||||
|
TEST_TIMEOUT,
|
||||||
|
);
|
||||||
|
|
||||||
describe('with provider_details enabled', () => {
|
describe('with provider_details enabled', () => {
|
||||||
beforeEach(() => {
|
it(
|
||||||
vi.spyOn(mockStore, 'getUserSettings').mockReturnValue({ provider_details: [provider.metaInformation.id] });
|
'should enrich listings with details',
|
||||||
vi.spyOn(mockStore, 'getKnownListingHashesForJobAndProvider').mockReturnValue([]);
|
async () => {
|
||||||
});
|
if (!liveListings?.length) throw new Error('No listings from first test to enrich');
|
||||||
|
|
||||||
afterEach(() => {
|
// Call fetchDetails directly on the first live listing — no need to
|
||||||
vi.restoreAllMocks();
|
// re-scrape the search page. The shared browser keeps the session warm.
|
||||||
});
|
const enriched = await provider.config.fetchDetails(liveListings[0], browser);
|
||||||
|
|
||||||
it('should enrich listings with details', async () => {
|
if (enriched == null) return;
|
||||||
const Fredy = await mockFredy();
|
expect(enriched.link).toContain('https://www.immobilien.de');
|
||||||
provider.init(providerConfig.immobilienDe, [], []);
|
expect(enriched.address).toBeTypeOf('string');
|
||||||
const mockedJob = { id: 'test1', notificationAdapter: null, specFilter: null, spatialFilter: null };
|
expect(enriched.address).not.toBe('');
|
||||||
|
// description may be null if selectors don't match yet — falls back gracefully
|
||||||
const fredy = new Fredy(
|
if (enriched.description != null) {
|
||||||
provider.config,
|
expect(enriched.description).toBeTypeOf('string');
|
||||||
mockedJob,
|
|
||||||
provider.metaInformation.id,
|
|
||||||
{ checkAndAddEntry: () => false },
|
|
||||||
undefined,
|
|
||||||
);
|
|
||||||
const listings = await fredy.execute();
|
|
||||||
if (listings == null) return;
|
|
||||||
expect(listings).toBeInstanceOf(Array);
|
|
||||||
listings.forEach((listing) => {
|
|
||||||
expect(listing.link).toContain('https://www.immobilien.de');
|
|
||||||
expect(listing.address).toBeTypeOf('string');
|
|
||||||
expect(listing.address).not.toBe('');
|
|
||||||
// description may be null if selectors don't match yet - falls back gracefully
|
|
||||||
if (listing.description != null) {
|
|
||||||
expect(listing.description).toBeTypeOf('string');
|
|
||||||
}
|
}
|
||||||
});
|
},
|
||||||
});
|
TEST_TIMEOUT,
|
||||||
|
);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -3,85 +3,85 @@
|
|||||||
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
|
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { expect, vi } from 'vitest';
|
import { expect } from 'vitest';
|
||||||
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
||||||
import { mockFredy, providerConfig } from '../utils.js';
|
import { mockFredy, providerConfig } from '../utils.js';
|
||||||
import { get } from '../mocks/mockNotification.js';
|
import { get } from '../mocks/mockNotification.js';
|
||||||
import * as provider from '../../lib/provider/immoscout.js';
|
import * as provider from '../../lib/provider/immoscout.js';
|
||||||
import * as mockStore from '../mocks/mockStore.js';
|
|
||||||
|
// immoscout uses the mobile REST API (fetch-based, no browser). Both tests share
|
||||||
|
// the same module-level listings so the API is only queried once, avoiding
|
||||||
|
// duplicate requests that could trigger rate-limiting.
|
||||||
|
const TEST_TIMEOUT = 120_000;
|
||||||
|
|
||||||
describe('#immoscout provider testsuite()', () => {
|
describe('#immoscout provider testsuite()', () => {
|
||||||
provider.init(providerConfig.immoscout, [], []);
|
provider.init(providerConfig.immoscout, [], []);
|
||||||
it('should test immoscout provider', async () => {
|
|
||||||
const Fredy = await mockFredy();
|
|
||||||
const mockedJob = {
|
|
||||||
id: '',
|
|
||||||
notificationAdapter: null,
|
|
||||||
spatialFilter: null,
|
|
||||||
specFilter: null,
|
|
||||||
};
|
|
||||||
|
|
||||||
return await new Promise((resolve, reject) => {
|
let liveListings;
|
||||||
const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
|
|
||||||
fredy.execute().then((listings) => {
|
|
||||||
if (listings == null || listings.length === 0) {
|
|
||||||
reject('Listings is empty!');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
expect(listings).toBeInstanceOf(Array);
|
it(
|
||||||
const notificationObj = get();
|
'should test immoscout provider',
|
||||||
expect(notificationObj).toBeTypeOf('object');
|
async () => {
|
||||||
|
const Fredy = await mockFredy();
|
||||||
|
const mockedJob = {
|
||||||
|
id: '',
|
||||||
|
notificationAdapter: null,
|
||||||
|
spatialFilter: null,
|
||||||
|
specFilter: null,
|
||||||
|
};
|
||||||
|
|
||||||
// check if there is at least one valid notification
|
return await new Promise((resolve, reject) => {
|
||||||
const hasValidNotification = notificationObj.payload.some((notify) => {
|
const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
|
||||||
return (
|
fredy.execute().then((listings) => {
|
||||||
typeof notify.id === 'string' &&
|
if (listings == null || listings.length === 0) {
|
||||||
typeof notify.price === 'string' &&
|
reject('Listings is empty!');
|
||||||
notify.price.includes('€') &&
|
return;
|
||||||
typeof notify.size === 'string' &&
|
}
|
||||||
notify.size.includes('m²') &&
|
|
||||||
typeof notify.title === 'string' &&
|
liveListings = listings;
|
||||||
notify.title !== '' &&
|
expect(listings).toBeInstanceOf(Array);
|
||||||
typeof notify.link === 'string' &&
|
const notificationObj = get();
|
||||||
notify.link.includes('https://www.immobilienscout24.de/') &&
|
expect(notificationObj).toBeTypeOf('object');
|
||||||
typeof notify.address === 'string'
|
|
||||||
);
|
// check if there is at least one valid notification
|
||||||
|
const hasValidNotification = notificationObj.payload.some((notify) => {
|
||||||
|
return (
|
||||||
|
typeof notify.id === 'string' &&
|
||||||
|
typeof notify.price === 'string' &&
|
||||||
|
notify.price.includes('€') &&
|
||||||
|
typeof notify.size === 'string' &&
|
||||||
|
notify.size.includes('m²') &&
|
||||||
|
typeof notify.title === 'string' &&
|
||||||
|
notify.title !== '' &&
|
||||||
|
typeof notify.link === 'string' &&
|
||||||
|
notify.link.includes('https://www.immobilienscout24.de/') &&
|
||||||
|
typeof notify.address === 'string'
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(hasValidNotification).toBe(true);
|
||||||
|
resolve();
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(hasValidNotification).toBe(true);
|
|
||||||
resolve();
|
|
||||||
});
|
});
|
||||||
});
|
},
|
||||||
});
|
TEST_TIMEOUT,
|
||||||
|
);
|
||||||
|
|
||||||
describe('with provider_details enabled', () => {
|
describe('with provider_details enabled', () => {
|
||||||
beforeEach(() => {
|
it(
|
||||||
vi.spyOn(mockStore, 'getUserSettings').mockReturnValue({ provider_details: [provider.metaInformation.id] });
|
'should enrich listings with details',
|
||||||
vi.spyOn(mockStore, 'getKnownListingHashesForJobAndProvider').mockReturnValue([]);
|
async () => {
|
||||||
});
|
if (!liveListings?.length) throw new Error('No listings from first test to enrich');
|
||||||
|
|
||||||
afterEach(() => {
|
// Call fetchDetails directly on the first live listing — no need to
|
||||||
vi.restoreAllMocks();
|
// re-query the search API. immoscout uses fetch (no browser).
|
||||||
});
|
const enriched = await provider.config.fetchDetails(liveListings[0]);
|
||||||
|
|
||||||
it('should enrich listings with details', async () => {
|
expect(enriched).toBeTruthy();
|
||||||
const Fredy = await mockFredy();
|
expect(enriched.description).toBeTypeOf('string');
|
||||||
provider.init(providerConfig.immoscout, [], []);
|
expect(enriched.description).not.toBe('');
|
||||||
const mockedJob = { id: '', notificationAdapter: null, specFilter: null, spatialFilter: null };
|
},
|
||||||
const fredy = new Fredy(
|
TEST_TIMEOUT,
|
||||||
provider.config,
|
);
|
||||||
mockedJob,
|
|
||||||
provider.metaInformation.id,
|
|
||||||
{ checkAndAddEntry: () => false },
|
|
||||||
undefined,
|
|
||||||
);
|
|
||||||
const listings = await fredy.execute();
|
|
||||||
expect(listings).toBeInstanceOf(Array);
|
|
||||||
listings.forEach((listing) => {
|
|
||||||
expect(listing.description).toBeTypeOf('string');
|
|
||||||
expect(listing.description).not.toBe('');
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -6,87 +6,95 @@
|
|||||||
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
||||||
import { get } from '../mocks/mockNotification.js';
|
import { get } from '../mocks/mockNotification.js';
|
||||||
import { mockFredy, providerConfig } from '../utils.js';
|
import { mockFredy, providerConfig } from '../utils.js';
|
||||||
import { expect, vi } from 'vitest';
|
import { expect } from 'vitest';
|
||||||
import * as provider from '../../lib/provider/immowelt.js';
|
import * as provider from '../../lib/provider/immowelt.js';
|
||||||
import * as mockStore from '../mocks/mockStore.js';
|
import { launchBrowser, closeBrowser } from '../../lib/services/extractor/puppeteerExtractor.js';
|
||||||
|
|
||||||
|
// One browser shared across the whole suite so both requests (search + detail)
|
||||||
|
// come from the same warm session. Immowelt's CDN challenges cold sessions
|
||||||
|
// aggressively; a shared warm browser prevents the second request from being
|
||||||
|
// blocked as a bot hit.
|
||||||
|
const TEST_TIMEOUT = 180_000;
|
||||||
|
|
||||||
describe('#immowelt testsuite()', () => {
|
describe('#immowelt testsuite()', () => {
|
||||||
it('should test immowelt provider', async () => {
|
let browser;
|
||||||
const Fredy = await mockFredy();
|
let liveListings;
|
||||||
const mockedJob = {
|
|
||||||
id: 'immowelt',
|
|
||||||
notificationAdapter: null,
|
|
||||||
spatialFilter: null,
|
|
||||||
specFilter: null,
|
|
||||||
};
|
|
||||||
provider.init(providerConfig.immowelt, [], []);
|
|
||||||
|
|
||||||
const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
|
beforeAll(async () => {
|
||||||
|
browser = await launchBrowser(providerConfig.immowelt.url);
|
||||||
|
}, TEST_TIMEOUT);
|
||||||
|
|
||||||
const listing = await fredy.execute();
|
afterAll(async () => {
|
||||||
|
await closeBrowser(browser);
|
||||||
if (listing == null || listing.length === 0) {
|
|
||||||
throw new Error('Listings is empty!');
|
|
||||||
}
|
|
||||||
|
|
||||||
expect(listing).toBeInstanceOf(Array);
|
|
||||||
const notificationObj = get();
|
|
||||||
expect(notificationObj).toBeTypeOf('object');
|
|
||||||
expect(notificationObj.serviceName).toBe('immowelt');
|
|
||||||
notificationObj.payload.forEach((notify) => {
|
|
||||||
/** check the actual structure **/
|
|
||||||
expect(notify.id).toBeTypeOf('string');
|
|
||||||
if (notify.price != null) {
|
|
||||||
expect(notify.price).toBeTypeOf('string');
|
|
||||||
expect(notify.price).toContain('€');
|
|
||||||
}
|
|
||||||
expect(notify.title).toBeTypeOf('string');
|
|
||||||
expect(notify.link).toBeTypeOf('string');
|
|
||||||
expect(notify.address).toBeTypeOf('string');
|
|
||||||
/** check the values if possible **/
|
|
||||||
if (notify.size != null && notify.size.trim().toLowerCase() !== 'k.a.') {
|
|
||||||
expect(notify.size).toBeTypeOf('string');
|
|
||||||
expect(notify.size).toContain('m²');
|
|
||||||
}
|
|
||||||
expect(notify.title).not.toBe('');
|
|
||||||
expect(notify.link).toContain('https://www.immowelt.de');
|
|
||||||
expect(notify.address).not.toBe('');
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it(
|
||||||
|
'should test immowelt provider',
|
||||||
|
async () => {
|
||||||
|
const Fredy = await mockFredy();
|
||||||
|
const mockedJob = {
|
||||||
|
id: 'immowelt',
|
||||||
|
notificationAdapter: null,
|
||||||
|
spatialFilter: null,
|
||||||
|
specFilter: null,
|
||||||
|
};
|
||||||
|
provider.init(providerConfig.immowelt, [], []);
|
||||||
|
|
||||||
|
const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, browser);
|
||||||
|
|
||||||
|
liveListings = await fredy.execute();
|
||||||
|
|
||||||
|
if (liveListings == null || liveListings.length === 0) {
|
||||||
|
throw new Error('Listings is empty!');
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(liveListings).toBeInstanceOf(Array);
|
||||||
|
const notificationObj = get();
|
||||||
|
expect(notificationObj).toBeTypeOf('object');
|
||||||
|
expect(notificationObj.serviceName).toBe('immowelt');
|
||||||
|
notificationObj.payload.forEach((notify) => {
|
||||||
|
/** check the actual structure **/
|
||||||
|
expect(notify.id).toBeTypeOf('string');
|
||||||
|
if (notify.price != null) {
|
||||||
|
expect(notify.price).toBeTypeOf('string');
|
||||||
|
expect(notify.price).toContain('€');
|
||||||
|
}
|
||||||
|
expect(notify.title).toBeTypeOf('string');
|
||||||
|
expect(notify.link).toBeTypeOf('string');
|
||||||
|
expect(notify.address).toBeTypeOf('string');
|
||||||
|
/** check the values if possible **/
|
||||||
|
if (notify.size != null && notify.size.trim().toLowerCase() !== 'k.a.') {
|
||||||
|
expect(notify.size).toBeTypeOf('string');
|
||||||
|
expect(notify.size).toContain('m²');
|
||||||
|
}
|
||||||
|
expect(notify.title).not.toBe('');
|
||||||
|
expect(notify.link).toContain('https://www.immowelt.de');
|
||||||
|
expect(notify.address).not.toBe('');
|
||||||
|
});
|
||||||
|
},
|
||||||
|
TEST_TIMEOUT,
|
||||||
|
);
|
||||||
|
|
||||||
describe('with provider_details enabled', () => {
|
describe('with provider_details enabled', () => {
|
||||||
beforeEach(() => {
|
it(
|
||||||
vi.spyOn(mockStore, 'getUserSettings').mockReturnValue({ provider_details: [provider.metaInformation.id] });
|
'should enrich listings with details',
|
||||||
vi.spyOn(mockStore, 'getKnownListingHashesForJobAndProvider').mockReturnValue([]);
|
async () => {
|
||||||
});
|
if (!liveListings?.length) throw new Error('No listings from first test to enrich');
|
||||||
|
|
||||||
afterEach(() => {
|
// Call fetchDetails directly on the first live listing — no need to
|
||||||
vi.restoreAllMocks();
|
// re-scrape the search page. The shared browser keeps the session warm.
|
||||||
});
|
const enriched = await provider.config.fetchDetails(liveListings[0], browser);
|
||||||
|
|
||||||
it('should enrich listings with details', async () => {
|
expect(enriched).toBeTruthy();
|
||||||
const Fredy = await mockFredy();
|
expect(enriched.link).toContain('https://www.immowelt.de');
|
||||||
provider.init(providerConfig.immowelt, [], []);
|
expect(enriched.address).toBeTypeOf('string');
|
||||||
const mockedJob = { id: 'immowelt', notificationAdapter: null, specFilter: null, spatialFilter: null };
|
expect(enriched.address).not.toBe('');
|
||||||
|
|
||||||
const fredy = new Fredy(
|
|
||||||
provider.config,
|
|
||||||
mockedJob,
|
|
||||||
provider.metaInformation.id,
|
|
||||||
{ checkAndAddEntry: () => false },
|
|
||||||
undefined,
|
|
||||||
);
|
|
||||||
const listings = await fredy.execute();
|
|
||||||
expect(listings).toBeInstanceOf(Array);
|
|
||||||
listings.forEach((listing) => {
|
|
||||||
expect(listing.link).toContain('https://www.immowelt.de');
|
|
||||||
expect(listing.address).toBeTypeOf('string');
|
|
||||||
expect(listing.address).not.toBe('');
|
|
||||||
// description is enriched from the detail page; falls back gracefully if blocked
|
// description is enriched from the detail page; falls back gracefully if blocked
|
||||||
if (listing.description != null) {
|
if (enriched.description != null) {
|
||||||
expect(listing.description).toBeTypeOf('string');
|
expect(enriched.description).toBeTypeOf('string');
|
||||||
}
|
}
|
||||||
});
|
},
|
||||||
});
|
TEST_TIMEOUT,
|
||||||
|
);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -6,80 +6,88 @@
|
|||||||
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
||||||
import { get } from '../mocks/mockNotification.js';
|
import { get } from '../mocks/mockNotification.js';
|
||||||
import { mockFredy, providerConfig } from '../utils.js';
|
import { mockFredy, providerConfig } from '../utils.js';
|
||||||
import { expect, vi } from 'vitest';
|
import { expect } from 'vitest';
|
||||||
import * as provider from '../../lib/provider/kleinanzeigen.js';
|
import * as provider from '../../lib/provider/kleinanzeigen.js';
|
||||||
import * as mockStore from '../mocks/mockStore.js';
|
import { launchBrowser, closeBrowser } from '../../lib/services/extractor/puppeteerExtractor.js';
|
||||||
|
|
||||||
|
// One browser shared across the whole suite so both requests (search + detail)
|
||||||
|
// come from the same warm session. Kleinanzeigen rate-limits cold browser
|
||||||
|
// sessions; a shared warm browser prevents the second request from being blocked.
|
||||||
|
const TEST_TIMEOUT = 180_000;
|
||||||
|
|
||||||
describe('#kleinanzeigen testsuite()', () => {
|
describe('#kleinanzeigen testsuite()', () => {
|
||||||
it('should test kleinanzeigen provider', async () => {
|
let browser;
|
||||||
const Fredy = await mockFredy();
|
let liveListings;
|
||||||
const mockedJob = {
|
|
||||||
id: 'kleinanzeigen',
|
|
||||||
notificationAdapter: null,
|
|
||||||
spatialFilter: null,
|
|
||||||
specFilter: null,
|
|
||||||
};
|
|
||||||
provider.init(providerConfig.kleinanzeigen, [], []);
|
|
||||||
return await new Promise((resolve, reject) => {
|
|
||||||
const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
|
|
||||||
|
|
||||||
fredy.execute().then((listing) => {
|
beforeAll(async () => {
|
||||||
if (listing == null || listing.length === 0) {
|
browser = await launchBrowser(providerConfig.kleinanzeigen.url);
|
||||||
reject('Listings is empty!');
|
}, TEST_TIMEOUT);
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
expect(listing).toBeInstanceOf(Array);
|
afterAll(async () => {
|
||||||
const notificationObj = get();
|
await closeBrowser(browser);
|
||||||
expect(notificationObj).toBeTypeOf('object');
|
|
||||||
expect(notificationObj.serviceName).toBe('kleinanzeigen');
|
|
||||||
notificationObj.payload.forEach((notify) => {
|
|
||||||
/** check the actual structure **/
|
|
||||||
expect(notify.id).toBeTypeOf('string');
|
|
||||||
expect(notify.title).toBeTypeOf('string');
|
|
||||||
expect(notify.link).toBeTypeOf('string');
|
|
||||||
expect(notify.address).toBeTypeOf('string');
|
|
||||||
/** check the values if possible **/
|
|
||||||
expect(notify.title).not.toBe('');
|
|
||||||
expect(notify.link).toContain('https://www.kleinanzeigen.de');
|
|
||||||
expect(notify.address).not.toBe('');
|
|
||||||
});
|
|
||||||
resolve();
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it(
|
||||||
|
'should test kleinanzeigen provider',
|
||||||
|
async () => {
|
||||||
|
const Fredy = await mockFredy();
|
||||||
|
const mockedJob = {
|
||||||
|
id: 'kleinanzeigen',
|
||||||
|
notificationAdapter: null,
|
||||||
|
spatialFilter: null,
|
||||||
|
specFilter: null,
|
||||||
|
};
|
||||||
|
provider.init(providerConfig.kleinanzeigen, [], []);
|
||||||
|
return await new Promise((resolve, reject) => {
|
||||||
|
const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, browser);
|
||||||
|
|
||||||
|
fredy.execute().then((listing) => {
|
||||||
|
if (listing == null || listing.length === 0) {
|
||||||
|
reject('Listings is empty!');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
liveListings = listing;
|
||||||
|
expect(listing).toBeInstanceOf(Array);
|
||||||
|
const notificationObj = get();
|
||||||
|
expect(notificationObj).toBeTypeOf('object');
|
||||||
|
expect(notificationObj.serviceName).toBe('kleinanzeigen');
|
||||||
|
notificationObj.payload.forEach((notify) => {
|
||||||
|
/** check the actual structure **/
|
||||||
|
expect(notify.id).toBeTypeOf('string');
|
||||||
|
expect(notify.title).toBeTypeOf('string');
|
||||||
|
expect(notify.link).toBeTypeOf('string');
|
||||||
|
expect(notify.address).toBeTypeOf('string');
|
||||||
|
/** check the values if possible **/
|
||||||
|
expect(notify.title).not.toBe('');
|
||||||
|
expect(notify.link).toContain('https://www.kleinanzeigen.de');
|
||||||
|
expect(notify.address).not.toBe('');
|
||||||
|
});
|
||||||
|
resolve();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
},
|
||||||
|
TEST_TIMEOUT,
|
||||||
|
);
|
||||||
|
|
||||||
describe('with provider_details enabled', () => {
|
describe('with provider_details enabled', () => {
|
||||||
beforeEach(() => {
|
it(
|
||||||
vi.spyOn(mockStore, 'getUserSettings').mockReturnValue({ provider_details: [provider.metaInformation.id] });
|
'should enrich listings with details',
|
||||||
vi.spyOn(mockStore, 'getKnownListingHashesForJobAndProvider').mockReturnValue([]);
|
async () => {
|
||||||
});
|
if (!liveListings?.length) throw new Error('No listings from first test to enrich');
|
||||||
|
|
||||||
afterEach(() => {
|
// Call fetchDetails directly on the first live listing — no need to
|
||||||
vi.restoreAllMocks();
|
// re-scrape the search page. The shared browser keeps the session warm.
|
||||||
});
|
const enriched = await provider.config.fetchDetails(liveListings[0], browser);
|
||||||
|
|
||||||
it('should enrich listings with details', async () => {
|
expect(enriched).toBeTruthy();
|
||||||
const Fredy = await mockFredy();
|
expect(enriched.link).toContain('https://www.kleinanzeigen.de');
|
||||||
provider.init(providerConfig.kleinanzeigen, [], []);
|
expect(enriched.address).toBeTypeOf('string');
|
||||||
const mockedJob = { id: 'kleinanzeigen', notificationAdapter: null, specFilter: null, spatialFilter: null };
|
expect(enriched.address).not.toBe('');
|
||||||
|
expect(enriched.description).toBeTypeOf('string');
|
||||||
const fredy = new Fredy(
|
expect(enriched.description).not.toBe('');
|
||||||
provider.config,
|
},
|
||||||
mockedJob,
|
TEST_TIMEOUT,
|
||||||
provider.metaInformation.id,
|
);
|
||||||
{ checkAndAddEntry: () => false },
|
|
||||||
undefined,
|
|
||||||
);
|
|
||||||
const listings = await fredy.execute();
|
|
||||||
expect(listings).toBeInstanceOf(Array);
|
|
||||||
listings.forEach((listing) => {
|
|
||||||
expect(listing.link).toContain('https://www.kleinanzeigen.de');
|
|
||||||
expect(listing.address).toBeTypeOf('string');
|
|
||||||
expect(listing.address).not.toBe('');
|
|
||||||
expect(listing.description).toBeTypeOf('string');
|
|
||||||
expect(listing.description).not.toBe('');
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -9,81 +9,97 @@ import { mockFredy, providerConfig } from '../utils.js';
|
|||||||
import { expect, vi } from 'vitest';
|
import { expect, vi } from 'vitest';
|
||||||
import * as provider from '../../lib/provider/sparkasse.js';
|
import * as provider from '../../lib/provider/sparkasse.js';
|
||||||
import * as mockStore from '../mocks/mockStore.js';
|
import * as mockStore from '../mocks/mockStore.js';
|
||||||
|
import { launchBrowser, closeBrowser } from '../../lib/services/extractor/puppeteerExtractor.js';
|
||||||
|
|
||||||
|
// One browser shared across the whole suite so both requests (search + detail)
|
||||||
|
// come from the same warm session. This prevents the second request from being
|
||||||
|
// flagged as a cold-start bot hit.
|
||||||
|
const TEST_TIMEOUT = 120_000;
|
||||||
|
|
||||||
describe('#sparkasse testsuite()', () => {
|
describe('#sparkasse testsuite()', () => {
|
||||||
it('should test sparkasse provider', async () => {
|
let browser;
|
||||||
const Fredy = await mockFredy();
|
let liveListings;
|
||||||
const mockedJob = {
|
|
||||||
id: 'sparkasse',
|
|
||||||
notificationAdapter: null,
|
|
||||||
spatialFilter: null,
|
|
||||||
specFilter: null,
|
|
||||||
};
|
|
||||||
provider.init(providerConfig.sparkasse, []);
|
|
||||||
|
|
||||||
const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
|
beforeAll(async () => {
|
||||||
|
browser = await launchBrowser(providerConfig.sparkasse.url);
|
||||||
|
}, TEST_TIMEOUT);
|
||||||
|
|
||||||
const listing = await fredy.execute();
|
afterAll(async () => {
|
||||||
|
await closeBrowser(browser);
|
||||||
if (listing == null || listing.length === 0) {
|
|
||||||
throw new Error('Listings is empty!');
|
|
||||||
}
|
|
||||||
|
|
||||||
expect(listing).toBeInstanceOf(Array);
|
|
||||||
const notificationObj = get();
|
|
||||||
expect(notificationObj).toBeTypeOf('object');
|
|
||||||
expect(notificationObj.serviceName).toBe('sparkasse');
|
|
||||||
notificationObj.payload.forEach((notify) => {
|
|
||||||
/** check the actual structure **/
|
|
||||||
expect(notify.id).toBeTypeOf('string');
|
|
||||||
expect(notify.price).toBeTypeOf('string');
|
|
||||||
expect(notify.price).toContain('€');
|
|
||||||
expect(notify.size).toBeTypeOf('string');
|
|
||||||
expect(notify.size).toContain('m²');
|
|
||||||
expect(notify.title).toBeTypeOf('string');
|
|
||||||
expect(notify.link).toBeTypeOf('string');
|
|
||||||
expect(notify.address).toBeTypeOf('string');
|
|
||||||
/** check the values if possible **/
|
|
||||||
expect(notify.size).toBeTypeOf('string');
|
|
||||||
expect(notify.title).not.toBe('');
|
|
||||||
expect(notify.address).not.toBe('');
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it(
|
||||||
|
'should test sparkasse provider',
|
||||||
|
async () => {
|
||||||
|
const Fredy = await mockFredy();
|
||||||
|
const mockedJob = {
|
||||||
|
id: 'sparkasse',
|
||||||
|
notificationAdapter: null,
|
||||||
|
spatialFilter: null,
|
||||||
|
specFilter: null,
|
||||||
|
};
|
||||||
|
provider.init(providerConfig.sparkasse, []);
|
||||||
|
|
||||||
|
const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, browser);
|
||||||
|
|
||||||
|
liveListings = await fredy.execute();
|
||||||
|
|
||||||
|
if (liveListings == null || liveListings.length === 0) {
|
||||||
|
throw new Error('Listings is empty!');
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(liveListings).toBeInstanceOf(Array);
|
||||||
|
const notificationObj = get();
|
||||||
|
expect(notificationObj).toBeTypeOf('object');
|
||||||
|
expect(notificationObj.serviceName).toBe('sparkasse');
|
||||||
|
notificationObj.payload.forEach((notify) => {
|
||||||
|
/** check the actual structure **/
|
||||||
|
expect(notify.id).toBeTypeOf('string');
|
||||||
|
expect(notify.price).toBeTypeOf('string');
|
||||||
|
expect(notify.price).toContain('€');
|
||||||
|
expect(notify.size).toBeTypeOf('string');
|
||||||
|
expect(notify.size).toContain('m²');
|
||||||
|
expect(notify.title).toBeTypeOf('string');
|
||||||
|
expect(notify.link).toBeTypeOf('string');
|
||||||
|
expect(notify.address).toBeTypeOf('string');
|
||||||
|
/** check the values if possible **/
|
||||||
|
expect(notify.size).toBeTypeOf('string');
|
||||||
|
expect(notify.title).not.toBe('');
|
||||||
|
expect(notify.address).not.toBe('');
|
||||||
|
});
|
||||||
|
},
|
||||||
|
TEST_TIMEOUT,
|
||||||
|
);
|
||||||
|
|
||||||
describe('with provider_details enabled', () => {
|
describe('with provider_details enabled', () => {
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
vi.spyOn(mockStore, 'getUserSettings').mockReturnValue({ provider_details: [provider.metaInformation.id] });
|
vi.spyOn(mockStore, 'getUserSettings').mockReturnValue({ provider_details: [provider.metaInformation.id] });
|
||||||
vi.spyOn(mockStore, 'getKnownListingHashesForJobAndProvider').mockReturnValue([]);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
afterEach(() => {
|
afterEach(() => {
|
||||||
vi.restoreAllMocks();
|
vi.restoreAllMocks();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should enrich listings with details', async () => {
|
it(
|
||||||
const Fredy = await mockFredy();
|
'should enrich listings with details',
|
||||||
provider.init(providerConfig.sparkasse, []);
|
async () => {
|
||||||
const mockedJob = { id: 'sparkasse', notificationAdapter: null, specFilter: null, spatialFilter: null };
|
if (!liveListings?.length) throw new Error('No listings from first test to enrich');
|
||||||
|
|
||||||
const fredy = new Fredy(
|
// Call fetchDetails directly on the first live listing — no need to
|
||||||
provider.config,
|
// re-scrape the search page. The shared browser keeps the session warm.
|
||||||
mockedJob,
|
const enriched = await provider.config.fetchDetails(liveListings[0], browser);
|
||||||
provider.metaInformation.id,
|
|
||||||
{ checkAndAddEntry: () => false },
|
expect(enriched).toBeTruthy();
|
||||||
undefined,
|
expect(enriched.link).toContain('https://immobilien.sparkasse.de');
|
||||||
);
|
expect(enriched.address).toBeTypeOf('string');
|
||||||
const listings = await fredy.execute();
|
expect(enriched.address).not.toBe('');
|
||||||
expect(listings).toBeInstanceOf(Array);
|
// description is enriched from the detail page; falls back gracefully if blocked
|
||||||
listings.forEach((listing) => {
|
if (enriched.description != null) {
|
||||||
expect(listing.link).toContain('https://immobilien.sparkasse.de');
|
expect(enriched.description).toBeTypeOf('string');
|
||||||
expect(listing.address).toBeTypeOf('string');
|
expect(enriched.description).not.toBe('');
|
||||||
expect(listing.address).not.toBe('');
|
|
||||||
// description is enriched from the detail page; falls back gracefully if bot-detected
|
|
||||||
if (listing.description != null) {
|
|
||||||
expect(listing.description).toBeTypeOf('string');
|
|
||||||
expect(listing.description).not.toBe('');
|
|
||||||
}
|
}
|
||||||
});
|
},
|
||||||
});
|
TEST_TIMEOUT,
|
||||||
|
);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -6,77 +6,85 @@
|
|||||||
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
||||||
import { get } from '../mocks/mockNotification.js';
|
import { get } from '../mocks/mockNotification.js';
|
||||||
import { mockFredy, providerConfig } from '../utils.js';
|
import { mockFredy, providerConfig } from '../utils.js';
|
||||||
import { expect, vi } from 'vitest';
|
import { expect } from 'vitest';
|
||||||
import * as provider from '../../lib/provider/wgGesucht.js';
|
import * as provider from '../../lib/provider/wgGesucht.js';
|
||||||
import * as mockStore from '../mocks/mockStore.js';
|
import { launchBrowser, closeBrowser } from '../../lib/services/extractor/puppeteerExtractor.js';
|
||||||
|
|
||||||
|
// One browser shared across the whole suite so both requests (search + detail)
|
||||||
|
// come from the same warm session, avoiding double cold-start bot detection.
|
||||||
|
const TEST_TIMEOUT = 120_000;
|
||||||
|
|
||||||
describe('#wgGesucht testsuite()', () => {
|
describe('#wgGesucht testsuite()', () => {
|
||||||
provider.init(providerConfig.wgGesucht, [], []);
|
provider.init(providerConfig.wgGesucht, [], []);
|
||||||
it('should test wgGesucht provider', { timeout: 120000 }, async () => {
|
|
||||||
const Fredy = await mockFredy();
|
|
||||||
const mockedJob = {
|
|
||||||
id: 'wgGesucht',
|
|
||||||
notificationAdapter: null,
|
|
||||||
spatialFilter: null,
|
|
||||||
specFilter: null,
|
|
||||||
};
|
|
||||||
|
|
||||||
return await new Promise((resolve, reject) => {
|
let browser;
|
||||||
const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, undefined);
|
let liveListings;
|
||||||
|
|
||||||
fredy.execute().then((listing) => {
|
beforeAll(async () => {
|
||||||
if (listing == null || listing.length === 0) {
|
browser = await launchBrowser(providerConfig.wgGesucht.url);
|
||||||
reject('Listings is empty!');
|
}, TEST_TIMEOUT);
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
expect(listing).toBeInstanceOf(Array);
|
afterAll(async () => {
|
||||||
const notificationObj = get();
|
await closeBrowser(browser);
|
||||||
expect(notificationObj.serviceName).toBe('wgGesucht');
|
|
||||||
notificationObj.payload.forEach((notify) => {
|
|
||||||
expect(notify).toBeTypeOf('object');
|
|
||||||
/** check the actual structure **/
|
|
||||||
expect(notify.id).toBeTypeOf('string');
|
|
||||||
expect(notify.title).toBeTypeOf('string');
|
|
||||||
// expect(notify.details).toBeTypeOf('string');
|
|
||||||
expect(notify.price).toBeTypeOf('string');
|
|
||||||
expect(notify.price).toContain('€');
|
|
||||||
expect(notify.link).toBeTypeOf('string');
|
|
||||||
});
|
|
||||||
resolve();
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it(
|
||||||
|
'should test wgGesucht provider',
|
||||||
|
async () => {
|
||||||
|
const Fredy = await mockFredy();
|
||||||
|
const mockedJob = {
|
||||||
|
id: 'wgGesucht',
|
||||||
|
notificationAdapter: null,
|
||||||
|
spatialFilter: null,
|
||||||
|
specFilter: null,
|
||||||
|
};
|
||||||
|
|
||||||
|
return await new Promise((resolve, reject) => {
|
||||||
|
const fredy = new Fredy(provider.config, mockedJob, provider.metaInformation.id, similarityCache, browser);
|
||||||
|
|
||||||
|
fredy.execute().then((listing) => {
|
||||||
|
if (listing == null || listing.length === 0) {
|
||||||
|
reject('Listings is empty!');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
liveListings = listing;
|
||||||
|
expect(listing).toBeInstanceOf(Array);
|
||||||
|
const notificationObj = get();
|
||||||
|
expect(notificationObj.serviceName).toBe('wgGesucht');
|
||||||
|
notificationObj.payload.forEach((notify) => {
|
||||||
|
expect(notify).toBeTypeOf('object');
|
||||||
|
/** check the actual structure **/
|
||||||
|
expect(notify.id).toBeTypeOf('string');
|
||||||
|
expect(notify.title).toBeTypeOf('string');
|
||||||
|
// expect(notify.details).toBeTypeOf('string');
|
||||||
|
expect(notify.price).toBeTypeOf('string');
|
||||||
|
expect(notify.price).toContain('€');
|
||||||
|
expect(notify.link).toBeTypeOf('string');
|
||||||
|
});
|
||||||
|
resolve();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
},
|
||||||
|
TEST_TIMEOUT,
|
||||||
|
);
|
||||||
|
|
||||||
describe('with provider_details enabled', () => {
|
describe('with provider_details enabled', () => {
|
||||||
beforeEach(() => {
|
it(
|
||||||
vi.spyOn(mockStore, 'getUserSettings').mockReturnValue({ provider_details: [provider.metaInformation.id] });
|
'should enrich listings with details',
|
||||||
vi.spyOn(mockStore, 'getKnownListingHashesForJobAndProvider').mockReturnValue([]);
|
async () => {
|
||||||
});
|
if (!liveListings?.length) throw new Error('No listings from first test to enrich');
|
||||||
|
|
||||||
afterEach(() => {
|
// Call fetchDetails directly on the first live listing — no need to
|
||||||
vi.restoreAllMocks();
|
// re-scrape the search page. The shared browser keeps the session warm.
|
||||||
});
|
const enriched = await provider.config.fetchDetails(liveListings[0], browser);
|
||||||
|
|
||||||
it('should enrich listings with details', async () => {
|
expect(enriched).toBeTruthy();
|
||||||
const Fredy = await mockFredy();
|
expect(enriched.link).toContain('https://www.wg-gesucht.de');
|
||||||
provider.init(providerConfig.wgGesucht, [], []);
|
expect(enriched.description).toBeTypeOf('string');
|
||||||
const mockedJob = { id: 'wgGesucht', notificationAdapter: null, specFilter: null, spatialFilter: null };
|
expect(enriched.description).not.toBe('');
|
||||||
|
},
|
||||||
const fredy = new Fredy(
|
TEST_TIMEOUT,
|
||||||
provider.config,
|
);
|
||||||
mockedJob,
|
|
||||||
provider.metaInformation.id,
|
|
||||||
{ checkAndAddEntry: () => false },
|
|
||||||
undefined,
|
|
||||||
);
|
|
||||||
const listings = await fredy.execute();
|
|
||||||
expect(listings).toBeInstanceOf(Array);
|
|
||||||
listings.forEach((listing) => {
|
|
||||||
expect(listing.link).toContain('https://www.wg-gesucht.de');
|
|
||||||
expect(listing.description).toBeTypeOf('string');
|
|
||||||
expect(listing.description).not.toBe('');
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ describe('services/jobs/jobExecutionService', () => {
|
|||||||
}));
|
}));
|
||||||
vi.doMock(utilsPath, () => ({
|
vi.doMock(utilsPath, () => ({
|
||||||
duringWorkingHoursOrNotSet: () => false,
|
duringWorkingHoursOrNotSet: () => false,
|
||||||
|
getPackageVersion: async () => '0.0.0-test',
|
||||||
}));
|
}));
|
||||||
vi.doMock(loggerPath, () => {
|
vi.doMock(loggerPath, () => {
|
||||||
const m = { debug: () => {}, info: () => {}, warn: () => {}, error: () => {} };
|
const m = { debug: () => {}, info: () => {}, warn: () => {}, error: () => {} };
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ vi.mock('../lib/services/extractor/puppeteerExtractor.js', async (importOriginal
|
|||||||
const { readFixture } = await import('./offlineFixtures.js');
|
const { readFixture } = await import('./offlineFixtures.js');
|
||||||
return {
|
return {
|
||||||
default: (url) => readFixture(url),
|
default: (url) => readFixture(url),
|
||||||
launchBrowser: async () => ({ close: async () => {}, __fredy_removeUserDataDir: false }),
|
launchBrowser: async () => ({ close: async () => {}, isConnected: () => true }),
|
||||||
closeBrowser: async () => {},
|
closeBrowser: async () => {},
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -197,7 +197,7 @@ const ListingsOverview = () => {
|
|||||||
|
|
||||||
<Button
|
<Button
|
||||||
icon={sortDir === 'asc' ? <IconArrowUp /> : <IconArrowDown />}
|
icon={sortDir === 'asc' ? <IconArrowUp /> : <IconArrowDown />}
|
||||||
onClick={() => setSortDir((d) => (d === 'asc' ? 'desc' : 'asc'))}
|
onClick={() => setSortDir(sortDir === 'asc' ? 'desc' : 'asc')}
|
||||||
title={sortDir === 'asc' ? 'Ascending' : 'Descending'}
|
title={sortDir === 'asc' ? 'Ascending' : 'Descending'}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ export default defineConfig({
|
|||||||
globals: true,
|
globals: true,
|
||||||
environment: 'node',
|
environment: 'node',
|
||||||
include: ['test/**/*.test.js'],
|
include: ['test/**/*.test.js'],
|
||||||
|
globalSetup: ['./test/globalSetup.js'],
|
||||||
testTimeout: 60000,
|
testTimeout: 60000,
|
||||||
reporters: ['verbose'],
|
reporters: ['verbose'],
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user