Compare commits

..

5 Commits

Author SHA1 Message Date
orangecoding
7a9d49899b improve reusing of puppeteer by adding a safeguard for broken chrome 2026-02-18 20:16:55 +01:00
orangecoding
9a87c58d3e next release version 2026-02-18 20:06:40 +01:00
orangecoding
fdd7e835e8 improve default puppeteer timeout 2026-02-18 20:06:22 +01:00
Christian Kellner
00d6a12b30 Puppeteer improvements (#270)
* improve puppeteer handling. Now only 1 puppeteer instance is being used which is WAY more efficient

* removing package-lock

* reduce logging

* removing problematic docker command

* Remove Immonet. They now belong to immowelt
2026-02-18 20:05:02 +01:00
orangecoding
05218800d2 fixing app init 2026-02-17 14:28:08 +01:00
11 changed files with 128 additions and 13746 deletions

View File

@@ -63,13 +63,15 @@ class FredyPipelineExecutioner {
* @param {string} providerId The ID of the provider currently in use.
* @param {string} jobKey Key of the job that is currently running (from within the config).
* @param {SimilarityCache} similarityCache Cache instance for checking similar entries.
* @param browser
*/
constructor(providerConfig, notificationConfig, providerId, jobKey, similarityCache) {
constructor(providerConfig, notificationConfig, providerId, jobKey, similarityCache, browser) {
this._providerConfig = providerConfig;
this._notificationConfig = notificationConfig;
this._providerId = providerId;
this._jobKey = jobKey;
this._similarityCache = similarityCache;
this._browser = browser;
}
/**
@@ -119,7 +121,7 @@ class FredyPipelineExecutioner {
* @returns {Promise<Listing[]>} Resolves with an array of listings (empty when none found).
*/
_getListings(url) {
const extractor = new Extractor();
const extractor = new Extractor({ ...this._providerConfig.puppeteerOptions, browser: this._browser });
return new Promise((resolve, reject) => {
extractor
.execute(url, this._providerConfig.waitForSelector)

View File

@@ -1,53 +0,0 @@
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
import { isOneOf, buildHash } from '../utils.js';
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
let appliedBlackList = [];
function normalize(o) {
const size = o.size != null ? o.size.replace('Wohnfläche ', '') : 'N/A m²';
const price = o.price.replace('Kaufpreis ', '');
const address = o.address?.split(' • ')?.pop() ?? null;
const title = o.title || 'No title available';
const link = o.link != null ? decodeURIComponent(o.link) : config.url;
const id = buildHash(title, price);
return Object.assign(o, { id, address, price, size, title, link });
}
function applyBlacklist(o) {
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
return titleNotBlacklisted && descNotBlacklisted;
}
const config = {
url: null,
crawlContainer: 'div[data-testid="serp-core-classified-card-testid"]',
sortByDateParam: 'sortby=19',
waitForSelector: 'div[data-testid="serp-gridcontainer-testid"]',
crawlFields: {
id: 'button@title |trim',
title: 'button@title |trim',
price: 'div[data-testid="cardmfe-price-testid"] | trim',
size: 'div[data-testid="cardmfe-keyfacts-testid"] | trim',
address: 'div[data-testid="cardmfe-description-box-address"] | trim',
image: 'div[data-testid="cardmfe-picture-box-test-id"] img@src',
link: 'button@data-base',
description: 'div[data-testid="cardmfe-description-text-test-id"] | trim',
},
normalize: normalize,
filter: applyBlacklist,
activeTester: checkIfListingIsActive,
};
export const init = (sourceConfig, blacklist) => {
config.enabled = sourceConfig.enabled;
config.url = sourceConfig.url;
appliedBlackList = blacklist || [];
};
export const metaInformation = {
name: 'Immonet',
baseUrl: 'https://www.immonet.de/',
id: 'immonet',
};
export { config };

View File

@@ -19,52 +19,80 @@ import path from 'path';
puppeteer.use(StealthPlugin());
export default async function execute(url, waitForSelector, options) {
let browser;
let page;
let result;
export async function launchBrowser(url, options) {
const preCfg = getPreLaunchConfig(url, options || {});
const launchArgs = [
'--no-sandbox',
'--disable-gpu',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-crash-reporter',
'--no-first-run',
'--no-default-browser-check',
preCfg.langArg,
preCfg.windowSizeArg,
...preCfg.extraArgs,
];
if (options?.proxyUrl) {
launchArgs.push(`--proxy-server=${options.proxyUrl}`);
}
let userDataDir;
let removeUserDataDir = false;
if (options && options.userDataDir) {
userDataDir = options.userDataDir;
} else {
const prefix = path.join(os.tmpdir(), 'puppeteer-fredy-');
userDataDir = fs.mkdtempSync(prefix);
removeUserDataDir = true;
}
const browser = await puppeteer.launch({
headless: options?.puppeteerHeadless ?? true,
args: launchArgs,
timeout: options?.puppeteerTimeout || 45_000,
userDataDir,
executablePath: options?.executablePath,
});
browser.__fredy_userDataDir = userDataDir;
browser.__fredy_removeUserDataDir = removeUserDataDir;
return browser;
}
export async function closeBrowser(browser) {
if (!browser) return;
const userDataDir = browser.__fredy_userDataDir;
const removeUserDataDir = browser.__fredy_removeUserDataDir;
try {
await browser.close();
} catch {
// ignore
}
if (removeUserDataDir && userDataDir) {
try {
await fs.promises.rm(userDataDir, { recursive: true, force: true });
} catch {
// ignore
}
}
}
export default async function execute(url, waitForSelector, options) {
let browser = options?.browser;
let isExternalBrowser = !!browser;
let page;
let result;
try {
debug(`Sending request to ${url} using Puppeteer.`);
// Prepare a dedicated temporary userDataDir to avoid leaking /tmp/.org.chromium.* dirs
if (options && options.userDataDir) {
userDataDir = options.userDataDir;
removeUserDataDir = !!options.cleanupUserDataDir;
} else {
const prefix = path.join(os.tmpdir(), 'puppeteer-fredy-');
userDataDir = fs.mkdtempSync(prefix);
removeUserDataDir = true;
if (!isExternalBrowser) {
browser = await launchBrowser(url, options);
}
const launchArgs = [
'--no-sandbox',
'--disable-gpu',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-crash-reporter',
'--no-first-run',
'--no-default-browser-check',
];
if (options?.proxyUrl) {
launchArgs.push(`--proxy-server=${options.proxyUrl}`);
}
// Prepare bot prevention pre-launch config
const preCfg = getPreLaunchConfig(url, options || {});
launchArgs.push(preCfg.langArg);
launchArgs.push(preCfg.windowSizeArg);
launchArgs.push(...preCfg.extraArgs);
browser = await puppeteer.launch({
headless: options?.puppeteerHeadless ?? true,
args: launchArgs,
timeout: options?.puppeteerTimeout || 30_000,
userDataDir,
executablePath: options?.executablePath, // allow using system Chrome
});
page = await browser.newPage();
const preCfg = getPreLaunchConfig(url, options || {});
await applyBotPreventionToPage(page, preCfg);
// Provide languages value before navigation
await applyLanguagePersistence(page, preCfg);
@@ -104,7 +132,7 @@ export default async function execute(url, waitForSelector, options) {
result = pageSource || (await page.content());
}
} catch (error) {
if (error?.message?.includes('Timeout')) {
if (error?.name?.includes('Timeout')) {
logger.debug('Error executing with puppeteer executor', error);
} else {
logger.warn('Error executing with puppeteer executor', error);
@@ -118,19 +146,8 @@ export default async function execute(url, waitForSelector, options) {
} catch {
// ignore
}
try {
if (browser != null) {
await browser.close();
}
} catch {
// ignore
}
try {
if (removeUserDataDir && userDataDir) {
await fs.promises.rm(userDataDir, { recursive: true, force: true });
}
} catch {
// ignore
if (browser != null && !isExternalBrowser) {
await closeBrowser(browser);
}
}
return result;

View File

@@ -13,6 +13,7 @@ import FredyPipelineExecutioner from '../../FredyPipelineExecutioner.js';
import * as similarityCache from '../similarity-check/similarityCache.js';
import { isRunning, markFinished, markRunning } from './run-state.js';
import { sendToUsers } from '../sse/sse-broker.js';
import * as puppeteerExtractor from '../extractor/puppeteerExtractor.js';
/**
* Initializes the job execution service.
@@ -94,7 +95,7 @@ export function initJobExecutionService({ providers, settings, intervalMs }) {
* @param {{userId?: string, isAdmin?: boolean}} [context] - Who requested the run; determines job filtering.
* @returns {void}
*/
function runAll(respectWorkingHours = true, context = undefined) {
async function runAll(respectWorkingHours = true, context = undefined) {
if (settings.demoMode) return;
const now = Date.now();
const withinHours = duringWorkingHoursOrNotSet(settings, now);
@@ -103,15 +104,18 @@ export function initJobExecutionService({ providers, settings, intervalMs }) {
return;
}
settings.lastRun = now;
jobStorage
const jobs = jobStorage
.getJobs()
.filter((job) => job.enabled)
.filter((job) => {
if (!context) return true; // startup/cron → all
if (context.isAdmin) return true; // admin → all
return context.userId ? job.userId === context.userId : false; // user → own
})
.forEach((job) => executeJob(job));
});
for (const job of jobs) {
await executeJob(job);
}
}
/**
@@ -154,28 +158,42 @@ export function initJobExecutionService({ providers, settings, intervalMs }) {
} catch (err) {
logger.warn('Failed to emit start status for job', job.id, err);
}
let browser;
try {
const jobProviders = job.provider.filter(
(p) => providers.find((loaded) => loaded.metaInformation.id === p.id) != null,
);
const executions = jobProviders.map(async (prov) => {
const matchedProvider = providers.find((loaded) => loaded.metaInformation.id === prov.id);
matchedProvider.init(prov, job.blacklist);
await new FredyPipelineExecutioner(
matchedProvider.config,
job.notificationAdapter,
prov.id,
job.id,
similarityCache,
).execute();
});
const results = await Promise.allSettled(executions);
for (const r of results) {
if (r.status === 'rejected') {
logger.error(r.reason);
for (const prov of jobProviders) {
try {
const matchedProvider = providers.find((loaded) => loaded.metaInformation.id === prov.id);
matchedProvider.init(prov, job.blacklist);
if (browser && !browser.isConnected()) {
logger.debug('Browser is disconnected, nullifying to launch a new one.');
await puppeteerExtractor.closeBrowser(browser);
browser = null;
}
if (!browser && matchedProvider.config.getListings == null) {
browser = await puppeteerExtractor.launchBrowser(matchedProvider.config.url, {});
}
await new FredyPipelineExecutioner(
matchedProvider.config,
job.notificationAdapter,
prov.id,
job.id,
similarityCache,
browser,
).execute();
} catch (err) {
logger.error(err);
}
}
} finally {
if (browser) {
await puppeteerExtractor.closeBrowser(browser);
}
markFinished(job.id);
try {
bus.emit('jobs:status', { jobId: job.id, running: false });

13553
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{
"name": "fredy",
"version": "19.4.1",
"version": "19.5.1",
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
"scripts": {
"prepare": "husky",
@@ -80,7 +80,7 @@
"node-mailjet": "6.0.11",
"p-throttle": "^8.1.0",
"package-up": "^5.0.0",
"puppeteer": "^24.37.2",
"puppeteer": "^24.37.3",
"puppeteer-extra": "^3.3.6",
"puppeteer-extra-plugin-stealth": "^2.11.2",
"query-string": "9.3.1",

View File

@@ -1,38 +0,0 @@
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
import { get } from '../mocks/mockNotification.js';
import { mockFredy, providerConfig } from '../utils.js';
import { expect } from 'chai';
import * as provider from '../../lib/provider/immonet.js';
describe('#immonet testsuite()', () => {
it('should test immonet provider', async () => {
const Fredy = await mockFredy();
provider.init(providerConfig.immonet, [], []);
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'immonet', similarityCache);
const listing = await fredy.execute();
expect(listing).to.be.a('array');
const notificationObj = get();
expect(notificationObj).to.be.a('object');
expect(notificationObj.serviceName).to.equal('immonet');
notificationObj.payload.forEach((notify) => {
/** check the actual structure **/
expect(notify.id).to.be.a('string');
expect(notify.price).to.be.a('string');
expect(notify.size).to.be.a('string');
expect(notify.title).to.be.a('string');
expect(notify.link).to.be.a('string');
expect(notify.address).to.be.a('string');
/** check the values if possible **/
expect(notify.size).that.does.include('m²');
expect(notify.title).to.be.not.empty;
expect(notify.address).to.be.not.empty;
});
});
});

View File

@@ -8,10 +8,6 @@
"url": "https://www.immobilien.de/Wohnen/Suchergebnisse-51797.html?search._digest=true&search._filter=wohnen&search.flaeche_von=50&search.objektart=wohnung&search.preis_bis=1200&search.typ=mieten&search.umkreis=15&search.wo=district%3A2434%2C2695%2C2621%2C2700%2C2967%2C2734%2C2909%2C2955%2C2392%2C2746%2C2767%2C2982%2C2904%2C2612%2C2892%2C2587%2C2871%2C2975%2C2591%2C2887%2C2569%2C2640%2C2735&sort_col=*created_ts&sort_dir=desc",
"enabled": true
},
"immonet": {
"url": "https://www.immonet.de/classified-search?distributionTypes=Buy,Buy_Auction,Compulsory_Auction&estateTypes=House,Apartment&locations=AD08DE2112&order=Default&m=homepage_new_search_classified_search_result",
"enabled": true
},
"immowelt": {
"url": "https://www.immowelt.de/classified-search?distributionTypes=Buy,Buy_Auction,Compulsory_Auction&estateTypes=House,Apartment&locations=AD08DE2350",
"enabled": true

View File

@@ -14,12 +14,6 @@
"shouldBecome": "https://www.wg-gesucht.de/1-zimmer-wohnungen-in-Dusseldorf.30.1.1.0.html?sort_column=0&sort_order=0",
"id": "wgGesucht"
},
{
"url": "https://www.immonet.de/immobiliensuche/sel.do?sortby=0&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
"shouldBecome": "https://www.immonet.de/immobiliensuche/sel.do?sortby=19&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
"id": "immonet"
},
{
"url": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/",
"shouldBecome": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/?Sortierung=Id&Richtung=DESC",

View File

@@ -11,7 +11,7 @@ import GeneralSettings from './views/generalSettings/GeneralSettings';
import UserSettings from './views/userSettings/UserSettings';
import JobMutation from './views/jobs/mutation/JobMutation';
import UserMutator from './views/user/mutation/UserMutator';
import { useActions, useSelector, useFredyState } from './services/state/store';
import { useActions, useSelector } from './services/state/store';
import { Routes, Route, Navigate } from 'react-router-dom';
import Login from './views/login/Login';
import Users from './views/user/Users';
@@ -41,24 +41,21 @@ export default function FredyApp() {
useEffect(() => {
async function init() {
await actions.user.getCurrentUser();
const user = useFredyState.getState().user.currentUser;
if (!user || Object.keys(user).length === 0) {
setLoading(false);
return;
if (!needsLogin()) {
await actions.provider.getProvider();
await actions.jobsData.getJobs();
await actions.jobsData.getSharableUserList();
await actions.notificationAdapter.getAdapter();
await actions.generalSettings.getGeneralSettings();
await actions.userSettings.getUserSettings();
await actions.versionUpdate.getVersionUpdate();
await actions.tracking.getTrackingPois();
}
await actions.provider.getProvider();
await actions.jobsData.getJobs();
await actions.jobsData.getSharableUserList();
await actions.notificationAdapter.getAdapter();
await actions.generalSettings.getGeneralSettings();
await actions.userSettings.getUserSettings();
await actions.versionUpdate.getVersionUpdate();
await actions.tracking.getTrackingPois();
setLoading(false);
}
init();
}, []);
}, [currentUser?.userId]);
const needsLogin = () => {
return currentUser == null || Object.keys(currentUser).length === 0;
@@ -68,7 +65,10 @@ export default function FredyApp() {
const { Sider, Content } = Layout;
return loading ? null : needsLogin() ? (
<Login />
<Routes>
<Route path="/login" element={<Login />} />
<Route path="*" element={<Navigate to="/login" replace />} />
</Routes>
) : (
<Layout className="app">
<Sider>
@@ -137,7 +137,6 @@ export default function FredyApp() {
}
/>
{/* Authenticated fallbacks */}
<Route path="/" element={<Navigate to="/dashboard" replace />} />
</Routes>
</Content>

View File

@@ -6283,7 +6283,7 @@ puppeteer-extra@^3.3.6:
debug "^4.1.1"
deepmerge "^4.2.2"
puppeteer@^24.37.2:
puppeteer@^24.37.3:
version "24.37.3"
resolved "https://registry.yarnpkg.com/puppeteer/-/puppeteer-24.37.3.tgz#68327e1f571887ed1ba7916d770e6028975a21db"
integrity sha512-AUGGWq0BhPM+IOS2U9A+ZREH3HDFkV1Y5HERYGDg5cbGXjoGsTCT7/A6VZRfNU0UJJdCclyEimZICkZW6pqJyw==