Feature/kleinanzeigen new (#292)

* Feature/Kleinanzeigen addresses (#289)

* upgrade dependencies

* immoscout_details -> provider_details

* fetching details more generic

* removing claude action

* fixing sparkassen selector

* improvements

* fixing immobilienDE test

* upgrading dependencies

* settings for many provider

---------

Co-authored-by: Adrian Bach <65734063+realDayaa@users.noreply.github.com>
This commit is contained in:
Christian Kellner
2026-04-07 19:53:40 +02:00
committed by GitHub
parent 7888c5b340
commit cdc0cbda2f
35 changed files with 1098 additions and 501 deletions

View File

@@ -94,12 +94,34 @@ export async function applyBotPreventionToPage(page, cfg) {
// webdriver
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
// chrome runtime
// chrome runtime — expose loadTimes, csi and app like real Chrome
// @ts-ignore
if (!window.chrome) {
window.chrome = {
runtime: {},
// @ts-ignore
window.chrome = { runtime: {} };
}
loadTimes: () => ({
requestTime: performance.timeOrigin / 1000,
startLoadTime: performance.timeOrigin / 1000,
commitLoadTime: performance.timeOrigin / 1000 + 0.1,
finishDocumentLoadTime: 0,
finishLoadTime: 0,
firstPaintTime: 0,
firstPaintAfterLoadTime: 0,
navigationType: 'Other',
wasFetchedViaSpdy: false,
wasNpnNegotiated: false,
npnNegotiatedProtocol: '',
wasAlternateProtocolAvailable: false,
connectionInfo: 'http/1.1',
}),
// @ts-ignore
csi: () => ({ startE: performance.timeOrigin, onloadT: Date.now(), pageT: performance.now(), tran: 15 }),
app: {
isInstalled: false,
InstallState: { DISABLED: 'disabled', INSTALLED: 'installed', NOT_INSTALLED: 'not_installed' },
RunningState: { CANNOT_RUN: 'cannot_run', READY_TO_RUN: 'ready_to_run', RUNNING: 'running' },
},
};
// languages
// @ts-ignore
@@ -107,23 +129,38 @@ export async function applyBotPreventionToPage(page, cfg) {
get: () => (window.localStorage.getItem('__LANGS__') || 'de-DE,de').split(','),
});
// plugins
// plugins — mimic real Chrome's built-in PDF plugins
const makePlugin = (name, filename, description, mimeType, mimeTypeSuffix) => {
const mimeObj = { type: mimeType, suffixes: mimeTypeSuffix, description, enabledPlugin: null };
const plugin = { name, filename, description, length: 1, 0: mimeObj };
mimeObj.enabledPlugin = plugin;
return plugin;
};
const fakePlugins = [
makePlugin('PDF Viewer', 'internal-pdf-viewer', 'Portable Document Format', 'application/pdf', 'pdf'),
makePlugin('Chrome PDF Viewer', 'internal-pdf-viewer', 'Portable Document Format', 'application/pdf', 'pdf'),
makePlugin('Chromium PDF Viewer', 'internal-pdf-viewer', 'Portable Document Format', 'application/pdf', 'pdf'),
makePlugin(
'Microsoft Edge PDF Viewer',
'internal-pdf-viewer',
'Portable Document Format',
'application/pdf',
'pdf',
),
makePlugin('WebKit built-in PDF', 'internal-pdf-viewer', 'Portable Document Format', 'application/pdf', 'pdf'),
];
// @ts-ignore
Object.defineProperty(navigator, 'plugins', {
get: () => [{}, {}, {}],
});
Object.defineProperty(navigator, 'plugins', { get: () => fakePlugins });
// @ts-ignore
Object.defineProperty(navigator, 'mimeTypes', { get: () => [fakePlugins[0][0]] });
// platform and concurrency hints
// @ts-ignore
Object.defineProperty(navigator, 'platform', { get: () => 'Win32' });
// @ts-ignore
if (typeof navigator.hardwareConcurrency === 'number' && navigator.hardwareConcurrency < 2) {
Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 4 });
}
Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 8 });
// @ts-ignore
if (typeof navigator.deviceMemory === 'number' && navigator.deviceMemory < 2) {
Object.defineProperty(navigator, 'deviceMemory', { get: () => 8 });
}
Object.defineProperty(navigator, 'deviceMemory', { get: () => 8 });
// userAgentData (Client Hints)
try {
@@ -236,6 +273,21 @@ export async function applyBotPreventionToPage(page, cfg) {
} catch {
//noop
}
// document.hasFocus — headless returns false; real active tabs return true
try {
document.hasFocus = () => true;
} catch {
//noop
}
// screen color depth — normalise in case headless reports 0
try {
Object.defineProperty(screen, 'colorDepth', { get: () => 24 });
Object.defineProperty(screen, 'pixelDepth', { get: () => 24 });
} catch {
//noop
}
} catch {
//noop
}
@@ -273,6 +325,8 @@ export async function applyPostNavigationHumanSignals(page, cfg) {
const my = Math.floor(vh * (0.3 + Math.random() * 0.4));
await page.mouse.move(mx, my, { steps: 10 + Math.floor(Math.random() * 10) });
await page.mouse.wheel({ deltaY: 100 + Math.floor(Math.random() * 200) });
await new Promise((res) => setTimeout(res, 150 + Math.floor(Math.random() * 200)));
await page.mouse.wheel({ deltaY: -(30 + Math.floor(Math.random() * 60)) });
} catch {
// ignore if mouse is unavailable
}

View File

@@ -110,6 +110,7 @@ export default async function execute(url, waitForSelector, options) {
// Navigation
const response = await page.goto(url, {
waitUntil: options?.waitUntil || 'domcontentloaded',
timeout: options?.puppeteerTimeout || 60000,
});
// Optionally wait and add subtle human-like interactions

View File

@@ -0,0 +1,17 @@
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
// We have moved the previous immoscout_details setting to provider_details and enable this by default
// We also set it to false per default as this is increasing the chance to be detected as a bot by a lot
export function up(db) {
db.exec(`
UPDATE settings
SET name = 'provider_details', value = false
WHERE name = 'immoscout_details'
AND NOT EXISTS (
SELECT 1 FROM settings WHERE name = 'provider_details'
);
`);
}

View File

@@ -0,0 +1,15 @@
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
// Convert provider_details from a boolean to an array of provider id strings.
// Users will re-configure which providers they want to fetch details from.
export function up(db) {
const row = db.prepare("SELECT value FROM settings WHERE name = 'provider_details'").get();
if (row) {
db.prepare("UPDATE settings SET value = ? WHERE name = 'provider_details'").run(JSON.stringify([]));
} else {
db.prepare("INSERT INTO settings (name, value) VALUES ('provider_details', ?)").run(JSON.stringify([]));
}
}