From afd1048c9e2b13ee902f315c4a79305c065076f9 Mon Sep 17 00:00:00 2001 From: orangecoding Date: Mon, 26 Jan 2026 12:34:49 +0100 Subject: [PATCH] hardening the check if a listing is active --- lib/provider/sparkasse.js | 2 +- lib/services/listings/listingActiveTester.js | 61 +++++++++++++++----- 2 files changed, 48 insertions(+), 15 deletions(-) diff --git a/lib/provider/sparkasse.js b/lib/provider/sparkasse.js index 68dd678..bc5aa2a 100755 --- a/lib/provider/sparkasse.js +++ b/lib/provider/sparkasse.js @@ -36,7 +36,7 @@ const config = { }, normalize: normalize, filter: applyBlacklist, - activeTester: checkIfListingIsActive, + activeTester: (url) => checkIfListingIsActive(url, 'Angebot nicht gefunden'), }; export const init = (sourceConfig, blacklist) => { config.enabled = sourceConfig.enabled; diff --git a/lib/services/listings/listingActiveTester.js b/lib/services/listings/listingActiveTester.js index 0ba1c86..c7ed84d 100644 --- a/lib/services/listings/listingActiveTester.js +++ b/lib/services/listings/listingActiveTester.js @@ -8,38 +8,71 @@ import { randomBetween, sleep } from '../../utils.js'; const maxAttempts = 3; +const userAgents = [ + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Mobile/15E148 Safari/604.1', +]; + /** - * Check if a listing is still active with up to 3 attempts and exponential backoff. - * Backoff waits are capped and the last wait is at most 2000 ms. 
+ * Check if a listing is still active with up to 3 attempts and exponential backoff. + * Backoff waits are randomized and capped. * * Rules: - * - HTTP 200 => return 1 + * - HTTP 200 => return 1 (if checkForText is provided and found, returns 0) * - HTTP 401/403 => return -1 (most certainly detected as a bot) * - HTTP 404 => return 0 * - Other statuses or network errors => retry until attempts are exhausted * - * @returns {Promise} 1 if active, o if not active and -1 if detected as bot + * @returns {Promise} 1 if active, 0 if not active and -1 if detected as bot */ -export default async function checkIfListingIsActive(link) { +export default async function checkIfListingIsActive(link, checkForText = null) { await sleep(randomBetween(50, 100)); for (let attempt = 1; attempt <= maxAttempts; attempt++) { try { + const userAgent = userAgents[Math.floor(Math.random() * userAgents.length)]; const res = await fetch(link, { redirect: 'manual', headers: { - 'User-Agent': - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36', - 'Accept-Language': 'de-DE,de;q=0.9,en;q=0.8', + 'User-Agent': userAgent, + Accept: + 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', + 'Accept-Language': 'de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7', + 'Accept-Encoding': 'gzip, deflate, br', + 'Cache-Control': 'max-age=0', + 'Sec-Ch-Ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"', + 'Sec-Ch-Ua-Mobile': '?0', + 'Sec-Ch-Ua-Platform': '"macOS"', + 'Sec-Fetch-Dest': 'document', + 'Sec-Fetch-Mode': 'navigate', + 'Sec-Fetch-Site': 'none', + 'Sec-Fetch-User': '?1', + 'Upgrade-Insecure-Requests': '1', + Referer: 'https://www.google.com/', }, }); if (res.status === 200) { + if (checkForText) { + const htmText = await res.text(); + if (htmText.includes(checkForText)) { + return 0; + } + } + return 1; } - if (res.status === 401) return 
-1; - if (res.status === 403) return -1; - if (res.status === 404) return 0; + if (res.status === 401 || res.status === 403) { + if (attempt < maxAttempts) { + await sleep(backoffDelay(attempt)); + continue; + } + return -1; + } + if (res.status === 404 || res.status === 410) return 0; // For any other status, only retry if attempts remain if (attempt < maxAttempts) { @@ -62,13 +95,13 @@ export default async function checkIfListingIsActive(link) { } /** - * Exponential backoff delay with cap. - * attempt: 1 -> 500ms, 2 -> 1000ms, 3 -> 2000ms (cap) + * Exponential backoff delay capped at 2000 ms, plus random jitter added on top of the capped value. * @param {number} attempt 1-based attempt index * @returns {number} delay in ms */ function backoffDelay(attempt) { const base = 500; const cap = 2000; - return Math.min(base * 2 ** (attempt - 1), cap); + const delay = Math.min(base * 2 ** (attempt - 1), cap); + return delay + randomBetween(0, 1000); }