Compare commits

..

2 Commits

Author SHA1 Message Date
orangecoding
7879d0e94a next release version 2026-01-26 12:35:57 +01:00
orangecoding
afd1048c9e hardening the check if a listing is active 2026-01-26 12:34:49 +01:00
3 changed files with 49 additions and 16 deletions

View File

@@ -36,7 +36,7 @@ const config = {
},
normalize: normalize,
filter: applyBlacklist,
activeTester: checkIfListingIsActive,
activeTester: (url) => checkIfListingIsActive(url, 'Angebot nicht gefunden'),
};
export const init = (sourceConfig, blacklist) => {
config.enabled = sourceConfig.enabled;

View File

@@ -8,38 +8,71 @@ import { randomBetween, sleep } from '../../utils.js';
const maxAttempts = 3;
const userAgents = [
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15',
'Mozilla/5.0 (iPhone; CPU iPhone OS 17_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Mobile/15E148 Safari/604.1',
];
/**
* Check if a listing is still active with up to 3 attempts and exponential backoff.
* Backoff waits are capped and the last wait is at most 2000 ms.
* Check if a listing is still active with up to 5 attempts and exponential backoff.
* Backoff waits are randomized and capped.
*
* Rules:
* - HTTP 200 => return 1
* - HTTP 200 => return 1 (if checkForText is provided and found, returns 0)
* - HTTP 401/403 => return -1 (most certainly detected as a bot)
* - HTTP 404 => return 0
* - Other statuses or network errors => retry until attempts are exhausted
*
* @returns {Promise<Integer>} 1 if active, o if not active and -1 if detected as bot
* @returns {Promise<Integer>} 1 if active, 0 if not active and -1 if detected as bot
*/
export default async function checkIfListingIsActive(link) {
export default async function checkIfListingIsActive(link, checkForText = null) {
await sleep(randomBetween(50, 100));
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
try {
const userAgent = userAgents[Math.floor(Math.random() * userAgents.length)];
const res = await fetch(link, {
redirect: 'manual',
headers: {
'User-Agent':
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
'Accept-Language': 'de-DE,de;q=0.9,en;q=0.8',
'User-Agent': userAgent,
Accept:
'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Language': 'de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7',
'Accept-Encoding': 'gzip, deflate, br',
'Cache-Control': 'max-age=0',
'Sec-Ch-Ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
'Sec-Ch-Ua-Mobile': '?0',
'Sec-Ch-Ua-Platform': '"macOS"',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
Referer: 'https://www.google.com/',
},
});
if (res.status === 200) {
if (checkForText) {
const htmText = await res.text();
if (htmText.includes(checkForText)) {
return 0;
}
}
return 1;
}
if (res.status === 401) return -1;
if (res.status === 403) return -1;
if (res.status === 404) return 0;
if (res.status === 401 || res.status === 403) {
if (attempt < maxAttempts) {
await sleep(backoffDelay(attempt));
continue;
}
return -1;
}
if (res.status === 404 || res.status === 410) return 0;
// For any other status, only retry if attempts remain
if (attempt < maxAttempts) {
@@ -62,13 +95,13 @@ export default async function checkIfListingIsActive(link) {
}
/**
* Exponential backoff delay with cap.
* attempt: 1 -> 500ms, 2 -> 1000ms, 3 -> 2000ms (cap)
* Exponential backoff delay with cap and jitter.
* @param {number} attempt 1-based attempt index
* @returns {number} delay in ms
*/
function backoffDelay(attempt) {
const base = 500;
const cap = 2000;
return Math.min(base * 2 ** (attempt - 1), cap);
const delay = Math.min(base * 2 ** (attempt - 1), cap);
return delay + randomBetween(0, 1000);
}

View File

@@ -1,6 +1,6 @@
{
"name": "fredy",
"version": "19.2.0",
"version": "19.2.1",
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
"scripts": {
"prepare": "husky",