Files
fredy/lib/services/extractor/utils.js

38 lines
1.1 KiB
JavaScript
Raw Normal View History

2025-12-11 10:40:55 +01:00
/*
* Copyright (c) 2026 by Christian Kellner.
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
*/
2025-09-13 18:57:56 +02:00
import logger from '../logger.js';
// Module-level switch: written by setDebug(), read by debug() to decide whether to log.
let debuggingOn = false;
/**
 * Default set of HTTP headers exported for use by request code.
 *
 * The values imitate a desktop Chrome browser on Windows (see the
 * `User-Agent` string) asking for an HTML document.
 * NOTE(review): presumably used so requests look like regular browser
 * traffic — confirm against the callers.
 */
export const DEFAULT_HEADER = {
  Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.5',
  Connection: 'keep-alive',
  'Upgrade-Insecure-Requests': '1',
  'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
};
2025-01-07 12:25:19 +01:00
/**
 * Switches debug logging on or off for this module.
 *
 * @param {{ debug?: unknown } | null | undefined} options - Settings object;
 *   only its `debug` property is read. A truthy value enables debug output,
 *   anything else (including a missing `options`) disables it.
 * @returns {void}
 */
export const setDebug = (options) => {
  const requested = options?.debug;
  debuggingOn = Boolean(requested);
};
/**
 * Forwards a message to `logger.debug`, but only while debug mode is enabled
 * (see `setDebug`). When debug mode is off the call does nothing.
 *
 * @param {*} message - Value passed unchanged to `logger.debug`.
 * @returns {void}
 */
export const debug = (message) => {
  if (!debuggingOn) {
    return;
  }
  logger.debug(message);
};
/**
 * Heuristic check for whether a response looks like a bot-protection or
 * blocking page rather than real content.
 *
 * A response is flagged when either
 *  - its body matches one of the known phrases (case-insensitive), or
 *  - its HTTP status is 403 or 429.
 *
 * @param {*} pageSource - Response body. `null`/`undefined` are treated as an
 *   empty body; any other value is converted to a string before matching.
 * @param {number|string} statusCode - HTTP status code. Numeric strings such
 *   as `'403'` are accepted as well as numbers.
 * @returns {boolean} `true` if the body or the status code looks suspicious.
 */
export const botDetected = (pageSource, statusCode) => {
  const suspiciousStatusCodes = [403, 429];
  // NOTE(review): `x-amz-cf-id` presumably targets CloudFront-generated error
  // pages, which echo that request id — confirm against real responses.
  const botDetectionPatterns = [/verify you are human/i, /access denied/i, /x-amz-cf-id/i];

  // Avoid matching against the coerced text "undefined"/"null" for a missing body.
  const source = pageSource == null ? '' : String(pageSource);
  // `includes` compares without type coercion, so normalise string codes first.
  const status = typeof statusCode === 'string' ? Number(statusCode) : statusCode;

  const detectedInSource = botDetectionPatterns.some((pattern) => pattern.test(source));
  const detectedByStatus = suspiciousStatusCodes.includes(status);

  return detectedInSource || detectedByStatus;
};