mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
improving bot detection prevention
This commit is contained in:
@@ -5,6 +5,7 @@ import logger from '../logger.js';
|
||||
import fs from 'fs';
|
||||
import os from 'os';
|
||||
import path from 'path';
|
||||
import { URL } from 'url';
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
@@ -27,23 +28,97 @@ export default async function execute(url, waitForSelector, options) {
|
||||
removeUserDataDir = true;
|
||||
}
|
||||
|
||||
const launchArgs = [
|
||||
'--no-sandbox',
|
||||
'--disable-gpu',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-crash-reporter',
|
||||
'--no-first-run',
|
||||
'--no-default-browser-check',
|
||||
];
|
||||
if (options?.proxyUrl) {
|
||||
launchArgs.push(`--proxy-server=${options.proxyUrl}`);
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: options.puppeteerHeadless ?? true,
|
||||
args: [
|
||||
'--no-sandbox',
|
||||
'--disable-gpu',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-crash-reporter',
|
||||
],
|
||||
timeout: options.puppeteerTimeout || 30_000,
|
||||
headless: options?.puppeteerHeadless ?? true,
|
||||
args: launchArgs,
|
||||
timeout: options?.puppeteerTimeout || 30_000,
|
||||
userDataDir,
|
||||
executablePath: options?.executablePath, // allow using system Chrome
|
||||
});
|
||||
|
||||
page = await browser.newPage();
|
||||
await page.setExtraHTTPHeaders(DEFAULT_HEADER);
|
||||
const response = await page.goto(url, {
|
||||
waitUntil: 'domcontentloaded',
|
||||
|
||||
// Derive domain-specific defaults
|
||||
const { hostname } = new URL(url);
|
||||
|
||||
// Set a realistic modern user agent unless provided
|
||||
const userAgent =
|
||||
options?.userAgent ||
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36';
|
||||
await page.setUserAgent(userAgent);
|
||||
|
||||
// Viewport and device scale for typical desktop
|
||||
await page.setViewport({ width: 1366, height: 768, deviceScaleFactor: 1 });
|
||||
|
||||
// Extra HTTP headers with localized Accept-Language
|
||||
const acceptLanguage = options?.acceptLanguage || 'de-DE,de;q=0.9,en-US;q=0.7,en;q=0.5';
|
||||
const headers = {
|
||||
...DEFAULT_HEADER,
|
||||
'Accept-Language': acceptLanguage,
|
||||
'User-Agent': userAgent,
|
||||
Referer: options?.referer || `https://${hostname}/`,
|
||||
Connection: 'keep-alive',
|
||||
DNT: '1',
|
||||
};
|
||||
await page.setExtraHTTPHeaders(headers);
|
||||
|
||||
// Timezone and locale tweaks to look German when needed
|
||||
try {
|
||||
const tz = options?.timezone || 'Europe/Berlin';
|
||||
if (tz) await page.emulateTimezone(tz);
|
||||
} catch {
|
||||
//noop
|
||||
}
|
||||
|
||||
// Harden navigator properties (stealth already covers many, but we ensure critical ones)
|
||||
await page.evaluateOnNewDocument(() => {
|
||||
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
|
||||
// Plugins and mimeTypes
|
||||
// @ts-ignore
|
||||
Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3] });
|
||||
// @ts-ignore
|
||||
Object.defineProperty(navigator, 'languages', {
|
||||
get: () => (window.localStorage.getItem('__LANGS__') || 'de-DE,de').split(','),
|
||||
});
|
||||
});
|
||||
// Provide languages value before navigation
|
||||
await page.evaluateOnNewDocument((langs) => {
|
||||
try {
|
||||
window.localStorage.setItem('__LANGS__', langs);
|
||||
} catch {
|
||||
//noop
|
||||
}
|
||||
}, acceptLanguage.split(';')[0]);
|
||||
|
||||
// Optional cookies
|
||||
if (Array.isArray(options?.cookies) && options.cookies.length > 0) {
|
||||
await page.setCookie(...options.cookies);
|
||||
}
|
||||
|
||||
// Navigation
|
||||
const response = await page.goto(url, {
|
||||
waitUntil: options?.waitUntil || 'domcontentloaded',
|
||||
});
|
||||
|
||||
// Optionally wait a random small delay to mimic human rendering time
|
||||
if (options?.humanDelay !== false) {
|
||||
const delay = 200 + Math.floor(Math.random() * 400);
|
||||
await new Promise((res) => setTimeout(res, delay));
|
||||
}
|
||||
|
||||
let pageSource;
|
||||
// if we're extracting data from a SPA, we must wait for the selector
|
||||
if (waitForSelector != null) {
|
||||
@@ -57,7 +132,7 @@ export default async function execute(url, waitForSelector, options) {
|
||||
pageSource = await page.content();
|
||||
}
|
||||
|
||||
const statusCode = response.status();
|
||||
const statusCode = response?.status?.() ?? 200;
|
||||
|
||||
if (botDetected(pageSource, statusCode)) {
|
||||
logger.warn('We have been detected as a bot :-/ Tried url: => ', url);
|
||||
|
||||
30
package.json
30
package.json
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "fredy",
|
||||
"version": "14.3.3",
|
||||
"version": "14.3.4",
|
||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||
"scripts": {
|
||||
"prepare": "husky",
|
||||
@@ -56,13 +56,13 @@
|
||||
"Firefox ESR"
|
||||
],
|
||||
"dependencies": {
|
||||
"@douyinfe/semi-icons": "^2.87.1",
|
||||
"@douyinfe/semi-ui": "2.87.1",
|
||||
"@douyinfe/semi-icons": "^2.88.0",
|
||||
"@douyinfe/semi-ui": "2.88.0",
|
||||
"@sendgrid/mail": "8.1.6",
|
||||
"@visactor/react-vchart": "^2.0.5",
|
||||
"@visactor/vchart": "^2.0.5",
|
||||
"@visactor/react-vchart": "^2.0.8",
|
||||
"@visactor/vchart": "^2.0.8",
|
||||
"@visactor/vchart-semi-theme": "^1.12.2",
|
||||
"@vitejs/plugin-react": "5.1.0",
|
||||
"@vitejs/plugin-react": "5.1.1",
|
||||
"better-sqlite3": "^12.4.1",
|
||||
"body-parser": "2.2.0",
|
||||
"cheerio": "^1.1.2",
|
||||
@@ -73,21 +73,21 @@
|
||||
"node-cron": "^4.2.1",
|
||||
"node-fetch": "3.3.2",
|
||||
"node-mailjet": "6.0.11",
|
||||
"p-throttle": "^8.0.0",
|
||||
"p-throttle": "^8.1.0",
|
||||
"package-up": "^5.0.0",
|
||||
"puppeteer": "^24.27.0",
|
||||
"puppeteer": "^24.30.0",
|
||||
"puppeteer-extra": "^3.3.6",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||
"query-string": "9.3.1",
|
||||
"react": "18.3.1",
|
||||
"react-dom": "18.3.1",
|
||||
"react-router": "7.9.5",
|
||||
"react-router-dom": "7.9.5",
|
||||
"react-router": "7.9.6",
|
||||
"react-router-dom": "7.9.6",
|
||||
"restana": "5.1.0",
|
||||
"semver": "^7.7.3",
|
||||
"serve-static": "2.2.0",
|
||||
"slack": "11.0.2",
|
||||
"vite": "7.1.12",
|
||||
"vite": "7.2.2",
|
||||
"x-var": "^3.0.1",
|
||||
"zustand": "^5.0.8"
|
||||
},
|
||||
@@ -96,8 +96,8 @@
|
||||
"@babel/eslint-parser": "7.28.5",
|
||||
"@babel/preset-env": "7.28.5",
|
||||
"@babel/preset-react": "7.28.5",
|
||||
"chai": "6.2.0",
|
||||
"eslint": "9.39.0",
|
||||
"chai": "6.2.1",
|
||||
"eslint": "9.39.1",
|
||||
"eslint-config-prettier": "10.1.8",
|
||||
"eslint-plugin-react": "7.37.5",
|
||||
"esmock": "2.7.3",
|
||||
@@ -105,8 +105,8 @@
|
||||
"husky": "9.1.7",
|
||||
"less": "4.4.2",
|
||||
"lint-staged": "16.2.6",
|
||||
"mocha": "11.7.4",
|
||||
"nodemon": "^3.1.10",
|
||||
"mocha": "11.7.5",
|
||||
"nodemon": "^3.1.11",
|
||||
"prettier": "3.6.2"
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user