upgrading pois

This commit is contained in:
orangecoding
2026-05-12 19:28:58 +02:00
parent bcd3042026
commit 0cbfa25062
10 changed files with 19 additions and 11 deletions

View File

@@ -227,7 +227,7 @@ class FredyPipelineExecutioner {
const extractor = new Extractor({ ...this._providerConfig.puppeteerOptions, browser: this._browser });
return new Promise((resolve, reject) => {
extractor
.execute(url, this._providerConfig.waitForSelector)
.execute(url, this._providerConfig.waitForSelector, this._jobKey)
.then(() => {
const listings = extractor.parseResponseText(
this._providerConfig.crawlContainer,

View File

@@ -26,7 +26,7 @@ function parseId(shortenedLink) {
async function fetchDetails(listing, browser) {
try {
const html = await puppeteerExtractor(listing.link, null, { browser });
const html = await puppeteerExtractor(listing.link, null, { browser, name: 'immobilienDe_details' });
if (!html) return listing;
const $ = cheerio.load(html);

View File

@@ -16,7 +16,7 @@ let appliedBlackList = [];
async function fetchDetails(listing, browser) {
try {
const html = await puppeteerExtractor(listing.link, null, { browser });
const html = await puppeteerExtractor(listing.link, null, { browser, name: 'immowelt_details' });
if (!html) return listing;
const $ = cheerio.load(html);

View File

@@ -128,7 +128,7 @@ async function enrichListingFromDetails(listing, browser) {
if (!absoluteLink) return listing;
try {
const html = await puppeteerExtractor(absoluteLink, null, { browser });
const html = await puppeteerExtractor(absoluteLink, null, { browser, name: 'kleinanzeigen_details' });
if (!html) return { ...listing, link: absoluteLink };
const { detailAddress, detailDescription } = extractDetailFromHtml(html);

View File

@@ -16,7 +16,7 @@ let appliedBlackList = [];
async function fetchDetails(listing, browser) {
try {
const html = await puppeteerExtractor(listing.link, 'body', { browser });
const html = await puppeteerExtractor(listing.link, 'body', { browser, name: 'sparkasse_details' });
const $ = cheerio.load(html);
const nextDataRaw = $('#__NEXT_DATA__').text;

View File

@@ -16,7 +16,7 @@ let appliedBlackList = [];
async function fetchDetails(listing, browser) {
try {
const html = await puppeteerExtractor(listing.link, null, { browser });
const html = await puppeteerExtractor(listing.link, null, { browser, name: 'wgGesucht_details' });
if (!html) return listing;
const $ = cheerio.load(html);

View File

@@ -29,11 +29,12 @@ export default class Extractor {
* your response will never contain what you are really looking for
* @param url
* @param waitForSelector
* @param jobKey
*/
execute = async (url, waitForSelector = null) => {
execute = async (url, waitForSelector = null, jobKey = null) => {
this.responseText = null;
try {
this.responseText = await puppeteerExtractor(url, waitForSelector, this.options);
this.responseText = await puppeteerExtractor(url, waitForSelector, { ...this.options, name: jobKey });
if (this.responseText != null) {
loadParser(this.responseText);
}

View File

@@ -148,7 +148,11 @@ export default async function execute(url, waitForSelector, options) {
if (botDetected(pageSource, statusCode)) {
logger.warn('We have been detected as a bot :-/ Tried url: => ', url);
await trackPoi(TRACKING_POIS.DETECTED_AS_BOT);
if (options != null && options.name != null) {
await trackPoi(TRACKING_POIS.DETECTED_AS_BOT + '_' + options.name);
} else {
await trackPoi(TRACKING_POIS.DETECTED_AS_BOT);
}
result = null;
} else {

View File

@@ -1,6 +1,6 @@
{
"name": "fredy",
"version": "22.0.5",
"version": "22.0.6",
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
"scripts": {
"prepare": "husky",

View File

@@ -95,7 +95,10 @@ async function downloadHtmlProvider(name, providerConfig, launchBrowser, closeBr
const browser = await launchBrowser(providerConfig.url, {});
try {
const html = await puppeteerExtractor(providerConfig.url, providerConfig.waitForSelector, { browser });
const html = await puppeteerExtractor(providerConfig.url, providerConfig.waitForSelector, {
browser,
name: 'dowload_fixtures',
});
if (!html) {
console.warn(` Failed to download ${name}`);