mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
upgrading pois
This commit is contained in:
@@ -227,7 +227,7 @@ class FredyPipelineExecutioner {
|
||||
const extractor = new Extractor({ ...this._providerConfig.puppeteerOptions, browser: this._browser });
|
||||
return new Promise((resolve, reject) => {
|
||||
extractor
|
||||
.execute(url, this._providerConfig.waitForSelector)
|
||||
.execute(url, this._providerConfig.waitForSelector, this._jobKey)
|
||||
.then(() => {
|
||||
const listings = extractor.parseResponseText(
|
||||
this._providerConfig.crawlContainer,
|
||||
|
||||
@@ -26,7 +26,7 @@ function parseId(shortenedLink) {
|
||||
|
||||
async function fetchDetails(listing, browser) {
|
||||
try {
|
||||
const html = await puppeteerExtractor(listing.link, null, { browser });
|
||||
const html = await puppeteerExtractor(listing.link, null, { browser, name: 'immobilienDe_details' });
|
||||
if (!html) return listing;
|
||||
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
@@ -16,7 +16,7 @@ let appliedBlackList = [];
|
||||
|
||||
async function fetchDetails(listing, browser) {
|
||||
try {
|
||||
const html = await puppeteerExtractor(listing.link, null, { browser });
|
||||
const html = await puppeteerExtractor(listing.link, null, { browser, name: 'immowelt_details' });
|
||||
if (!html) return listing;
|
||||
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
@@ -128,7 +128,7 @@ async function enrichListingFromDetails(listing, browser) {
|
||||
if (!absoluteLink) return listing;
|
||||
|
||||
try {
|
||||
const html = await puppeteerExtractor(absoluteLink, null, { browser });
|
||||
const html = await puppeteerExtractor(absoluteLink, null, { browser, name: 'kleinanzeigen_details' });
|
||||
if (!html) return { ...listing, link: absoluteLink };
|
||||
|
||||
const { detailAddress, detailDescription } = extractDetailFromHtml(html);
|
||||
|
||||
@@ -16,7 +16,7 @@ let appliedBlackList = [];
|
||||
|
||||
async function fetchDetails(listing, browser) {
|
||||
try {
|
||||
const html = await puppeteerExtractor(listing.link, 'body', { browser });
|
||||
const html = await puppeteerExtractor(listing.link, 'body', { browser, name: 'sparkasse_details' });
|
||||
|
||||
const $ = cheerio.load(html);
|
||||
const nextDataRaw = $('#__NEXT_DATA__').text;
|
||||
|
||||
@@ -16,7 +16,7 @@ let appliedBlackList = [];
|
||||
|
||||
async function fetchDetails(listing, browser) {
|
||||
try {
|
||||
const html = await puppeteerExtractor(listing.link, null, { browser });
|
||||
const html = await puppeteerExtractor(listing.link, null, { browser, name: 'wgGesucht_details' });
|
||||
if (!html) return listing;
|
||||
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
@@ -29,11 +29,12 @@ export default class Extractor {
|
||||
* your response will never contain what you are really looking for
|
||||
* @param url
|
||||
* @param waitForSelector
|
||||
* @param jobKey
|
||||
*/
|
||||
execute = async (url, waitForSelector = null) => {
|
||||
execute = async (url, waitForSelector = null, jobKey = null) => {
|
||||
this.responseText = null;
|
||||
try {
|
||||
this.responseText = await puppeteerExtractor(url, waitForSelector, this.options);
|
||||
this.responseText = await puppeteerExtractor(url, waitForSelector, { ...this.options, name: jobKey });
|
||||
if (this.responseText != null) {
|
||||
loadParser(this.responseText);
|
||||
}
|
||||
|
||||
@@ -148,7 +148,11 @@ export default async function execute(url, waitForSelector, options) {
|
||||
if (botDetected(pageSource, statusCode)) {
|
||||
logger.warn('We have been detected as a bot :-/ Tried url: => ', url);
|
||||
|
||||
await trackPoi(TRACKING_POIS.DETECTED_AS_BOT);
|
||||
if (options != null && options.name != null) {
|
||||
await trackPoi(TRACKING_POIS.DETECTED_AS_BOT + '_' + options.name);
|
||||
} else {
|
||||
await trackPoi(TRACKING_POIS.DETECTED_AS_BOT);
|
||||
}
|
||||
|
||||
result = null;
|
||||
} else {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "fredy",
|
||||
"version": "22.0.5",
|
||||
"version": "22.0.6",
|
||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||
"scripts": {
|
||||
"prepare": "husky",
|
||||
|
||||
@@ -95,7 +95,10 @@ async function downloadHtmlProvider(name, providerConfig, launchBrowser, closeBr
|
||||
|
||||
const browser = await launchBrowser(providerConfig.url, {});
|
||||
try {
|
||||
const html = await puppeteerExtractor(providerConfig.url, providerConfig.waitForSelector, { browser });
|
||||
const html = await puppeteerExtractor(providerConfig.url, providerConfig.waitForSelector, {
|
||||
browser,
|
||||
name: 'dowload_fixtures',
|
||||
});
|
||||
|
||||
if (!html) {
|
||||
console.warn(` Failed to download ${name}`);
|
||||
|
||||
Reference in New Issue
Block a user