Scrapingant proxies (#59)

* preparing scraping ant proxies

* adding general settings for scraping ant proxy

* retrying with new ui settings
This commit is contained in:
Christian Kellner
2022-06-13 08:10:30 +02:00
committed by GitHub
parent a4501007ff
commit 2062aa11a3
5 changed files with 64 additions and 25 deletions

View File

@@ -1,16 +1,21 @@
const axios = require('axios');
const config = require('../../conf/config.json');
const { makeUrlResidential } = require('./scrapingAnt');
// HTTP status that is returned when ScrapingAnt got blocked.
const BLOCKED_HTTP_STATUS = 423;
// NOTE(review): MAX_RETRIES_SCRAPING_ANT is declared twice in this view (3 here, 10 below).
// This looks like the removed and the added side of a diff rendered without +/- markers;
// confirm which value is the current one.
const MAX_RETRIES_SCRAPING_ANT = 3;
// HTTP "not found" status; listed below alongside the blocked status.
const NOT_FOUND_HTTP_STATUS = 404;
// Upper bound the driver compares its retryCounter against before attempting another request.
const MAX_RETRIES_SCRAPING_ANT = 10;
// Response statuses for which the driver retries; any other failure is logged and
// answered with an empty result right away.
const EXPECTED_STATUS_CODES = [BLOCKED_HTTP_STATUS, NOT_FOUND_HTTP_STATUS];
function makeDriver(headers = {}) {
let cookies = '';
async function scrapingAntDriver(context, callback, tryResidentialProxy, retryCounter = 0) {
async function scrapingAntDriver(context, callback, retryCounter = 0) {
const proxyType = config.scrapingAnt?.proxy || 'datacenter';
try {
const url = context.url;
const url = proxyType === 'residential' ? makeUrlResidential(context.url) : context.url;
const result = await axios({
url,
headers: {
@@ -26,27 +31,16 @@ function makeDriver(headers = {}) {
callback(null, result.data.content);
} catch (exception) {
/* eslint-disable no-console */
if (exception.response?.status !== BLOCKED_HTTP_STATUS) {
if (!EXPECTED_STATUS_CODES.includes(exception.response?.status)) {
console.error(`Error while trying to scrape data from scraping ant. Received error: ${exception.message}`);
callback(null, []);
return;
}
if (!tryResidentialProxy) {
console.debug('ScrapingAnt got blocked out. Retrying with residential Proxy...');
await scrapingAntDriver({ ...context, url: makeUrlResidential(context.url) }, callback, true, 0);
} else if (retryCounter <= MAX_RETRIES_SCRAPING_ANT) {
if (retryCounter <= MAX_RETRIES_SCRAPING_ANT) {
retryCounter++;
console.debug(`ScrapingAnt still got blocked retry ${retryCounter} / ${MAX_RETRIES_SCRAPING_ANT}`);
await scrapingAntDriver(
{
...context,
url: makeUrlResidential(context.url),
},
callback,
true,
retryCounter
);
console.debug(`ScrapingAnt got blocked. Retrying ${retryCounter} / ${MAX_RETRIES_SCRAPING_ANT}`);
await scrapingAntDriver(context, callback, retryCounter);
} else {
console.error(`Error while trying to scrape data from scraping ant. Received error: ${exception.message}`);
callback(null, []);

View File

@@ -9,7 +9,7 @@ const isImmoscout = (id) => {
/**
 * Rewrites a URL so that it is requested through the ScrapingAnt general API endpoint.
 *
 * @param {string} url - Target URL (presumably a string); it is passed through
 *   `encodeURIComponent` and embedded as the `url` query parameter.
 * @param {*} id - Identifier handed to `isImmoscout` to decide whether the URL is rewritten.
 * @returns {string} The ScrapingAnt API URL when `isImmoscout(id)` is truthy, otherwise
 *   `url` unchanged.
 */
exports.transformUrlForScrapingAnt = (url, id) => {
if (isImmoscout(id)) {
//only do calls to scrapingAnt when dealing with Immoscout
// NOTE(review): the two consecutive assignments below look like the removed and the added
// line of a diff hunk. As written, the second one re-encodes the already rewritten URL and
// is the value that is returned (proxy_type=datacenter) — confirm only one is meant to exist.
url = `https://api.scrapingant.com/v1/general?url=${encodeURIComponent(url)}&proxy_type=residential`;
url = `https://api.scrapingant.com/v1/general?url=${encodeURIComponent(url)}&proxy_type=datacenter`;
}
return url;
};