diff --git a/conf/config.json b/conf/config.json
index 79dac47..6df9d31 100755
--- a/conf/config.json
+++ b/conf/config.json
@@ -1 +1 @@
-{"interval":"60","port":9998,"scrapingAnt":{"apiKey":""},"workingHours":{"from":"","to":""}}
+{"interval":"60","port":9998,"scrapingAnt":{"apiKey":"","proxy":"datacenter"},"workingHours":{"from":"","to":""}}
\ No newline at end of file
diff --git a/lib/services/requestDriver.js b/lib/services/requestDriver.js
index f918901..dfff63c 100644
--- a/lib/services/requestDriver.js
+++ b/lib/services/requestDriver.js
@@ -1,16 +1,21 @@
const axios = require('axios');
+const config = require('../../conf/config.json');
const { makeUrlResidential } = require('./scrapingAnt');
//if ScrapingAnt got blocked, this http status is returned
const BLOCKED_HTTP_STATUS = 423;
-const MAX_RETRIES_SCRAPING_ANT = 3;
+const NOT_FOUND_HTTP_STATUS = 404;
+const MAX_RETRIES_SCRAPING_ANT = 10;
+const EXPECTED_STATUS_CODES = [BLOCKED_HTTP_STATUS, NOT_FOUND_HTTP_STATUS];
function makeDriver(headers = {}) {
let cookies = '';
- async function scrapingAntDriver(context, callback, tryResidentialProxy, retryCounter = 0) {
+ async function scrapingAntDriver(context, callback, retryCounter = 0) {
+ const proxyType = config.scrapingAnt?.proxy || 'datacenter';
+
try {
- const url = context.url;
+ const url = proxyType === 'residential' ? makeUrlResidential(context.url) : context.url;
const result = await axios({
url,
headers: {
@@ -26,27 +31,16 @@ function makeDriver(headers = {}) {
callback(null, result.data.content);
} catch (exception) {
/* eslint-disable no-console */
- if (exception.response?.status !== BLOCKED_HTTP_STATUS) {
+ if (!EXPECTED_STATUS_CODES.includes(exception.response?.status)) {
console.error(`Error while trying to scrape data from scraping ant. Received error: ${exception.message}`);
callback(null, []);
return;
}
- if (!tryResidentialProxy) {
- console.debug('ScrapingAnt got blocked out. Retrying with residential Proxy...');
- await scrapingAntDriver({ ...context, url: makeUrlResidential(context.url) }, callback, true, 0);
- } else if (retryCounter <= MAX_RETRIES_SCRAPING_ANT) {
+ if (retryCounter <= MAX_RETRIES_SCRAPING_ANT) {
retryCounter++;
- console.debug(`ScrapingAnt still got blocked retry ${retryCounter} / ${MAX_RETRIES_SCRAPING_ANT}`);
- await scrapingAntDriver(
- {
- ...context,
- url: makeUrlResidential(context.url),
- },
- callback,
- true,
- retryCounter
- );
+ console.debug(`ScrapingAnt got blocked. Retrying ${retryCounter} / ${MAX_RETRIES_SCRAPING_ANT}`);
+ await scrapingAntDriver(context, callback, retryCounter);
} else {
console.error(`Error while trying to scrape data from scraping ant. Received error: ${exception.message}`);
callback(null, []);
diff --git a/lib/services/scrapingAnt.js b/lib/services/scrapingAnt.js
index be776aa..bdd5a82 100644
--- a/lib/services/scrapingAnt.js
+++ b/lib/services/scrapingAnt.js
@@ -9,7 +9,7 @@ const isImmoscout = (id) => {
exports.transformUrlForScrapingAnt = (url, id) => {
if (isImmoscout(id)) {
//only do calls to scrapingAnt when dealing with Immoscout
- url = `https://api.scrapingant.com/v1/general?url=${encodeURIComponent(url)}&proxy_type=residential`;
+ url = `https://api.scrapingant.com/v1/general?url=${encodeURIComponent(url)}&proxy_type=datacenter`;
}
return url;
};
diff --git a/package.json b/package.json
index 57b691e..6aa5103 100755
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "fredy",
- "version": "5.6.1",
+ "version": "5.7.0",
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
"scripts": {
"start": "node index.js",
@@ -11,7 +11,7 @@
"prod:win32": "set BUILD_DEV='false' && webpack --node-env=production --config ./webpack.prod.js",
"prod:default": "export BUILD_DEV='false' && webpack --node-env=production --config ./webpack.prod.js",
"format": "prettier --write lib/**/*.js ui/src/**/*.js test/**/*.js *.js --single-quote --print-width 120",
- "test": "mocha --timeout 20000 test/**/*.test.js",
+ "test": "mocha --timeout 3000000 test/**/*.test.js",
"lint": "eslint ./index.js ./lib/**/*.js ./test/**/*.js"
},
"husky": {
diff --git a/ui/src/views/generalSettings/GeneralSettings.js b/ui/src/views/generalSettings/GeneralSettings.js
index 972a8ac..03dcb88 100644
--- a/ui/src/views/generalSettings/GeneralSettings.js
+++ b/ui/src/views/generalSettings/GeneralSettings.js
@@ -2,7 +2,7 @@ import React from 'react';
import { useDispatch, useSelector } from 'react-redux';
-import { Button, Form, Icon, Message, Segment } from 'semantic-ui-react';
+import { Button, Form, Icon, Message, Segment, Radio } from 'semantic-ui-react';
import ToastContext from '../../components/toasts/ToastContext';
import Headline from '../../components/headline/Headline';
import { xhrPost } from '../../services/xhr';
@@ -18,6 +18,7 @@ const GeneralSettings = function Users() {
const [interval, setInterval] = React.useState('');
const [port, setPort] = React.useState('');
const [scrapingAntApiKey, setScrapingAntApiKey] = React.useState('');
+ const [scrapingAntProxy, setScrapingAntProxy] = React.useState('');
const [workingHourFrom, setWorkingHourFrom] = React.useState(null);
const [workingHourTo, setWorkingHourTo] = React.useState(null);
const ctx = React.useContext(ToastContext);
@@ -33,6 +34,7 @@ const GeneralSettings = function Users() {
setScrapingAntApiKey(settings?.scrapingAnt?.apiKey);
setWorkingHourFrom(settings?.workingHours?.from);
setWorkingHourTo(settings?.workingHours?.to);
+ setScrapingAntProxy(settings?.scrapingAnt?.proxy || 'datacenter');
}, [settings]);
const nullOrEmpty = (val) => val == null || val.length === 0;
@@ -69,6 +71,7 @@ const GeneralSettings = function Users() {
port,
scrapingAnt: {
apiKey: scrapingAntApiKey,
+ proxy: scrapingAntProxy,
},
workingHours: {
from: workingHourFrom,
@@ -144,6 +147,48 @@ const GeneralSettings = function Users() {
/>
+
+
+ ScrapingAnt is needed to scrape Immoscout. ScrapingAnt itself uses two different types of proxies.{' '}
+
+ Datacenter-Proxy
+ Proxy server located in one of the datacenters across the world. Datacenter proxies are slower and more
+ likely to fail, but they are cheaper. A call with a datacenter proxy costs 10 credits.
+ Residential-Proxy
+ High-quality proxy server located in real people's homes across the world. Residential proxies
+ are faster and more likely to succeed, but they are more expensive. A call with a residential proxy costs
+ 250 credits.
+
+
+
+ On the free tier, you have 10.000 credits, so choose your option wisely. Keep in mind, only successful
+ calls will be charged.
+
+
+
+ setScrapingAntProxy(value)}
+ />
+
+
+ setScrapingAntProxy(value)}
+ />
+
+
+
until