mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
265ea58bab | ||
|
|
ab5ee59d72 | ||
|
|
2062aa11a3 |
@@ -2,6 +2,12 @@ Newer release changelog see https://github.com/orangecoding/fredy/releases
|
||||
|
||||
------------
|
||||
|
||||
###### [V5.5.0]
|
||||
- Upgrading dependencies
|
||||
- fixing provider
|
||||
- allow multiple instances of 1 provider
|
||||
- __BREAKING__: Minimum node version is now 16
|
||||
|
||||
###### [V5.4.6]
|
||||
- Adding Instana node.js monitoring
|
||||
-
|
||||
|
||||
@@ -1 +1 @@
|
||||
{"interval":"60","port":9998,"scrapingAnt":{"apiKey":""},"workingHours":{"from":"","to":""}}
|
||||
{"interval":"60","port":9998,"scrapingAnt":{"apiKey":"","proxy":"datacenter"},"workingHours":{"from":"","to":""}}
|
||||
33
index.js
33
index.js
@@ -31,33 +31,20 @@ setInterval(
|
||||
|
||||
if (isDuringWorkingHoursOrNotSet) {
|
||||
config.lastRun = Date.now();
|
||||
const fetchedProvider = provider
|
||||
.filter((provider) => provider.endsWith('.js'))
|
||||
.map((pro) => require(`${path}/${pro}`));
|
||||
|
||||
jobStorage
|
||||
.getJobs()
|
||||
.filter((job) => job.enabled)
|
||||
.forEach((job) => {
|
||||
const providerIds = job.provider.map((provider) => provider.id);
|
||||
|
||||
provider
|
||||
.filter((provider) => provider.endsWith('.js'))
|
||||
.map((pro) => require(`${path}/${pro}`))
|
||||
.filter((provider) => providerIds.indexOf(provider.metaInformation.id) !== -1)
|
||||
.forEach(async (pro) => {
|
||||
const providerId = pro.metaInformation.id;
|
||||
if (providerId == null || providerId.length === 0) {
|
||||
throw new Error('Provider id must not be empty. => ' + pro);
|
||||
}
|
||||
const providerConfig = job.provider.find((jobProvider) => jobProvider.id === providerId);
|
||||
if (providerConfig == null) {
|
||||
throw new Error(`Provider Config for provider with id ${providerId} not found.`);
|
||||
}
|
||||
pro.init(providerConfig, job.blacklist);
|
||||
await new FredyRuntime(
|
||||
pro.config,
|
||||
job.notificationAdapter,
|
||||
providerId,
|
||||
job.id,
|
||||
similarityCache
|
||||
).execute();
|
||||
job.provider
|
||||
.filter((p) => fetchedProvider.find((fp) => fp.metaInformation.id === p.id) != null)
|
||||
.forEach(async (prov) => {
|
||||
const pro = fetchedProvider.find((fp) => fp.metaInformation.id === prov.id);
|
||||
pro.init(prov, job.blacklist);
|
||||
await new FredyRuntime(pro.config, job.notificationAdapter, prov.id, job.id, similarityCache).execute();
|
||||
setLastJobExecution(job.id);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,16 +1,21 @@
|
||||
const axios = require('axios');
|
||||
const config = require('../../conf/config.json');
|
||||
|
||||
const { makeUrlResidential } = require('./scrapingAnt');
|
||||
//if ScrapingAnt got blocked, this http status is returned
|
||||
const BLOCKED_HTTP_STATUS = 423;
|
||||
const MAX_RETRIES_SCRAPING_ANT = 3;
|
||||
const NOT_FOUND_HTTP_STATUS = 404;
|
||||
const MAX_RETRIES_SCRAPING_ANT = 10;
|
||||
const EXPECTED_STATUS_CODES = [BLOCKED_HTTP_STATUS, NOT_FOUND_HTTP_STATUS];
|
||||
|
||||
function makeDriver(headers = {}) {
|
||||
let cookies = '';
|
||||
|
||||
async function scrapingAntDriver(context, callback, tryResidentialProxy, retryCounter = 0) {
|
||||
async function scrapingAntDriver(context, callback, retryCounter = 0) {
|
||||
const proxyType = config.scrapingAnt?.proxy || 'datacenter';
|
||||
|
||||
try {
|
||||
const url = context.url;
|
||||
const url = proxyType === 'residential' ? makeUrlResidential(context.url) : context.url;
|
||||
const result = await axios({
|
||||
url,
|
||||
headers: {
|
||||
@@ -26,27 +31,16 @@ function makeDriver(headers = {}) {
|
||||
callback(null, result.data.content);
|
||||
} catch (exception) {
|
||||
/* eslint-disable no-console */
|
||||
if (exception.response?.status !== BLOCKED_HTTP_STATUS) {
|
||||
if (!EXPECTED_STATUS_CODES.includes(exception.response?.status)) {
|
||||
console.error(`Error while trying to scrape data from scraping ant. Received error: ${exception.message}`);
|
||||
callback(null, []);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!tryResidentialProxy) {
|
||||
console.debug('ScrapingAnt got blocked out. Retrying with residential Proxy...');
|
||||
await scrapingAntDriver({ ...context, url: makeUrlResidential(context.url) }, callback, true, 0);
|
||||
} else if (retryCounter <= MAX_RETRIES_SCRAPING_ANT) {
|
||||
if (retryCounter <= MAX_RETRIES_SCRAPING_ANT) {
|
||||
retryCounter++;
|
||||
console.debug(`ScrapingAnt still got blocked retry ${retryCounter} / ${MAX_RETRIES_SCRAPING_ANT}`);
|
||||
await scrapingAntDriver(
|
||||
{
|
||||
...context,
|
||||
url: makeUrlResidential(context.url),
|
||||
},
|
||||
callback,
|
||||
true,
|
||||
retryCounter
|
||||
);
|
||||
console.debug(`ScrapingAnt got blocked. Retrying ${retryCounter} / ${MAX_RETRIES_SCRAPING_ANT}`);
|
||||
await scrapingAntDriver(context, callback, retryCounter);
|
||||
} else {
|
||||
console.error(`Error while trying to scrape data from scraping ant. Received error: ${exception.message}`);
|
||||
callback(null, []);
|
||||
|
||||
@@ -9,7 +9,7 @@ const isImmoscout = (id) => {
|
||||
exports.transformUrlForScrapingAnt = (url, id) => {
|
||||
if (isImmoscout(id)) {
|
||||
//only do calls to scrapingAnt when dealing with Immoscout
|
||||
url = `https://api.scrapingant.com/v1/general?url=${encodeURIComponent(url)}&proxy_type=residential`;
|
||||
url = `https://api.scrapingant.com/v1/general?url=${encodeURIComponent(url)}&proxy_type=datacenter`;
|
||||
}
|
||||
return url;
|
||||
};
|
||||
|
||||
44
package.json
44
package.json
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "fredy",
|
||||
"version": "5.6.1",
|
||||
"version": "5.8.0",
|
||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||
"scripts": {
|
||||
"start": "node index.js",
|
||||
@@ -11,7 +11,7 @@
|
||||
"prod:win32": "set BUILD_DEV='false' && webpack --node-env=production --config ./webpack.prod.js",
|
||||
"prod:default": "export BUILD_DEV='false' && webpack --node-env=production --config ./webpack.prod.js",
|
||||
"format": "prettier --write lib/**/*.js ui/src/**/*.js test/**/*.js *.js --single-quote --print-width 120",
|
||||
"test": "mocha --timeout 20000 test/**/*.test.js",
|
||||
"test": "mocha --timeout 3000000 test/**/*.test.js",
|
||||
"lint": "eslint ./index.js ./lib/**/*.js ./test/**/*.js"
|
||||
},
|
||||
"husky": {
|
||||
@@ -47,7 +47,7 @@
|
||||
},
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=14.0.0",
|
||||
"node": ">=16.0.0",
|
||||
"npm": ">=7.0.0"
|
||||
},
|
||||
"browserslist": [
|
||||
@@ -59,61 +59,61 @@
|
||||
"dependencies": {
|
||||
"@rematch/core": "2.2.0",
|
||||
"@rematch/loading": "2.1.2",
|
||||
"@sendgrid/mail": "7.6.2",
|
||||
"@sendgrid/mail": "7.7.0",
|
||||
"axios": "0.27.2",
|
||||
"better-sqlite3": "7.5.1",
|
||||
"body-parser": "1.20.0",
|
||||
"better-sqlite3": "8.0.1",
|
||||
"body-parser": "1.20.1",
|
||||
"cookie-session": "2.0.0",
|
||||
"handlebars": "4.7.7",
|
||||
"highcharts": "10.0.0",
|
||||
"highcharts": "10.3.2",
|
||||
"highcharts-react-official": "3.1.0",
|
||||
"lowdb": "1.0.0",
|
||||
"markdown": "^0.5.0",
|
||||
"nanoid": "3.3.3",
|
||||
"node-mailjet": "3.3.13",
|
||||
"query-string": "7.1.1",
|
||||
"query-string": "7.1.3",
|
||||
"react": "17.0.2",
|
||||
"react-dom": "17.0.2",
|
||||
"react-redux": "8.0.1",
|
||||
"react-redux": "8.0.5",
|
||||
"react-router": "5.2.1",
|
||||
"react-router-dom": "5.3.0",
|
||||
"react-switch": "^6.0.0",
|
||||
"react-switch": "6.0.0",
|
||||
"redux": "4.2.0",
|
||||
"redux-thunk": "2.4.1",
|
||||
"restana": "4.9.4",
|
||||
"semantic-ui-react": "2.1.2",
|
||||
"redux-thunk": "2.4.2",
|
||||
"restana": "4.9.7",
|
||||
"semantic-ui-react": "2.1.4",
|
||||
"serve-static": "1.15.0",
|
||||
"slack": "11.0.2",
|
||||
"string-similarity": "^4.0.4",
|
||||
"x-ray": "2.3.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/core": "7.17.9",
|
||||
"@babel/preset-env": "7.16.11",
|
||||
"@babel/preset-react": "7.16.7",
|
||||
"@babel/core": "7.20.5",
|
||||
"@babel/preset-env": "7.20.2",
|
||||
"@babel/preset-react": "7.18.6",
|
||||
"babel-eslint": "10.1.0",
|
||||
"babel-loader": "8.2.5",
|
||||
"chai": "4.3.6",
|
||||
"chai": "4.3.7",
|
||||
"clean-webpack-plugin": "4.0.0",
|
||||
"copy-webpack-plugin": "10.2.4",
|
||||
"css-loader": "6.7.1",
|
||||
"css-loader": "6.7.2",
|
||||
"eslint": "7.32.0",
|
||||
"eslint-config-prettier": "8.5.0",
|
||||
"eslint-plugin-react": "7.29.4",
|
||||
"eslint-plugin-react": "7.31.11",
|
||||
"file-loader": "6.2.0",
|
||||
"history": "5.3.0",
|
||||
"husky": "4.3.8",
|
||||
"less": "4.1.2",
|
||||
"less": "4.1.3",
|
||||
"less-loader": "10.2.0",
|
||||
"lint-staged": "12.4.1",
|
||||
"mocha": "9.2.2",
|
||||
"prettier": "2.6.2",
|
||||
"prettier": "2.8.1",
|
||||
"proxyquire": "2.1.3",
|
||||
"redux-logger": "3.0.6",
|
||||
"run-script-os": "^1.1.6",
|
||||
"style-loader": "3.3.1",
|
||||
"url-loader": "4.1.1",
|
||||
"webpack": "5.72.0",
|
||||
"webpack": "5.75.0",
|
||||
"webpack-cli": "4.9.2",
|
||||
"webpack-dev-server": "3.11.2",
|
||||
"webpack-merge": "5.8.0"
|
||||
|
||||
@@ -22,7 +22,7 @@ describe('#einsAImmobilien testsuite()', () => {
|
||||
|
||||
it('should test einsAImmobilien provider', async () => {
|
||||
return await new Promise((resolve) => {
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'test1', similarityCache);
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'einsAImmobilien', similarityCache);
|
||||
fredy.execute().then((listings) => {
|
||||
expect(listings).to.be.a('array');
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ describe('#immonet testsuite()', () => {
|
||||
|
||||
it('should test immonet provider', async () => {
|
||||
return await new Promise((resolve) => {
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'test1', similarityCache);
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'immonet', similarityCache);
|
||||
fredy.execute().then((listing) => {
|
||||
expect(listing).to.be.a('array');
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ describe('#immoscout testsuite()', () => {
|
||||
return;
|
||||
}
|
||||
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'test1', similarityCache);
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'immoscout', similarityCache);
|
||||
fredy.execute().then((listing) => {
|
||||
expect(listing).to.be.a('array');
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ describe('#immoswp testsuite()', () => {
|
||||
|
||||
it('should test immoswp provider', async () => {
|
||||
return await new Promise((resolve) => {
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'test1', similarityCache);
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'immoswp', similarityCache);
|
||||
fredy.execute().then((listing) => {
|
||||
expect(listing).to.be.a('array');
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ describe('#immowelt testsuite()', () => {
|
||||
});
|
||||
|
||||
return await new Promise((resolve) => {
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'test1', similarityCache);
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'immowelt', similarityCache);
|
||||
fredy.execute().then((listing) => {
|
||||
expect(listing).to.be.a('array');
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ describe('#kleinanzeigen testsuite()', () => {
|
||||
});
|
||||
|
||||
return await new Promise((resolve) => {
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'test1', similarityCache);
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'kleinanzeigen', similarityCache);
|
||||
fredy.execute().then((listing) => {
|
||||
expect(listing).to.be.a('array');
|
||||
|
||||
@@ -31,10 +31,8 @@ describe('#kleinanzeigen testsuite()', () => {
|
||||
notificationObj.payload.forEach((notify) => {
|
||||
/** check the actual structure **/
|
||||
expect(notify.id).to.be.a('number');
|
||||
expect(notify.size).to.be.a('string');
|
||||
expect(notify.title).to.be.a('string');
|
||||
expect(notify.link).to.be.a('string');
|
||||
expect(notify.price).to.be.a('string');
|
||||
expect(notify.address).to.be.a('string');
|
||||
|
||||
/** check the values if possible **/
|
||||
|
||||
@@ -20,7 +20,7 @@ describe('#neubauKompass testsuite()', () => {
|
||||
|
||||
it('should test neubauKompass provider', async () => {
|
||||
return await new Promise((resolve) => {
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'test1', similarityCache);
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'neubauKompass', similarityCache);
|
||||
fredy.execute().then((listing) => {
|
||||
expect(listing).to.be.a('array');
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ describe('#wgGesucht testsuite()', () => {
|
||||
|
||||
it('should test wgGesucht provider', async () => {
|
||||
return await new Promise((resolve) => {
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'test1', similarityCache);
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'wgGesucht', similarityCache);
|
||||
fredy.execute().then((listing) => {
|
||||
expect(listing).to.be.a('array');
|
||||
const notificationObj = mockNotification.get();
|
||||
|
||||
@@ -2,7 +2,7 @@ import React from 'react';
|
||||
|
||||
import { useDispatch, useSelector } from 'react-redux';
|
||||
|
||||
import { Button, Form, Icon, Message, Segment } from 'semantic-ui-react';
|
||||
import { Button, Form, Icon, Message, Segment, Radio } from 'semantic-ui-react';
|
||||
import ToastContext from '../../components/toasts/ToastContext';
|
||||
import Headline from '../../components/headline/Headline';
|
||||
import { xhrPost } from '../../services/xhr';
|
||||
@@ -18,6 +18,7 @@ const GeneralSettings = function Users() {
|
||||
const [interval, setInterval] = React.useState('');
|
||||
const [port, setPort] = React.useState('');
|
||||
const [scrapingAntApiKey, setScrapingAntApiKey] = React.useState('');
|
||||
const [scrapingAntProxy, setScrapingAntProxy] = React.useState('');
|
||||
const [workingHourFrom, setWorkingHourFrom] = React.useState(null);
|
||||
const [workingHourTo, setWorkingHourTo] = React.useState(null);
|
||||
const ctx = React.useContext(ToastContext);
|
||||
@@ -33,6 +34,7 @@ const GeneralSettings = function Users() {
|
||||
setScrapingAntApiKey(settings?.scrapingAnt?.apiKey);
|
||||
setWorkingHourFrom(settings?.workingHours?.from);
|
||||
setWorkingHourTo(settings?.workingHours?.to);
|
||||
setScrapingAntProxy(settings?.scrapingAnt?.proxy || 'datacenter');
|
||||
}, [settings]);
|
||||
|
||||
const nullOrEmpty = (val) => val == null || val.length === 0;
|
||||
@@ -69,6 +71,7 @@ const GeneralSettings = function Users() {
|
||||
port,
|
||||
scrapingAnt: {
|
||||
apiKey: scrapingAntApiKey,
|
||||
proxy: scrapingAntProxy,
|
||||
},
|
||||
workingHours: {
|
||||
from: workingHourFrom,
|
||||
@@ -144,6 +147,48 @@ const GeneralSettings = function Users() {
|
||||
/>
|
||||
</SegmentPart>
|
||||
|
||||
<SegmentPart
|
||||
name="ScrapingAnt proxy settings"
|
||||
helpText="Scraping ant provides different proxies."
|
||||
icon="key"
|
||||
>
|
||||
<Message info>
|
||||
ScrapingAnt is needed to scrape Immoscout. ScrapingAnt itself is using 2 different types of proxies.{' '}
|
||||
<br />
|
||||
<h4>Datacenter-Proxy</h4>
|
||||
Proxy server located in one of the datacenters across the world. Datacenter proxies are slower and more
|
||||
likely to fail, but they are cheaper. A call with a datacenter proxy cost 10 credits.
|
||||
<h4>Residential-Proxy</h4>
|
||||
High-quality proxy server located in one of the real people houses across the world. Datacenter proxies
|
||||
are faster and more likely to success, but they are more expensive. A call with a datacenter proxy cost
|
||||
250 credits.
|
||||
<br />
|
||||
<br />
|
||||
<b>
|
||||
On the free tier, you have 10.000 credits, so chose your option wisely. Keep in mind, only successful
|
||||
calls will be charged.
|
||||
</b>
|
||||
</Message>
|
||||
<Form.Field>
|
||||
<Radio
|
||||
label="Datacenter proxy"
|
||||
name="scrapingAntProxy"
|
||||
value="datacenter"
|
||||
checked={scrapingAntProxy === 'datacenter'}
|
||||
onChange={(e, { value }) => setScrapingAntProxy(value)}
|
||||
/>
|
||||
</Form.Field>
|
||||
<Form.Field>
|
||||
<Radio
|
||||
label="Residential proxy"
|
||||
name="scrapingAntProxy"
|
||||
value="residential"
|
||||
checked={scrapingAntProxy === 'residential'}
|
||||
onChange={(e, { value }) => setScrapingAntProxy(value)}
|
||||
/>
|
||||
</Form.Field>
|
||||
</SegmentPart>
|
||||
|
||||
<SegmentPart
|
||||
name="Working hours"
|
||||
helpText="During this hours, Fredy will search for new apartments. If nothing is configured, Fredy will search around the clock."
|
||||
@@ -153,7 +198,7 @@ const GeneralSettings = function Users() {
|
||||
<Form.Input
|
||||
className="generalSettings__time"
|
||||
type="time"
|
||||
placeholder="ScrapingAnt Api Key"
|
||||
placeholder="Working hours from"
|
||||
inverted
|
||||
size="mini"
|
||||
width={2}
|
||||
@@ -163,7 +208,7 @@ const GeneralSettings = function Users() {
|
||||
<div className="generalSettings__until">until</div>
|
||||
<Form.Input
|
||||
type="time"
|
||||
placeholder="ScrapingAnt Api Key"
|
||||
placeholder="Working hours to"
|
||||
inverted
|
||||
size="mini"
|
||||
width={2}
|
||||
|
||||
@@ -81,7 +81,6 @@ export default function JobMutator() {
|
||||
<ProviderMutator
|
||||
visible={providerCreationVisible}
|
||||
onVisibilityChanged={(visible) => setProviderCreationVisibility(visible)}
|
||||
selected={providerData}
|
||||
onData={(data) => {
|
||||
setProviderData([...providerData, data]);
|
||||
}}
|
||||
|
||||
@@ -16,7 +16,7 @@ const sortProvider = (a, b) => {
|
||||
return 0;
|
||||
};
|
||||
|
||||
export default function ProviderMutator({ onVisibilityChanged, visible = false, selected = [], onData } = {}) {
|
||||
export default function ProviderMutator({ onVisibilityChanged, visible = false, onData } = {}) {
|
||||
const provider = useSelector((state) => state.provider);
|
||||
const [selectedProvider, setSelectedProvider] = useState(null);
|
||||
const [providerUrl, setProviderUrl] = useState(null);
|
||||
@@ -107,8 +107,6 @@ export default function ProviderMutator({ onVisibilityChanged, visible = false,
|
||||
text: pro.name,
|
||||
};
|
||||
})
|
||||
//filter out those, that have already been selected
|
||||
.filter((option) => selected.find((selectedOption) => selectedOption.id === option.key) == null)
|
||||
.sort(sortProvider)}
|
||||
onChange={(e, { value }) => {
|
||||
const selectedProvider = provider.find((pro) => pro.id === value);
|
||||
|
||||
Reference in New Issue
Block a user