diff --git a/README.md b/README.md
index 968820d..c57164f 100755
--- a/README.md
+++ b/README.md
@@ -17,6 +17,15 @@ yarn run start
```
_Fredy_ will start with the default port, set to `9998`. You can access _Fredy_ by opening a browser `http://localhost:9998`. The default login is `admin` for username and password. (You should change the password asap when you plan to run Fredy on your server.)
+## Immoscout
+I have added **EXPERIMENTAL** support for Immoscout. Immoscout is somewhat special, because they have decided to protect their service from bots using reCAPTCHA. Finding a way around this is barely possible. For _Fredy_ to be able to bypass the check, I'm using a service called [ScrapingAnt](https://scrapingant.com/). The trick is to use a headless browser, rotating proxies and (once successfully validated) to re-send the cookies each time.
+
+To be able to use Immoscout, you need to create a ScrapingAnt account and copy the apiKey into the config file under /conf/config.json.
+The rest should be done by _Fredy_. Keep in mind, the support is experimental. There might be bugs and you might not always get past the reCAPTCHA check, but most of the time it works pretty well :)
+
+If you need more than the 1000 API calls you can make per month, I'd suggest opting for a paid account... (No, I don't get any money for recommending good services.)
+
+
## Understanding the fundamentals
There are 3 important parts in Fredy, that you need to understand to leverage the full power of _Fredy_.
@@ -57,9 +66,6 @@ yarn run test
# Architecture

-## Why is Immoscout missing
-Immoscout decided to add "robot protection" to their service. Meaning if Fredy tries to check for listings, it will be recognized as a bot. I haven't found a way around it (yet) ;)
-
#### Contribution guidelines
See [Contribution](https://github.com/orangecoding/fredy/blob/master/CONTRIBUTION.md)
diff --git a/conf/config.json b/conf/config.json
index 53d66d8..bf23b83 100755
--- a/conf/config.json
+++ b/conf/config.json
@@ -1,4 +1,7 @@
{
"interval": 30,
- "port": 9998
+ "port": 9998,
+ "scrapingAnt": {
+ "apiKey": ""
+ }
}
diff --git a/lib/FredyRuntime.js b/lib/FredyRuntime.js
index b7b45c1..89d1d7b 100755
--- a/lib/FredyRuntime.js
+++ b/lib/FredyRuntime.js
@@ -3,6 +3,7 @@ const { setKnownListings, getKnownListings } = require('./services/storage/listi
const notify = require('./notification/notify');
const xray = require('./services/scraper');
+const scrapingAnt = require('./services/scrapingAnt');
class FredyRuntime {
/**
@@ -41,15 +42,24 @@ class FredyRuntime {
_getListings(url) {
return new Promise((resolve, reject) => {
- let x = xray(url, this._providerConfig.crawlContainer, [this._providerConfig.crawlFields]);
-
- x((err, listings) => {
- if (err) {
+      // Resolves with the scraped listings (an empty array when the scraper yields nothing).
+      // Rejects with a message string when Immoscout is requested without a ScrapingAnt api key,
+      // or with the scraper's error when the crawl itself fails.
+      const id = this._providerId;
+      const isImmoscout = scrapingAnt.isImmoscout(id);
+      if (isImmoscout && !scrapingAnt.isScrapingAntApiKeySet()) {
+        const error = 'Immoscout can only be used if you have set an apiKey for scrapingAnt.';
+        /* eslint-disable no-console */
+        console.log(error);
+        /* eslint-enable no-console */
+        reject(error);
+        return;
+      }
+      // Only Immoscout urls are rewritten to go through the ScrapingAnt api; all other providers
+      // are crawled with the url as given.
+      const u = isImmoscout ? scrapingAnt.transformUrlForScrapingAnt(url, id) : url;
+      xray(u, this._providerConfig.crawlContainer, [this._providerConfig.crawlFields])
+        .then((listings) => {
+          resolve(listings == null ? [] : listings);
+        })
+        .catch((err) => {
reject(err);
- } else {
- resolve(listings);
- }
- });
+          /* eslint-disable no-console */
+          console.error(err);
+          /* eslint-enable no-console */
+        });
});
}
diff --git a/lib/provider/immoscout.js b/lib/provider/immoscout.js
new file mode 100644
index 0000000..ddf8364
--- /dev/null
+++ b/lib/provider/immoscout.js
@@ -0,0 +1,44 @@
+const utils = require('../utils');
+
+let appliedBlackList = [];
+
+/**
+ * Cleans up a scraped Immoscout listing in place.
+ *
+ * Removes the first "NEU" marker from the title, cuts everything from a "(...)," part to the
+ * end of the address and rebuilds the link as an absolute immobilienscout24.de url that
+ * starts at "/expose" (a link without "/expose" is appended unchanged).
+ * Fields the scraper could not fill are treated as empty strings, so a single incomplete
+ * listing does not throw.
+ *
+ * @param {Object} o listing with the scraped `title`, `address` and `link`
+ * @returns {Object} the same object with normalized `title`, `address` and `link`
+ */
+function normalize(o) {
+  const title = (o.title || '').replace('NEU', '');
+  const address = (o.address || '').replace(/\(.*\),.*$/, '').trim();
+  const rawLink = o.link || '';
+  const link = `https://www.immobilienscout24.de${rawLink.substring(rawLink.indexOf('/expose'))}`;
+  return Object.assign(o, { title, address, link });
+}
+
+/**
+ * Filter callback: keeps a listing only when utils.isOneOf() does not match its title against
+ * the currently applied blacklist.
+ *
+ * @param {Object} o listing with a `title`
+ * @returns {boolean} true when the listing should be kept
+ */
+function applyBlacklist(o) {
+  const isBlacklisted = utils.isOneOf(o.title, appliedBlackList);
+  return !isBlacklisted;
+}
+
+// Crawl definition for Immoscout result pages. `url` stays null until init() provides it.
+const config = {
+  url: null,
+  // selector of one listing inside the result list
+  crawlContainer: '#resultListItems li.result-list__listing',
+  // per-listing field selectors; the names after `|` are scraper filters
+  crawlFields: {
+    id: '.result-list-entry@data-obid | int',
+    price: '.result-list-entry .result-list-entry__criteria .grid-item:first-child dd | removeNewline | trim',
+    size: '.result-list-entry .result-list-entry__criteria .grid-item:nth-child(2) dd | removeNewline | trim',
+    title: '.result-list-entry .result-list-entry__brand-title-container h5 | removeNewline | trim',
+    link: '.result-list-entry .result-list-entry__brand-title-container@href',
+    address: '.result-list-entry .result-list-entry__map-link',
+  },
+  paginate: '#pager .align-right a@href',
+  normalize,
+  filter: applyBlacklist,
+};
+
+/**
+ * Applies the job specific settings to this provider.
+ *
+ * @param {Object} sourceConfig provides `enabled` and the search `url`
+ * @param {Array} [blacklist] terms used by the title filter; defaults to an empty list
+ */
+exports.init = (sourceConfig, blacklist) => {
+  const { enabled, url } = sourceConfig;
+  config.enabled = enabled;
+  config.url = url;
+  appliedBlackList = blacklist || [];
+};
+
+// The provider id is this file's name without its directory and the 3-character extension.
+const providerId = __filename.slice(__dirname.length + 1, -3);
+
+exports.metaInformation = {
+  name: 'Immoscout',
+  baseUrl: 'https://www.immobilienscout24.de/',
+  id: providerId,
+};
+
+exports.config = config;
diff --git a/lib/services/requestDriver.js b/lib/services/requestDriver.js
new file mode 100644
index 0000000..39d8ebd
--- /dev/null
+++ b/lib/services/requestDriver.js
@@ -0,0 +1,33 @@
+const axios = require('axios');
+
+/**
+ * Creates an x-ray driver that loads pages with axios.
+ *
+ * Responses coming from ScrapingAnt (an object carrying `content` and `cookies`) are unwrapped
+ * so that only the page content is handed on. The cookies of the first ScrapingAnt response
+ * are remembered and sent again with later ScrapingAnt requests.
+ *
+ * @param {Object} [headers={}] headers sent with the requests
+ * @returns {Function} async driver(context, callback); callback receives (error, body)
+ */
+function makeDriver(headers = {}) {
+  let cookies = '';
+
+  return async function driver(context, callback) {
+    const url = context.url;
+    const isScrapingAntUrl = url.toLowerCase().indexOf('scrapingant') !== -1;
+
+    const requestHeaders = { ...headers };
+    if (isScrapingAntUrl) {
+      requestHeaders.Cookie = cookies;
+    } else {
+      // The api key and the replayed cookies are meant for ScrapingAnt only; do not hand them
+      // to the other crawled hosts.
+      delete requestHeaders['x-api-key'];
+    }
+
+    let result;
+    try {
+      result = await axios({ url, headers: requestHeaders });
+    } catch (exception) {
+      // Stop here: falling through would read `.data` of an undefined result and throw after
+      // the callback has already been invoked.
+      callback(exception, null);
+      return;
+    }
+
+    const data = result.data;
+    // `typeof null` is 'object' as well, so rule null out explicitly.
+    if (isScrapingAntUrl && data !== null && typeof data === 'object') {
+      //assume we have gotten a response from scrapingAnt
+      if (cookies.length === 0 && typeof data.cookies === 'string') {
+        cookies = data.cookies;
+      }
+      callback(null, data.content);
+    } else {
+      callback(null, data);
+    }
+  };
+}
+
+module.exports = makeDriver;
diff --git a/lib/services/scraper.js b/lib/services/scraper.js
index 9959111..a677699 100755
--- a/lib/services/scraper.js
+++ b/lib/services/scraper.js
@@ -1,4 +1,5 @@
-const makeDriver = require('request-x-ray');
+const config = require('../../conf/config.json');
+const makeDriver = require('./requestDriver');
const Xray = require('x-ray');
class Scraper {
@@ -9,14 +10,16 @@ class Scraper {
int: this._int,
};
- const driver = makeDriver({
- headers: {
- 'User-Agent':
- 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.85 Safari/537.36',
- cookie:
- 'longUnreliableState="dWlkcg==:YS1kZDViMzVhZWRhMTk0MDdmYWRjNDNkY2VmYTcxZmVkOQ=="; eveD=eyJldnRfZ2FfYWN0aW9uIjpbInNlYXJjaCJdLCJldnRfZ2FfY2F0ZWdvcnkiOlsicmVzdWx0bGlzdCJdLCJnZW9fYmxuIjpbIm5vcmRyaGVpbl93ZXN0ZmFsZW4iXSwiZXZ0X2dhX2xhYmVsIjpbImRpc3RyaWN0Il0sIm9ial9pdHlwIjpbIndvaG51bmdfa2F1ZiJdLCJnZW9fa3JzIjpbImTDvHNzZWxkb3JmIl0sImdlb19sYW5kIjpbImRldXRzY2hsYW5kIl0sIm9ial9yZXN1bHRsaXN0X2NvdW50IjpbIjI4NCJdLCJvYmpfY3Jvc3N0eXBlIjpbImxpdl9hcGFydG1lbnRfYnV5Il19; ABNTEST=9526230109; is24_experiment_visitor_id=d568590b-951b-45c3-b890-13feef6ee472; reese84=3:Xf3JwcTIC3yeubDXqWBTfg==:oqnDVs58wBxZRMfpzPnlzLzscVQhboRBffkM4caxNe+vLBdozdtdrCwpcTKyvIuhB9MOMCAinb2qnSTL4D9kLpqL72gl+jtl7QdiNAEn2erDKLqX4b9/K5wFU7j6qzxFWdfcMUm295qU3o3s7O8CM8HdghKYOVtoif+qTkeztphyYMfmAePYkfYRhZXZaFwHwxUfkRVUEX2VKoepkTf9TudCHsTYXWqvnpUt/CT+yrFHlUdTgdTWfD5tQJvn3inPqKERAB8TTKoHIvM4duBJV/5fZDax07CHNqHcKhrws0pq4y2ssKfdxLxCE0OIpnMSOtmn7O0koDoV6RzRjNUC+UZ7mhPFH+YSPHTb+6VJsZQDnRufEIz4B1WWIORV+jvHzfIli9OHsmOPnskA6mnCpFwEvQAfJu9R+jI9dccjFno=:Oc7c2wwYiNMBJnvZeDCIKLP0LuVVPWJ4kzd5MPlsoTg=',
- },
- });
+ const headers = {
+ 'User-Agent':
+ 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.85 Safari/537.36',
+ useQueryString: true,
+ };
+
+ if (config.scrapingAnt != null && config.scrapingAnt.apiKey != null) {
+ headers['x-api-key'] = config.scrapingAnt.apiKey;
+ }
+ const driver = makeDriver(headers);
const xray = Xray({ filters });
xray.driver(driver);
diff --git a/lib/services/scrapingAnt.js b/lib/services/scrapingAnt.js
new file mode 100644
index 0000000..e13a7e4
--- /dev/null
+++ b/lib/services/scrapingAnt.js
@@ -0,0 +1,24 @@
+const { metaInformation } = require('../provider/immoscout');
+//to better confuse reCAPTCHA, choose a random proxy each time we do a call
+const proxies = ['ae', 'br', 'cn', 'de', 'es', 'fr', 'gb', 'hk', 'in', 'it', 'il', 'jp', 'nl', 'ru', 'sa', 'us', 'cz'];
+const config = require('../../conf/config.json');
+
+/**
+ * Tells whether the given provider id is the Immoscout provider (compared case-insensitively
+ * on the given id).
+ *
+ * @param {string} id provider id
+ * @returns {boolean} true for the Immoscout provider
+ */
+const isImmoscout = (id) => id.toLowerCase() === metaInformation.id;
+
+/**
+ * Wraps an Immoscout url into a ScrapingAnt api call using a randomly picked proxy country.
+ * Urls of any other provider are returned untouched.
+ *
+ * @param {string} url the provider url to crawl
+ * @param {string} id provider id
+ * @returns {string} the ScrapingAnt url for Immoscout, otherwise the given url
+ */
+exports.transformUrlForScrapingAnt = (url, id) => {
+  const proxyIndex = Math.floor(Math.random() * proxies.length);
+
+  if (!isImmoscout(id)) {
+    return url;
+  }
+  //only do calls to scrapingAnt when dealing with Immoscout
+  return `https://api.scrapingant.com/v1/general?url=${encodeURIComponent(url)}&proxy_country=${proxies[proxyIndex]}`;
+};
+
+/**
+ * Checks whether the config holds a non-empty ScrapingAnt api key.
+ *
+ * @returns {boolean} true when `scrapingAnt.apiKey` is present and not empty
+ */
+exports.isScrapingAntApiKeySet = () => {
+  const settings = config.scrapingAnt;
+  if (settings == null || settings.apiKey == null) {
+    return false;
+  }
+  return settings.apiKey.length > 0;
+};
+
+exports.isImmoscout = isImmoscout;
diff --git a/package.json b/package.json
index 1732525..09c4579 100755
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "fredy",
- "version": "3.0.0",
+ "version": "4.0.0",
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
"scripts": {
"start": "node index.js",
@@ -72,7 +72,6 @@
"react-switch": "^6.0.0",
"redux": "4.0.5",
"redux-thunk": "2.3.0",
- "request-x-ray": "0.1.4",
"restana": "4.8.1",
"semantic-ui-react": "2.0.3",
"serve-static": "^1.14.1",
diff --git a/test/provider/immoscout.test.js b/test/provider/immoscout.test.js
new file mode 100644
index 0000000..657f793
--- /dev/null
+++ b/test/provider/immoscout.test.js
@@ -0,0 +1,56 @@
+const mockNotification = require('../mocks/mockNotification');
+const providerConfig = require('./testProvider.json');
+const mockStore = require('../mocks/mockStore');
+const proxyquire = require('proxyquire').noCallThru();
+const expect = require('chai').expect;
+const provider = require('../../lib/provider/immoscout');
+const scrapingAnt = require('../../lib/services/scrapingAnt');
+
+describe('#immoscout testsuite()', () => {
+  provider.init(providerConfig.immoscout, [], []);
+  const Fredy = proxyquire('../../lib/FredyRuntime', {
+    './services/storage/listingsStorage': {
+      ...mockStore,
+    },
+    './notification/notify': mockNotification,
+  });
+
+  // Live test that needs a ScrapingAnt api key. The async function's own promise is what the
+  // test runner waits on, so a rejected execute() or a failing expectation fails the test
+  // directly instead of leaving a promise that never settles.
+  it('should test immoscout provider', async () => {
+    if (!scrapingAnt.isScrapingAntApiKeySet()) {
+      /* eslint-disable no-console */
+      console.info('Skipping Immoscout test as ScrapingAnt Api Key is not set.');
+      /* eslint-enable no-console */
+      return;
+    }
+
+    const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'test1');
+    const listing = await fredy.execute();
+    expect(listing).to.be.a('array');
+
+    const notificationObj = mockNotification.get();
+    expect(notificationObj).to.be.a('object');
+    expect(notificationObj.serviceName).to.equal('immoscout');
+
+    notificationObj.payload.forEach((notify) => {
+      /** check the actual structure **/
+      expect(notify.id).to.be.a('number');
+      expect(notify.price).to.be.a('string');
+      expect(notify.size).to.be.a('string');
+      expect(notify.title).to.be.a('string');
+      expect(notify.link).to.be.a('string');
+      expect(notify.address).to.be.a('string');
+
+      /** check the values if possible **/
+      expect(notify.price).that.does.include('€');
+      expect(notify.size).that.does.include('m²');
+      expect(notify.title).to.be.not.empty;
+      expect(notify.link).that.does.include('https://www.immobilienscout24.de');
+      expect(notify.address).to.be.not.empty;
+    });
+  });
+});
diff --git a/test/provider/testProvider.json b/test/provider/testProvider.json
index 84ee78c..4d4dbbe 100644
--- a/test/provider/testProvider.json
+++ b/test/provider/testProvider.json
@@ -12,6 +12,10 @@
"url": "https://www.immowelt.de/liste/duesseldorf-benrath/wohnungen/kaufen?geoid=10805111000004%2C10805111000005%2C10805111000006%2C10805111000007%2C10805111000009%2C10805111000010%2C10805111000011%2C10805111000013%2C10805111000014%2C10805111000015%2C10805111000016%2C10805111000017%2C10805111000018%2C10805111000019%2C10805111000023%2C10805111000024%2C10805111000027%2C10805111000032%2C10805111000034%2C10805111000035%2C10805111000039%2C10805111000041%2C10805111000042%2C10805111000043%2C10805111000047%2C10805111000048%2C10805111000049%2C10805111000051%2C10805111000052%2C10805111000053&roomi=3&prima=420000&wflmi=90&sort=createdate%2Bdesc",
"enabled": true
},
+ "immoscout": {
+ "url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?enteredFrom=one_step_search",
+ "enabled": true
+ },
"kalaydo": {
"url": "https://www.kalaydo.de/immobilien/eigentumswohnung-kaufen/o/duesseldorf/4/?attr_gt_estate_size_living_area=90.0&attr_gt_no_of_rooms=3.5&maxPrice=420000.00&radius=5&resultsPerPage=50&sorting=-date",
"enabled": true
diff --git a/ui/src/views/jobs/mutation/components/provider/ProviderMutator.js b/ui/src/views/jobs/mutation/components/provider/ProviderMutator.js
index de93e52..66233de 100644
--- a/ui/src/views/jobs/mutation/components/provider/ProviderMutator.js
+++ b/ui/src/views/jobs/mutation/components/provider/ProviderMutator.js
@@ -84,6 +84,11 @@ export default function ProviderMutator({ onVisibilityChanged, visible = false,
When the search results are shown on the website, copy the url and paste it into the textfield below.
+
+ If you chose Immoscout as a provider, make sure to also add the scrapingAnt apiKey to the config.json.
+ (See readme)
+
+
Do not forget to sort the results by date before copying the url to Fredy, so that Fredy always captures
the latest search results.
diff --git a/webpack.common.js b/webpack.common.js
index fcfc00b..0c17f64 100644
--- a/webpack.common.js
+++ b/webpack.common.js
@@ -18,6 +18,7 @@ module.exports = {
publicPath: '/',
filename: 'fredy.bundle.js',
},
+ performance: { hints: false },
module: {
rules: [
{
diff --git a/yarn.lock b/yarn.lock
index ac17694..c1cac5b 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -1895,6 +1895,15 @@ bcrypt-pbkdf@^1.0.0:
dependencies:
tweetnacl "^0.14.3"
+bent@^7.3.12:
+ version "7.3.12"
+ resolved "https://registry.yarnpkg.com/bent/-/bent-7.3.12.tgz#e0a2775d4425e7674c64b78b242af4f49da6b035"
+ integrity sha512-T3yrKnVGB63zRuoco/7Ybl7BwwGZR0lceoVG5XmQyMIH9s19SV5m+a8qam4if0zQuAmOQTyPTPmsQBdAorGK3w==
+ dependencies:
+ bytesish "^0.4.1"
+ caseless "~0.12.0"
+ is-stream "^2.0.0"
+
big.js@^5.2.2:
version "5.2.2"
resolved "https://registry.yarnpkg.com/big.js/-/big.js-5.2.2.tgz#65f0af382f578bcdc742bd9c281e9cb2d7768328"
@@ -2133,6 +2142,11 @@ bytes@3.1.0:
resolved "https://registry.yarnpkg.com/bytes/-/bytes-3.1.0.tgz#f6cf7933a360e0588fa9fde85651cdc7f805d1f6"
integrity sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg==
+bytesish@^0.4.1:
+ version "0.4.4"
+ resolved "https://registry.yarnpkg.com/bytesish/-/bytesish-0.4.4.tgz#f3b535a0f1153747427aee27256748cff92347e6"
+ integrity sha512-i4uu6M4zuMUiyfZN4RU2+i9+peJh//pXhd9x1oSe1LBkZ3LEbCoygu8W0bXTukU1Jme2txKuotpCZRaC3FLxcQ==
+
cacache@^12.0.2:
version "12.0.4"
resolved "https://registry.yarnpkg.com/cacache/-/cacache-12.0.4.tgz#668bcbd105aeb5f1d92fe25570ec9525c8faa40c"