Compare commits

..

11 Commits
7.1.0 ... 7.2.1

Author SHA1 Message Date
Christian Kellner
86465e0076 next release version 2023-05-08 09:33:20 +02:00
Christian Kellner
d947dad488 fixing ebay kleinanzeigen, now becoming kleinanzeigen 2023-05-08 09:32:07 +02:00
weakmap@gmail.com
23ef434fe1 next release version 2023-04-15 18:25:31 +02:00
weakmap@gmail.com
5e6d92c5be Merge branch 'master' of https://github.com/orangecoding/fredy 2023-04-15 18:24:57 +02:00
weakmap@gmail.com
4ba098e0b6 bringing back immonet by using scrapingant 2023-04-15 18:24:51 +02:00
Janek Bettinger
2d1a9a0452 fix Mailjet adapter (#76) 2023-04-15 16:27:27 +02:00
weakmap@gmail.com
6fbee3e7c6 Merge branch 'master' of https://github.com/orangecoding/fredy 2023-04-14 21:36:53 +02:00
Daniel Linsenmeyer
46775c3662 Fix validation and add ntfy as notification adapter (#75) 2023-04-14 17:16:08 +02:00
weakmap@gmail.com
1feb5bfda1 running all tests 2023-04-07 19:45:24 +02:00
weakmap@gmail.com
3ec9ed3b2a ignoring expired ssl certificate o0 2023-04-07 19:44:59 +02:00
Christian Kellner
75a536d5ab fixing ui not being shown 2023-03-20 15:08:06 +01:00
18 changed files with 122 additions and 40 deletions

View File

@@ -81,10 +81,10 @@ yarn run test
# Architecture
![Architecture](/doc/architecture.jpg "Architecture")
### Immoscout
I have added **experimental** support for Immoscout. Immoscout is somewhat special, because they have decided to secure their service from bots using Re-Capture. Finding a way around this is barely possible. For _Fredy_ to be able to bypass this check, I'm using a service called [ScrapingAnt](https://scrapingant.com/). The trick is to use a headless browser, rotating proxies and (once successfully validated) to re-send the cookies each time.
### Immoscout / Immonet
I have added **experimental** support for Immoscout and Immonet. They both are somewhat special, because they have decided to secure their service from bots using Re-Capture. Finding a way around this is barely possible. For _Fredy_ to be able to bypass this check, I'm using a service called [ScrapingAnt](https://scrapingant.com/). The trick is to use a headless browser, rotating proxies and (once successfully validated) to re-send the cookies each time.
To be able to use Immoscout, you need to create an account at ScrapingAnt. Configure the API key in the "General Settings" tab (visible when logged in as administrator).
To be able to use Immoscout / Immonet, you need to create an account at ScrapingAnt. Configure the API key in the "General Settings" tab (visible when logged in as administrator).
The rest will be handled by _Fredy_. Keep in mind, the support is experimental. There might be bugs and you might not always pass the re-capture check, but most of the time it works rather well :)
If you need more than the 1000 API calls allowed per month, I'd suggest opting for a paid account... ScrapingAnt loves OpenSource, therefore they have decided to give all _Fredy_ users a 10% discount by using the code **FREDY10** (Disclaimer: I do not earn any money for recommending their service).

View File

@@ -45,15 +45,15 @@ class FredyRuntime {
_getListings(url) {
return new Promise((resolve, reject) => {
const id = this._providerId;
if (scrapingAnt.isImmoscout(id) && !scrapingAnt.isScrapingAntApiKeySet()) {
const error = 'Immoscout can only be used with if you have set an apikey for scrapingAnt.';
if (scrapingAnt.needScrapingAnt(id) && !scrapingAnt.isScrapingAntApiKeySet()) {
const error = 'Immoscout or Immonet can only be used with if you have set an apikey for scrapingAnt.';
/* eslint-disable no-console */
console.log(error);
/* eslint-enable no-console */
reject(error);
return;
}
const u = scrapingAnt.isImmoscout(id) ? scrapingAnt.transformUrlForScrapingAnt(url, id) : url;
const u = scrapingAnt.needScrapingAnt(id) ? scrapingAnt.transformUrlForScrapingAnt(url, id) : url;
try {
if (this._providerConfig.paginate != null) {
xray(u, this._providerConfig.crawlContainer, [this._providerConfig.crawlFields])

View File

@@ -13,7 +13,7 @@ import files from 'serve-static';
import path from 'path';
import { getDirName } from '../utils.js';
const service = restana();
const staticService = files(path.join(getDirName(), '../../ui/public'));
const staticService = files(path.join(getDirName(), '../ui/public'));
const PORT = config.port || 9998;
service.use(bodyParser.json());

View File

@@ -28,7 +28,7 @@ jobRouter.get('/processingTimes', async (req, res) => {
let scrapingAntData = null;
if (config.scrapingAnt.apiKey != null && config.scrapingAnt.apiKey.length > 0) {
try {
const response = await fetch(`https://api.scrapingant.com/v1/usage?x-api-key=${config.scrapingAnt.apiKey}`);
const response = await fetch(`https://api.scrapingant.com/v2/usage?x-api-key=${config.scrapingAnt.apiKey}`);
scrapingAntData = await response.json();
} catch (Exception) {
console.error('Could not query plan data from scraping ant.', Exception);

View File

@@ -9,7 +9,7 @@ const template = fs.readFileSync(path.resolve(__dirname + '/notification/emailTe
const emailTemplate = Handlebars.compile(template);
export const send = ({ serviceName, newListings, notificationConfig, jobKey }) => {
const { apiPublicKey, apiPrivateKey, receiver, from } = notificationConfig.find(
(adapter) => adapter.id === 'mailJet'
(adapter) => adapter.id === 'mailjet'
).fields;
const to = receiver
.trim()
@@ -18,7 +18,7 @@ export const send = ({ serviceName, newListings, notificationConfig, jobKey }) =
Email: r.trim(),
}));
return mailjet
.connect(apiPublicKey, apiPrivateKey)
.apiConnect(apiPublicKey, apiPrivateKey)
.post('send', { version: 'v3.1' })
.request({
Messages: [

View File

@@ -0,0 +1,51 @@
import { markdown2Html } from '../../services/markdown.js';
import { getJob } from '../../services/storage/jobStorage.js';
import fetch from 'node-fetch';
export const send = ({ serviceName, newListings, notificationConfig, jobKey }) => {
const { priority, server, topic } = notificationConfig.find((adapter) => adapter.id === 'ntfy').fields;
const job = getJob(jobKey);
const jobName = job == null ? jobKey : job.name;
const promises = newListings.map((newListing) => {
const message = `Address: ${newListing.address} Size: ${newListing.size.replace(/2m/g, '$m^2$')} Price: ${
newListing.price
}`;
return fetch(server, {
method: 'POST',
body: JSON.stringify({
topic: topic,
message: message,
title: newListing.title,
tags: [serviceName, jobName],
priority: parseInt(priority),
click: newListing.link,
}),
});
});
return Promise.all(promises);
};
export const config = {
id: 'ntfy',
name: 'ntfy',
readme: markdown2Html('lib/notification/adapter/ntfy.md'),
description: 'Fredy will send new listings to your ntfy.',
fields: {
priority: {
type: 'number',
label: 'Priority',
description: 'The priority of the send notification.',
},
server: {
type: 'text',
label: 'Server-URL',
description: 'The server url to the send the notification to.',
},
topic: {
type: 'text',
label: 'topic',
description:
'The topic where fredy should send notifications to. The topic is a secret, only known to you, make sure it is something not generic.',
},
},
};

View File

@@ -0,0 +1,5 @@
### ntfy Adapter
For ntfy, you need to create a topic on your preferred ntfy instance. This is pretty easy. Please visit the steps in the [docs](https://docs.ntfy.sh/publish/) and follow the instructions.
As a result, you get the URL for configuration in fredy. In addition, the priority must be defined.

View File

@@ -1,14 +1,12 @@
import utils from '../utils.js';
let appliedBlackList = [];
function normalize(o) {
const id = parseInt(o.id.substring(o.id.indexOf('_') + 1, o.id.length));
const id = o.id.substring(o.id.lastIndexOf('/') + 1, o.id.length);
const size = o.size != null ? o.size.replace('Wohnfläche ', '') : 'N/A m²';
const price = o.price.replace('Kaufpreis ', '');
const address = o.address.split(' • ')[o.address.split(' • ').length - 1];
const title = o.title || 'No title available';
//normally we would just read the link from the source, but immonet decided to trick user by adding a click listener instead of
//a href to do some weird reporting. (Very user friendly for handicaped ppl... not)
const link = `https://www.immonet.de/angebot/${id}`;
const link = o.id;
return Object.assign(o, { id, address, price, size, title, link });
}
function applyBlacklist(o) {
@@ -18,14 +16,14 @@ function applyBlacklist(o) {
}
const config = {
url: null,
crawlContainer: '#result-list-stage .item',
crawlContainer: '.content-wrapper-tiles .ng-star-inserted',
sortByDateParam: 'sortby=19',
crawlFields: {
id: '@id',
price: 'div[id*="selPrice_"] | trim',
size: 'div[id*="selArea_"] | trim',
title: '.item a img@title',
address: '.item .box-25 .ellipsis .text-100 | removeNewline | trim',
id: '.card a@href',
title: '.card h3 |trim',
price: '.card .has-font-300 .is-bold | trim',
size: '.card .has-font-300 .ml-100 | trim',
address: '.card span:nth-child(2) | trim',
},
paginate: '#idResultList .margin-bottom-6.margin-bottom-sm-12 .panel a.pull-right@href',
normalize: normalize,

View File

@@ -32,7 +32,7 @@ const config = {
};
export const metaInformation = {
name: 'Ebay Kleinanzeigen',
baseUrl: 'https://www.ebay-kleinanzeigen.de/',
baseUrl: 'https://www.kleinanzeigen.de/',
id: 'kleinanzeigen',
};
export const init = (sourceConfig, blacklist, blacklistedDistricts) => {

View File

@@ -1,11 +1,16 @@
import fetch from 'node-fetch';
import { config } from '../utils.js';
import { makeUrlResidential } from './scrapingAnt.js';
import https from 'https';
//if ScrapingAnt got blocked, this http status is returned
const BLOCKED_HTTP_STATUS = 423;
const NOT_FOUND_HTTP_STATUS = 404;
const MAX_RETRIES_SCRAPING_ANT = 10;
const EXPECTED_STATUS_CODES = [BLOCKED_HTTP_STATUS, NOT_FOUND_HTTP_STATUS];
const agent = new https.Agent({
rejectUnauthorized: false,
});
function makeDriver(headers = {}) {
let cookies = '';
async function scrapingAntDriver(context, callback, retryCounter = 0) {
@@ -41,9 +46,10 @@ function makeDriver(headers = {}) {
/* eslint-enable no-console */
}
}
/**
* The regular request driver is taking care of everyting, that doesn't need to be scraped by ScrapingAnt (which is
* everything != Immoscout as of writing this)
* everything != Immoscout & Immonet as of writing this)
*/
return async function driver(context, callback) {
if (context.url.toLowerCase().indexOf('scrapingant') !== -1) {
@@ -55,6 +61,7 @@ function makeDriver(headers = {}) {
...headers,
Cookie: cookies,
},
agent,
});
const result = await response.text();
callback(null, result);

View File

@@ -1,12 +1,22 @@
import { metaInformation } from '../provider/immoscout.js';
import { metaInformation as immoScoutInfo } from '../provider/immoscout.js';
import { metaInformation as immoNetInfo } from '../provider/immonet.js';
import { config } from '../utils.js';
const isImmoscout = (id) => {
return id.toLowerCase() === metaInformation.id;
const additionalImmonetUrlParams = `&wait_for_selector=.content-wrapper-tiles&js_snippet=${new Buffer(
'window.scrollTo(0,document.body.scrollHeight);'
).toString('base64')}`;
const needScrapingAnt = (id) => {
return id.toLowerCase() === immoScoutInfo.id || id.toLowerCase() === immoNetInfo.id;
};
export const transformUrlForScrapingAnt = (url, id) => {
if (isImmoscout(id)) {
//only do calls to scrapingAnt when dealing with Immoscout
url = `https://api.scrapingant.com/v1/general?url=${encodeURIComponent(url)}&proxy_type=datacenter`;
let urlParams = '';
if (needScrapingAnt(id)) {
if (id.toLowerCase() === immoNetInfo.id) {
urlParams = additionalImmonetUrlParams;
}
//only do calls to scrapingAnt when dealing with Immoscout/Immonet
url = `https://api.scrapingant.com/v2/general?url=${encodeURIComponent(url)}&proxy_type=datacenter${urlParams}`;
}
return url;
};
@@ -16,4 +26,4 @@ export const isScrapingAntApiKeySet = () => {
export const makeUrlResidential = (url) => {
return url.replace('datacenter', 'residential');
};
export { isImmoscout };
export { needScrapingAnt };

View File

@@ -1,6 +1,6 @@
{
"name": "fredy",
"version": "7.1.0",
"version": "7.2.1",
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
"scripts": {
"start": "node index.js",

View File

@@ -3,6 +3,7 @@ import { get } from '../mocks/mockNotification.js';
import { mockFredy, providerConfig } from '../utils.js';
import chai from 'chai';
import * as provider from '../../lib/provider/immonet.js';
import * as scrapingAnt from '../../lib/services/scrapingAnt.js';
const expect = chai.expect;
describe('#immonet testsuite()', () => {
after(() => {
@@ -12,6 +13,13 @@ describe('#immonet testsuite()', () => {
it('should test immonet provider', async () => {
const Fredy = await mockFredy();
return await new Promise((resolve) => {
if (!scrapingAnt.isScrapingAntApiKeySet()) {
/* eslint-disable no-console */
console.info('Skipping Immonet test as ScrapingAnt Api Key is not set.');
/* eslint-enable no-console */
resolve();
return;
}
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'immonet', similarityCache);
fredy.execute().then((listing) => {
expect(listing).to.be.a('array');
@@ -20,17 +28,17 @@ describe('#immonet testsuite()', () => {
expect(notificationObj.serviceName).to.equal('immonet');
notificationObj.payload.forEach((notify) => {
/** check the actual structure **/
expect(notify.id).to.be.a('number');
expect(notify.id).to.be.a('string');
expect(notify.price).to.be.a('string');
expect(notify.size).to.be.a('string');
expect(notify.title).to.be.a('string');
expect(notify.link).to.be.a('string');
expect(notify.address).to.be.a('string');
/** check the values if possible **/
expect(notify.price).that.does.include('€');
expect(notify.size).that.does.include('m²');
expect(notify.title).to.be.not.empty;
expect(notify.link).that.does.include('https://www.immonet.de');
expect(notify.address).to.be.not.empty;
});
resolve();

View File

@@ -26,7 +26,7 @@ describe('#kleinanzeigen testsuite()', () => {
expect(notify.address).to.be.a('string');
/** check the values if possible **/
expect(notify.title).to.be.not.empty;
expect(notify.link).that.does.include('https://www.ebay-kleinanzeigen.de');
expect(notify.link).that.does.include('https://www.kleinanzeigen.de');
expect(notify.address).to.be.not.empty;
});
resolve();

View File

@@ -9,7 +9,7 @@
"enabled": true
},
"immonet": {
"url": "https://www.immonet.de/immobiliensuche/sel.do?pageoffset=1&listsize=100&objecttype=1&locationname=Düsseldorf&acid=&actype=&district=8717&district=8718&district=8719&district=8720&district=8721&district=8723&district=8724&district=8725&district=8727&district=8728&district=8729&district=8730&district=8731&district=8732&district=8733&district=8737&district=8738&district=8741&district=8745&district=8747&district=8750&district=8752&district=8754&district=8755&district=8756&district=8759&district=8760&district=8761&district=8763&district=8764&district=8765&ajaxIsRadiusActive=false&sortby=19&suchart=1&radius=0&pcatmtypes=1_1&pCatMTypeStoragefield=&parentcat=1&marketingtype=1&fromprice=&toprice=420000&fromarea=90&toarea=&fromplotarea=&toplotarea=&fromrooms=3&torooms=&objectcat=225&objectcat=18&objectcat=17&objectcat=12&objectcat=16&objectcat=181&objectcat=14&objectcat=15&objectcat=226&objectcat=13&wbs=-1&fromyear=&toyear=",
"url": "https://www.immonet.de/immobiliensuche/beta?pageoffset=1&listsize=100&objecttype=1&locationname=D%C3%BCsseldorf&acid=&actype=&district=8717&district=8718&district=8719&district=8720&district=8721&district=8723&district=8724&district=8725&district=8727&district=8728&district=8729&district=8730&district=8731&district=8732&district=8733&district=8737&district=8738&district=8741&district=8745&district=8747&district=8750&district=8752&district=8754&district=8755&district=8756&district=8759&district=8760&district=8761&district=8763&district=8764&district=8765&ajaxIsRadiusActive=false&sortby=19&suchart=1&radius=0&pcatmtypes=1_1&pCatMTypeStoragefield=&parentcat=1&marketingtype=1&fromprice=&toprice=420000&fromarea=90&toarea=&fromplotarea=&toplotarea=&fromrooms=3&torooms=&objectcat=225&objectcat=18&objectcat=17&objectcat=12&objectcat=16&objectcat=181&objectcat=14&objectcat=15&objectcat=226&objectcat=13&wbs=-1&fromyear=&toyear=",
"enabled": true
},
"immowelt": {
@@ -29,7 +29,7 @@
"enabled": true
},
"kleinanzeigen": {
"url": "https://www.ebay-kleinanzeigen.de/s-immobilien/duesseldorf/anzeige:angebote/wohnung/k0c195l2068r5",
"url": "https://www.kleinanzeigen.de/s-immobilien/duesseldorf/anzeige:angebote/wohnung/k0c195l2068r5",
"enabled": true
},
"neubauKompass": {

View File

@@ -47,7 +47,7 @@ export default function ProcessingTimes({ processingTimes }) {
Credits: {processingTimes.scrapingAntData.remained_credits}/
{processingTimes.scrapingAntData.plan_total_credits} (250 credits per call)
</p>
If you want to scrape Immoscout more often, you have to purchase a premium account of{' '}
If you want to scrape Immoscout or Immonet more often, you have to purchase a premium account of{' '}
<a href="https://scrapingant.com/" target="_blank" rel="noreferrer">
ScrapingAnt
</a>

View File

@@ -25,9 +25,12 @@ const validate = (selectedAdapter) => {
results.push('All fields are mandatory and must be set.');
continue;
}
if (uiElement.type === 'number' && (typeof uiElement.value !== 'number' || uiElement.value < 0)) {
results.push('A number field cannot contain anything else and must be > 0.');
continue;
if (uiElement.type === 'number') {
const numberValue = parseFloat(uiElement.value);
if(isNaN(numberValue) || numberValue < 0) {
results.push('A number field cannot contain anything else and must be > 0.');
continue;
}
}
if (uiElement.type === 'boolean' && typeof uiElement.value !== 'boolean') {
results.push('A boolean field cannot be of a different type.');

View File

@@ -101,7 +101,7 @@ export default function ProviderMutator({ onVisibilityChanged, visible = false,
description={
<div>
<p>
If you chose Immoscout as a provider, make sure to also add the scrapingAnt apiKey to the config.json.
If you chose Immoscout or Immonet as a provider, make sure to also add the scrapingAnt apiKey to the config.json.
(See readme)
</p>
<p>