mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
92db8219b4 | ||
|
|
8ba3a53779 | ||
|
|
e7db4e23f5 | ||
|
|
06c4ebb975 | ||
|
|
b075e09ac2 | ||
|
|
f215ab53db | ||
|
|
4ed92b246f | ||
|
|
4a9b60633a | ||
|
|
2123c1024b | ||
|
|
35767e6774 |
1
.github/workflows/docker.yml
vendored
1
.github/workflows/docker.yml
vendored
@@ -44,3 +44,4 @@ jobs:
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
platforms: linux/amd64, linux/i386, linux/arm64
|
||||
|
||||
21
Dockerfile
21
Dockerfile
@@ -1,19 +1,20 @@
|
||||
# syntax=docker/dockerfile:1.3
|
||||
FROM node:18-alpine AS builder
|
||||
COPY --chown=1000:1000 . /fredy
|
||||
FROM node:18
|
||||
|
||||
WORKDIR /fredy
|
||||
USER 1000
|
||||
|
||||
COPY . /fredy
|
||||
|
||||
RUN yarn install
|
||||
|
||||
RUN yarn global add pm2
|
||||
|
||||
RUN yarn run prod
|
||||
|
||||
FROM node:16-alpine
|
||||
COPY --from=builder --chown=1000:1000 /fredy /fredy
|
||||
RUN mkdir /db /conf && \
|
||||
chown 1000:1000 /db /conf && \
|
||||
chmod 777 -R /db/ && \
|
||||
ln -s /db /fredy/db && ln -s /conf /fredy/conf
|
||||
|
||||
EXPOSE 9998
|
||||
USER 1000
|
||||
VOLUME [ "/conf", "/db" ]
|
||||
WORKDIR /fredy
|
||||
CMD node index.js --no-daemon
|
||||
|
||||
CMD pm2-runtime index.js
|
||||
|
||||
11
README.md
11
README.md
@@ -78,15 +78,20 @@ yarn run test
|
||||
# Architecture
|
||||

|
||||
|
||||
### Immoscout / Immonet
|
||||
I have added **experimental** support for Immoscout and Immonet. Both are somewhat special, because they have decided to secure their service from bots using reCAPTCHA. Finding a way around this is barely possible. For _Fredy_ to be able to bypass this check, I'm using a service called [ScrapingAnt](https://scrapingant.com/). The trick is to use a headless browser, rotating proxies and (once successfully validated) to re-send the cookies each time.
|
||||
### Immoscout / Immonet / NeubauKompass
|
||||
I have added **experimental** support for Immoscout, Immonet and NeubauKompass. All of them are somewhat special, because they have decided to secure their service from bots using reCAPTCHA. Finding a way around this is barely possible. For _Fredy_ to be able to bypass this check, I'm using a service called [ScrapingAnt](https://scrapingant.com/). The trick is to use a headless browser, rotating proxies and (once successfully validated) to re-send the cookies each time.
|
||||
|
||||
To be able to use Immoscout / Immonet, you need to create an account at ScrapingAnt. Configure the API key in the "General Settings" tab (visible when logged in as administrator).
|
||||
The rest will be handled by _Fredy_. Keep in mind, the support is experimental. There might be bugs and you might not always pass the reCAPTCHA check, but most of the time it works rather well :)
|
||||
|
||||
If you need more than the 1000 API calls allowed per month, I'd suggest opting for a paid account... ScrapingAnt loves OpenSource, therefore they have decided to give all _Fredy_ users a 10% discount by using the code **FREDY10** (Disclaimer: I do not earn any money for recommending their service).
|
||||
|
||||
### Contribution guidelines
|
||||
### 👐 Contributing
|
||||
Thanks to all the people who already contributed!
|
||||
|
||||
<a href="https://github.com/orangecoding/fredy/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=orangecoding/fredy" />
|
||||
</a>
|
||||
|
||||
See [Contributing](https://github.com/orangecoding/fredy/blob/master/CONTRIBUTING.md)
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
version: '3.3'
|
||||
version: '3.8'
|
||||
services:
|
||||
fredy:
|
||||
container_name: fredy
|
||||
@@ -13,3 +13,4 @@ services:
|
||||
- ./db:/db
|
||||
ports:
|
||||
- 9998:9998
|
||||
restart: unless-stopped
|
||||
|
||||
@@ -3,7 +3,7 @@ import { getJob } from '../../services/storage/jobStorage.js';
|
||||
import fetch from 'node-fetch';
|
||||
|
||||
export const send = ({ serviceName, newListings, notificationConfig, jobKey }) => {
|
||||
const { priority, server } = notificationConfig.find((adapter) => adapter.id === config.id).fields;
|
||||
const { server } = notificationConfig.find((adapter) => adapter.id === config.id).fields;
|
||||
const job = getJob(jobKey);
|
||||
const jobName = job == null ? jobKey : job.name;
|
||||
const promises = newListings.map((newListing) => {
|
||||
@@ -27,15 +27,10 @@ export const config = {
|
||||
readme: markdown2Html('lib/notification/adapter/apprise.md'),
|
||||
description: 'Fredy will send new listings to your Apprise instance.',
|
||||
fields: {
|
||||
priority: {
|
||||
type: 'number',
|
||||
label: 'Priority',
|
||||
description: 'The priority of the send notification.',
|
||||
},
|
||||
server: {
|
||||
type: 'text',
|
||||
label: 'Server',
|
||||
description: 'The server url to send the notification to.',
|
||||
description: 'The server URL to send the notification to.',
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
### Apprise Adapter
|
||||
|
||||
Refer to the [instructions](https://github.com/caronc/apprise-api#installation) on how to set up an Apprise instance and how to configure your preferred notification service.
|
||||
|
||||
In addition to the Apprise instance, the priority must be defined.
|
||||
@@ -6,7 +6,7 @@ function nullOrEmpty(val) {
|
||||
function normalize(o) {
|
||||
const title = nullOrEmpty(o.title) ? 'NO TITLE FOUND' : o.title.replace('NEU', '');
|
||||
const address = nullOrEmpty(o.address) ? 'NO ADDRESS FOUND' : (o.address || '').replace(/\(.*\),.*$/, '').trim();
|
||||
const link = nullOrEmpty(o.address) ? 'NO LINK' : `https://www.immobilienscout24.de${o.link.substring(o.link.indexOf('/expose'))}`;
|
||||
const link = nullOrEmpty(o.link) ? 'NO LINK' : `https://www.immobilienscout24.de${o.link.substring(o.link.indexOf('/expose'))}`;
|
||||
return Object.assign(o, { title, address, link });
|
||||
}
|
||||
function applyBlacklist(o) {
|
||||
|
||||
@@ -1,44 +1,47 @@
|
||||
import utils from '../utils.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
function normalize(o) {
|
||||
const id = o.id.substring(o.id.indexOf('-') + 1, o.id.length);
|
||||
const size = o.size || 'N/A m²';
|
||||
const price = (o.price || '--- €').replace('Preis auf Anfrage', '--- €');
|
||||
const address = o.address || 'No address available';
|
||||
const title = o.title || 'No title available';
|
||||
const link = `https://immo.swp.de/immobilien/${id}`;
|
||||
const description = o.description;
|
||||
return Object.assign(o, { id, address, price, size, title, link, description });
|
||||
const id = o.id.substring(o.id.indexOf('-') + 1, o.id.length);
|
||||
const size = o.size || 'N/A m²';
|
||||
const price = (o.price || '--- €').replace('Preis auf Anfrage', '--- €');
|
||||
const title = o.title || 'No title available';
|
||||
const link = `https://immo.swp.de/immobilien/${id}`;
|
||||
const description = o.description;
|
||||
return Object.assign(o, {id, price, size, title, link, description});
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '.js-serp-item',
|
||||
sortByDateParam: 's=most_recently_updated_first',
|
||||
crawlFields: {
|
||||
id: '@id',
|
||||
price: 'div.item__spec.item-spec-price | trim',
|
||||
size: 'div.item__spec.item-spec-area | trim',
|
||||
title: 'a.js-item-title-link@title',
|
||||
address: 'div.item__locality | removeNewline | trim',
|
||||
description: 'div.item__main-info-points.clearfix p small | removeNewline | trim',
|
||||
},
|
||||
paginate: 'li.page-item.pagination__item a.page-link@href',
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
url: null,
|
||||
crawlContainer: '.js-serp-item',
|
||||
sortByDateParam: 's=most_recently_updated_first',
|
||||
crawlFields: {
|
||||
id: '.js-bookmark-btn@data-id',
|
||||
price: 'div.align-items-start div:first-child | trim',
|
||||
size: 'div.align-items-start div:nth-child(3) | trim',
|
||||
title: '.card-title h2 | trim',
|
||||
link: '.ci-search-result__link@href',
|
||||
description: '.js-show-more-item-sm | removeNewline | trim',
|
||||
},
|
||||
paginate: 'li.page-item.pagination__item a.page-link@href',
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlackList = blacklist || [];
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlackList = blacklist || [];
|
||||
};
|
||||
export const metaInformation = {
|
||||
name: 'Immo Südwest Presse',
|
||||
baseUrl: 'https://immo.swp.de/',
|
||||
id: 'immoswp',
|
||||
name: 'Immo Südwest Presse',
|
||||
baseUrl: 'https://immo.swp.de/',
|
||||
id: 'immoswp',
|
||||
};
|
||||
export { config };
|
||||
export {config};
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
import utils from '../utils.js';
|
||||
let appliedBlackList = [];
|
||||
function nullOrEmpty(val) {
|
||||
return val == null || val.length === 0;
|
||||
}
|
||||
function normalize(o) {
|
||||
return o;
|
||||
const link = nullOrEmpty(o.link) ? 'NO LINK' : `https://www.neubaukompass.de${o.link.substring(o.link.indexOf('/neubau'))}`;
|
||||
return {...o, link};
|
||||
}
|
||||
function applyBlacklist(o) {
|
||||
return !utils.isOneOf(o.title, appliedBlackList);
|
||||
|
||||
@@ -24,13 +24,16 @@ function makeDriver(headers = {}) {
|
||||
},
|
||||
});
|
||||
const result = await response.text();
|
||||
if (EXPECTED_STATUS_CODES.includes(response.status)) {
|
||||
throw new Error(`${response.status}`);
|
||||
}
|
||||
if (cookies.length === 0) {
|
||||
cookies = response.headers.raw()['set-cookie'] || [];
|
||||
}
|
||||
callback(null, result);
|
||||
} catch (exception) {
|
||||
/* eslint-disable no-console */
|
||||
if (!EXPECTED_STATUS_CODES.includes(exception.response?.status)) {
|
||||
if (!EXPECTED_STATUS_CODES.includes(exception.response?.status) && !EXPECTED_STATUS_CODES.includes(Number(exception.message))) {
|
||||
console.error(`Error while trying to scrape data from scraping ant. Received error: ${exception.message}`);
|
||||
callback(null, []);
|
||||
return;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { metaInformation as immoScoutInfo } from '../provider/immoscout.js';
|
||||
import { metaInformation as immoNetInfo } from '../provider/immonet.js';
|
||||
import { metaInformation as neuBauCompassInfo } from '../provider/neubauKompass.js';
|
||||
import { config } from '../utils.js';
|
||||
|
||||
const additionalImmonetUrlParams = `&wait_for_selector=.content-wrapper-tiles&js_snippet=${Buffer.from(
|
||||
@@ -7,7 +8,7 @@ const additionalImmonetUrlParams = `&wait_for_selector=.content-wrapper-tiles&js
|
||||
).toString('base64')}`;
|
||||
|
||||
const needScrapingAnt = (id) => {
|
||||
return id.toLowerCase() === immoScoutInfo.id || id.toLowerCase() === immoNetInfo.id;
|
||||
return id.toLowerCase() === immoScoutInfo.id || id.toLowerCase() === immoNetInfo.id || id.toLowerCase() === neuBauCompassInfo.id.toLowerCase();
|
||||
};
|
||||
export const transformUrlForScrapingAnt = (url, id) => {
|
||||
let urlParams = '';
|
||||
|
||||
40
package.json
40
package.json
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "fredy",
|
||||
"version": "8.0.4",
|
||||
"version": "8.1.0",
|
||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||
"scripts": {
|
||||
"start": "node index.js",
|
||||
@@ -55,54 +55,54 @@
|
||||
"Firefox ESR"
|
||||
],
|
||||
"dependencies": {
|
||||
"@douyinfe/semi-ui": "2.52.0",
|
||||
"@douyinfe/semi-ui": "2.62.1",
|
||||
"@rematch/core": "2.2.0",
|
||||
"@rematch/loading": "2.1.2",
|
||||
"@sendgrid/mail": "8.1.0",
|
||||
"@vitejs/plugin-react": "4.2.1",
|
||||
"@sendgrid/mail": "8.1.3",
|
||||
"@vitejs/plugin-react": "4.3.1",
|
||||
"better-sqlite3": "8.6.0",
|
||||
"body-parser": "1.20.2",
|
||||
"cookie-session": "2.1.0",
|
||||
"handlebars": "4.7.8",
|
||||
"highcharts": "11.3.0",
|
||||
"highcharts": "11.4.6",
|
||||
"highcharts-react-official": "3.2.1",
|
||||
"lodash": "4.17.21",
|
||||
"lowdb": "6.0.1",
|
||||
"markdown": "^0.5.0",
|
||||
"nanoid": "5.0.5",
|
||||
"nanoid": "5.0.7",
|
||||
"node-fetch": "3.3.2",
|
||||
"node-mailjet": "6.0.5",
|
||||
"query-string": "8.2.0",
|
||||
"react": "18.2.0",
|
||||
"react-dom": "18.2.0",
|
||||
"react-redux": "9.1.0",
|
||||
"react": "18.3.1",
|
||||
"react-dom": "18.3.1",
|
||||
"react-redux": "9.1.2",
|
||||
"react-router": "5.2.1",
|
||||
"react-router-dom": "5.3.0",
|
||||
"redux": "5.0.1",
|
||||
"redux-thunk": "3.1.0",
|
||||
"restana": "4.9.7",
|
||||
"restana": "4.9.9",
|
||||
"serve-static": "1.15.0",
|
||||
"slack": "11.0.2",
|
||||
"string-similarity": "^4.0.4",
|
||||
"vite": "5.0.12",
|
||||
"vite": "5.3.4",
|
||||
"x-ray": "2.3.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/core": "7.23.9",
|
||||
"@babel/eslint-parser": "7.23.10",
|
||||
"@babel/preset-env": "7.23.9",
|
||||
"@babel/preset-react": "7.23.3",
|
||||
"chai": "5.0.3",
|
||||
"@babel/core": "7.24.9",
|
||||
"@babel/eslint-parser": "7.24.8",
|
||||
"@babel/preset-env": "7.24.8",
|
||||
"@babel/preset-react": "7.24.7",
|
||||
"chai": "5.1.1",
|
||||
"eslint": "8.56.0",
|
||||
"eslint-config-prettier": "8.8.0",
|
||||
"eslint-plugin-react": "7.33.2",
|
||||
"esmock": "2.6.3",
|
||||
"eslint-plugin-react": "7.35.0",
|
||||
"esmock": "2.6.7",
|
||||
"history": "5.3.0",
|
||||
"husky": "4.3.8",
|
||||
"less": "4.2.0",
|
||||
"lint-staged": "13.2.2",
|
||||
"mocha": "10.2.0",
|
||||
"prettier": "3.2.5",
|
||||
"mocha": "10.7.0",
|
||||
"prettier": "3.3.3",
|
||||
"redux-logger": "3.0.6"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,12 +25,10 @@ describe('#immoswp testsuite()', () => {
|
||||
expect(notify.size).to.be.a('string');
|
||||
expect(notify.title).to.be.a('string');
|
||||
expect(notify.link).to.be.a('string');
|
||||
expect(notify.address).to.be.a('string');
|
||||
/** check the values if possible **/
|
||||
expect(notify.price).that.does.include('€');
|
||||
expect(notify.title).to.be.not.empty;
|
||||
expect(notify.link).that.does.include('https://immo.swp.de');
|
||||
expect(notify.address).to.be.not.empty;
|
||||
});
|
||||
resolve();
|
||||
});
|
||||
|
||||
@@ -1,36 +1,44 @@
|
||||
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
||||
import { get } from '../mocks/mockNotification.js';
|
||||
import { mockFredy, providerConfig } from '../utils.js';
|
||||
import { expect } from 'chai';
|
||||
import {get} from '../mocks/mockNotification.js';
|
||||
import {mockFredy, providerConfig} from '../utils.js';
|
||||
import {expect} from 'chai';
|
||||
import * as provider from '../../lib/provider/neubauKompass.js';
|
||||
import * as scrapingAnt from '../../lib/services/scrapingAnt.js';
|
||||
|
||||
describe('#neubauKompass testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
provider.init(providerConfig.neubauKompass, [], []);
|
||||
it('should test neubauKompass provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
return await new Promise((resolve) => {
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'neubauKompass', similarityCache);
|
||||
fredy.execute().then((listing) => {
|
||||
expect(listing).to.be.a('array');
|
||||
const notificationObj = get();
|
||||
expect(notificationObj.serviceName).to.equal('neubauKompass');
|
||||
notificationObj.payload.forEach((notify) => {
|
||||
expect(notify).to.be.a('object');
|
||||
/** check the actual structure **/
|
||||
expect(notify.id).to.be.a('string');
|
||||
expect(notify.title).to.be.a('string');
|
||||
expect(notify.link).to.be.a('string');
|
||||
expect(notify.address).to.be.a('string');
|
||||
/** check the values if possible **/
|
||||
expect(notify.title).to.be.not.empty;
|
||||
expect(notify.link).that.does.include('https://www.neubaukompass.de');
|
||||
expect(notify.address).to.be.not.empty;
|
||||
});
|
||||
resolve();
|
||||
});
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
provider.init(providerConfig.neubauKompass, [], []);
|
||||
it.only('should test neubauKompass provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
return await new Promise((resolve) => {
|
||||
if (!scrapingAnt.isScrapingAntApiKeySet()) {
|
||||
/* eslint-disable no-console */
|
||||
console.info('Skipping Neubaukompass test as ScrapingAnt Api Key is not set.');
|
||||
/* eslint-enable no-console */
|
||||
resolve();
|
||||
return;
|
||||
}
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'neubauKompass', similarityCache);
|
||||
fredy.execute().then((listing) => {
|
||||
expect(listing).to.be.a('array');
|
||||
const notificationObj = get();
|
||||
expect(notificationObj.serviceName).to.equal('neubauKompass');
|
||||
notificationObj.payload.forEach((notify) => {
|
||||
expect(notify).to.be.a('object');
|
||||
/** check the actual structure **/
|
||||
expect(notify.id).to.be.a('string');
|
||||
expect(notify.title).to.be.a('string');
|
||||
expect(notify.link).to.be.a('string');
|
||||
expect(notify.address).to.be.a('string');
|
||||
/** check the values if possible **/
|
||||
expect(notify.title).to.be.not.empty;
|
||||
expect(notify.link).that.does.include('https://www.neubaukompass.de');
|
||||
expect(notify.address).to.be.not.empty;
|
||||
});
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -184,8 +184,7 @@ const GeneralSettings = function GeneralSettings() {
|
||||
more likely to fail, but they are cheaper. A call with a datacenter proxy cost 10 credits.
|
||||
<h4>Residential-Proxy</h4>
|
||||
High-quality proxy server located in one of the real people houses across the world. Datacenter
|
||||
proxies are faster and more likely to success, but they are more expensive. A call with a datacenter
|
||||
proxy cost 250 credits.
|
||||
proxies are faster and more likely to success, but they are more expensive.
|
||||
<br />
|
||||
<br />
|
||||
<b>
|
||||
|
||||
@@ -45,7 +45,7 @@ export default function ProcessingTimes({ processingTimes }) {
|
||||
{format(new Date(processingTimes.scrapingAntData.end_date))}
|
||||
<br />
|
||||
Credits: {processingTimes.scrapingAntData.remained_credits}/
|
||||
{processingTimes.scrapingAntData.plan_total_credits} (250 credits per call)
|
||||
{processingTimes.scrapingAntData.plan_total_credits}
|
||||
</p>
|
||||
If you want to scrape Immoscout or Immonet more often, you have to purchase a premium account of{' '}
|
||||
<a href="https://scrapingant.com/" target="_blank" rel="noreferrer">
|
||||
|
||||
@@ -101,7 +101,7 @@ export default function ProviderMutator({ onVisibilityChanged, visible = false,
|
||||
description={
|
||||
<div>
|
||||
<p>
|
||||
If you chose Immoscout or Immonet as a provider, make sure to also add the scrapingAnt apiKey to the config.json.
|
||||
If you chose Immoscout, Immonet or NeubauKompass as a provider, make sure to also add the scrapingAnt apiKey to the config.json.
|
||||
(See readme)
|
||||
</p>
|
||||
<p>
|
||||
|
||||
Reference in New Issue
Block a user