mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
Compare commits
70 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c2680fe49f | ||
|
|
2b862b2d98 | ||
|
|
9065448b6b | ||
|
|
b9f49cb5b2 | ||
|
|
53121742c2 | ||
|
|
1a3eae0390 | ||
|
|
a42905d63f | ||
|
|
9917491728 | ||
|
|
f032e6a724 | ||
|
|
111c154ae3 | ||
|
|
2194ffe0f4 | ||
|
|
cfa25fc0e0 | ||
|
|
d50dd61f3e | ||
|
|
31e7f77bde | ||
|
|
a418d64f1a | ||
|
|
d099872950 | ||
|
|
2fd03bce79 | ||
|
|
78a122b3ea | ||
|
|
918c6ade36 | ||
|
|
9fac1aee06 | ||
|
|
f9c6b10976 | ||
|
|
d8ccccb82a | ||
|
|
1f54bcfd3f | ||
|
|
f4c2130829 | ||
|
|
d624e70732 | ||
|
|
0cbfaaf092 | ||
|
|
c6fb856cb6 | ||
|
|
6fe0a9dc3c | ||
|
|
5d52e4152d | ||
|
|
a8e5f8b524 | ||
|
|
4b45ff4430 | ||
|
|
db6211777b | ||
|
|
21dd48527c | ||
|
|
b0d494eed6 | ||
|
|
9efb3e4b94 | ||
|
|
683c47f61c | ||
|
|
b3c11320d4 | ||
|
|
25dfad4f5d | ||
|
|
b7a3823049 | ||
|
|
6964998695 | ||
|
|
ef689cf97e | ||
|
|
bd6a572ab0 | ||
|
|
d96c1ee3fe | ||
|
|
9a09548a07 | ||
|
|
00eabecd08 | ||
|
|
c07dc6220e | ||
|
|
4bab3bd9da | ||
|
|
b113621202 | ||
|
|
030e0ca169 | ||
|
|
3aae81ca19 | ||
|
|
f1effe941f | ||
|
|
cd3631f910 | ||
|
|
8f490f2426 | ||
|
|
48e2ca942f | ||
|
|
b9e4bca244 | ||
|
|
a138dafc31 | ||
|
|
c6bb3c44d4 | ||
|
|
a3471a091a | ||
|
|
b5a96afcc8 | ||
|
|
3903ab59cf | ||
|
|
8fe7cec2a1 | ||
|
|
97deea6f5b | ||
|
|
1ecbbdd774 | ||
|
|
e1db3840f6 | ||
|
|
26127eeac1 | ||
|
|
90a4ee5dcf | ||
|
|
2aaf63c253 | ||
|
|
f52e3e9fd8 | ||
|
|
0d69232395 | ||
|
|
b473cf7fb4 |
@@ -1,7 +1,7 @@
|
||||
node_modules/
|
||||
npm-debug.log
|
||||
test/
|
||||
conf/
|
||||
db/
|
||||
conf/
|
||||
.git/
|
||||
.github/
|
||||
|
||||
28
.github/workflows/docker.yml
vendored
28
.github/workflows/docker.yml
vendored
@@ -1,4 +1,5 @@
|
||||
name: Create and publish Docker image
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
@@ -17,15 +18,24 @@ jobs:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v1
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v2
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
with:
|
||||
platforms: linux/amd64,linux/arm64
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
|
||||
- name: Log in to the Container registry
|
||||
uses: docker/login-action@v1
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
@@ -33,15 +43,17 @@ jobs:
|
||||
|
||||
- name: Extract metadata (tags, labels) for Docker
|
||||
id: meta
|
||||
uses: docker/metadata-action@v3
|
||||
uses: docker/metadata-action@v4
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
|
||||
- name: Build and push Docker image
|
||||
uses: docker/build-push-action@v2
|
||||
uses: docker/build-push-action@v3
|
||||
with:
|
||||
context: .
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
platforms: linux/amd64, linux/arm64
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
20
.github/workflows/test.yml
vendored
20
.github/workflows/test.yml
vendored
@@ -1,23 +1,23 @@
|
||||
name: Test
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
branches: [master]
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
branches: [master]
|
||||
schedule:
|
||||
- cron: '0 12 * * *'
|
||||
- cron: '0 12 * * *'
|
||||
|
||||
jobs:
|
||||
test:
|
||||
name: Test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v2.5.1
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'yarn'
|
||||
|
||||
- run: yarn install
|
||||
- run: yarn run test
|
||||
- run: yarn test
|
||||
|
||||
@@ -113,7 +113,7 @@ I'm using Eslint to maintain quote style and quality. Do not skip it...
|
||||
|
||||
##### To do before merging:
|
||||
|
||||
- executed tests? (`yarn run test`)
|
||||
- executed tests? (`pnpm test`)
|
||||
- sure the changes are useful for everybody? Or is it maybe a custom modification just for your case?
|
||||
|
||||
_Thanks!_ :heart:
|
||||
|
||||
35
Dockerfile
35
Dockerfile
@@ -1,20 +1,35 @@
|
||||
FROM node:20
|
||||
FROM node:22-slim
|
||||
|
||||
WORKDIR /fredy
|
||||
WORKDIR /fredy
|
||||
|
||||
COPY . /fredy
|
||||
# Install Chromium without extra recommended packages and clean apt cache
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends chromium \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN yarn install
|
||||
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \
|
||||
PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
|
||||
|
||||
RUN yarn global add pm2
|
||||
# Copy lockfiles first to leverage cache for dependencies
|
||||
COPY package.json yarn.lock ./
|
||||
|
||||
# Set Yarn timeout, install dependencies and PM2 globally
|
||||
RUN yarn config set network-timeout 600000 \
|
||||
&& yarn install --frozen-lockfile \
|
||||
&& yarn global add pm2
|
||||
|
||||
# Copy application source and build production assets
|
||||
COPY . ./
|
||||
RUN yarn run prod
|
||||
|
||||
RUN mkdir /db /conf && \
|
||||
chown 1000:1000 /db /conf && \
|
||||
chmod 777 -R /db/ && \
|
||||
ln -s /db /fredy/db && ln -s /conf /fredy/conf
|
||||
# Prepare runtime directories and symlinks for data and config
|
||||
RUN mkdir -p /db /conf \
|
||||
&& chown 1000:1000 /db /conf \
|
||||
&& chmod 777 /db /conf \
|
||||
&& ln -s /db /fredy/db \
|
||||
&& ln -s /conf /fredy/conf
|
||||
|
||||
EXPOSE 9998
|
||||
|
||||
CMD pm2-runtime index.js
|
||||
# Start application using PM2 runtime
|
||||
CMD ["pm2-runtime", "index.js"]
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 Christian Kellner
|
||||
Copyright (c) 2025 Christian Kellner
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
17
README.md
17
README.md
@@ -1,6 +1,6 @@
|
||||
<img src="https://github.com/orangecoding/fredy/blob/master/doc/logo.png" width="400">
|
||||
|
||||

|
||||
 [](https://github.com/orangecoding/fredy/actions/workflows/docker.yml)
|
||||
|
||||
Searching an apartment in Germany can be a frustrating task. Not any longer though, as _Fredy_ will take over and will only notify you once new listings have been found that match your requirements.
|
||||
|
||||
@@ -11,7 +11,7 @@ If _Fredy_ finds matching results, it will send them to you via Slack, Email, Te
|
||||
# Sponsorship [](https://github.com/sponsors/orangecoding)
|
||||
If you like my work, consider becoming a sponsor. I'm not expecting anybody to pay for _Fredy_ or any other Open Source Project I'm maintaining, however keep in mind, I'm doing all of this in my spare time :) Thanks.
|
||||
|
||||
<img src="https://github.com/orangecoding/fredy/blob/master/doc/jetbrains.png" width="200">
|
||||
[](https://jb.gg/OpenSourceSupport)
|
||||
|
||||
_Fredy_ is supported by JetBrains under Open Source Support Program
|
||||
|
||||
@@ -46,7 +46,7 @@ A provider contains the URL that points to the search results for the respective
|
||||
**It is important that you order the search results by date, so that _Fredy_ always picks the latest results first!**
|
||||
|
||||
#### Adapter
|
||||
_Fredy_ supports multiple adapters, such as Slack, SendGrid, Telegram etc. A search job can have as many adapters as supported by _Fredy_. Each adapter needs different configuration values, which you have to provide when using them. A adapter dictactes how the frontend renders by telling the frontend what information it needs in order to send listings to the user.
|
||||
_Fredy_ supports multiple adapters, such as Slack, SendGrid, Telegram etc. A search job can have as many adapters as supported by _Fredy_. Each adapter needs different configuration values, which you have to provide when using them. An adapter dictates how the frontend renders by telling the frontend what information it needs in order to send listings to the user.
|
||||
|
||||
#### Jobs
|
||||
A Job wraps adapters and providers. _Fredy_ runs the configured jobs in a specific interval (can be configured in `/conf/config.json`).
|
||||
@@ -82,7 +82,7 @@ yarn run test
|
||||

|
||||
|
||||
### Immoscout
|
||||
Immoscout has implemented advanced bot detection. I’m actively working on bypassing these measures, but until then, selecting Immoscout as a provider will not return any results. I apologize for the inconvenience. 😉
|
||||
Immoscout has implemented advanced bot detection. In order to work around this, we are using a reversed engineered version of their mobile api. See [Immoscout Reverse Engineering Documentation](https://github.com/orangecoding/fredy/blob/master/reverse-engineered-immoscout.md)
|
||||
|
||||
# Analytics
|
||||
Fredy is completely free (and will always remain free). However, it would be a huge help if you’d allow me to collect some analytical data.
|
||||
@@ -110,6 +110,10 @@ Put your config.json into a path of your choice, such as `/path/to/your/conf/`.
|
||||
|
||||
Example: `docker create --name fredy -v /path/to/your/conf/:/conf -p 9998:9998 fredy/fredy`
|
||||
|
||||
## Logs
|
||||
|
||||
You can browse the logs with `docker logs fredy -f`.
|
||||
|
||||
### 👐 Contributing
|
||||
Thanks to all the people who already contributed!
|
||||
|
||||
@@ -119,6 +123,7 @@ Thanks to all the people who already contributed!
|
||||
|
||||
See [Contributing](https://github.com/orangecoding/fredy/blob/master/CONTRIBUTING.md)
|
||||
|
||||
## Logs
|
||||
|
||||
You can browse the logs with `docker logs fredy -f`.
|
||||
## Star History
|
||||
|
||||
[](https://www.star-history.com/#orangecoding/fredy&Date)
|
||||
|
||||
@@ -1,118 +1,124 @@
|
||||
import {NoNewListingsWarning} from './errors.js';
|
||||
import {setKnownListings, getKnownListings} from './services/storage/listingsStorage.js';
|
||||
import { NoNewListingsWarning } from './errors.js';
|
||||
import { setKnownListings, getKnownListings } from './services/storage/listingsStorage.js';
|
||||
import * as notify from './notification/notify.js';
|
||||
import Extractor from './services/extractor/extractor.js';
|
||||
import urlModifier from './services/queryStringMutator.js';
|
||||
|
||||
class FredyRuntime {
|
||||
/**
|
||||
*
|
||||
* @param providerConfig the config for the specific provider, we're going to query at the moment
|
||||
* @param notificationConfig the config for all notifications
|
||||
* @param providerId the id of the provider currently in use
|
||||
* @param jobKey key of the job that is currently running (from within the config)
|
||||
* @param similarityCache cache instance holding values to check for similarity of entries
|
||||
*/
|
||||
constructor(providerConfig, notificationConfig, providerId, jobKey, similarityCache) {
|
||||
this._providerConfig = providerConfig;
|
||||
this._notificationConfig = notificationConfig;
|
||||
this._providerId = providerId;
|
||||
this._jobKey = jobKey;
|
||||
this._similarityCache = similarityCache;
|
||||
}
|
||||
/**
|
||||
*
|
||||
* @param providerConfig the config for the specific provider, we're going to query at the moment
|
||||
* @param notificationConfig the config for all notifications
|
||||
* @param providerId the id of the provider currently in use
|
||||
* @param jobKey key of the job that is currently running (from within the config)
|
||||
* @param similarityCache cache instance holding values to check for similarity of entries
|
||||
*/
|
||||
constructor(providerConfig, notificationConfig, providerId, jobKey, similarityCache) {
|
||||
this._providerConfig = providerConfig;
|
||||
this._notificationConfig = notificationConfig;
|
||||
this._providerId = providerId;
|
||||
this._jobKey = jobKey;
|
||||
this._similarityCache = similarityCache;
|
||||
}
|
||||
|
||||
execute() {
|
||||
return (
|
||||
//modify the url to make sure search order is correctly set
|
||||
Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam))
|
||||
//scraping the site and try finding new listings
|
||||
.then(this._getListings.bind(this))
|
||||
//bring them in a proper form (dictated by the provider)
|
||||
.then(this._normalize.bind(this))
|
||||
//filter listings with stuff tagged by the blacklist of the provider
|
||||
.then(this._filter.bind(this))
|
||||
//check if new listings available. if so proceed
|
||||
.then(this._findNew.bind(this))
|
||||
//store everything in db
|
||||
.then(this._save.bind(this))
|
||||
//check for similar listings. if found, remove them before notifying
|
||||
.then(this._filterBySimilarListings.bind(this))
|
||||
//notify the user using the configured notification adapter
|
||||
.then(this._notify.bind(this))
|
||||
//if an error occurred on the way, handle it here.
|
||||
.catch(this._handleError.bind(this))
|
||||
);
|
||||
}
|
||||
execute() {
|
||||
return (
|
||||
//modify the url to make sure search order is correctly set
|
||||
Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam))
|
||||
//scraping the site and try finding new listings
|
||||
.then(this._providerConfig.getListings?.bind(this) ?? this._getListings.bind(this))
|
||||
//bring them in a proper form (dictated by the provider)
|
||||
.then(this._normalize.bind(this))
|
||||
//filter listings with stuff tagged by the blacklist of the provider
|
||||
.then(this._filter.bind(this))
|
||||
//check if new listings available. if so proceed
|
||||
.then(this._findNew.bind(this))
|
||||
//store everything in db
|
||||
.then(this._save.bind(this))
|
||||
//check for similar listings. if found, remove them before notifying
|
||||
.then(this._filterBySimilarListings.bind(this))
|
||||
//notify the user using the configured notification adapter
|
||||
.then(this._notify.bind(this))
|
||||
//if an error occurred on the way, handle it here.
|
||||
.catch(this._handleError.bind(this))
|
||||
);
|
||||
}
|
||||
|
||||
_getListings(url) {
|
||||
const extractor = new Extractor();
|
||||
return new Promise((resolve, reject) => {
|
||||
extractor.execute(url,this._providerConfig.waitForSelector)
|
||||
.then(() => {
|
||||
const listings = extractor.parseResponseText(this._providerConfig.crawlContainer, this._providerConfig.crawlFields);
|
||||
resolve(listings == null ? [] : listings);
|
||||
}).catch(err => {
|
||||
reject(err);
|
||||
/* eslint-disable no-console */
|
||||
console.error(err);
|
||||
/* eslint-enable no-console */
|
||||
});
|
||||
_getListings(url) {
|
||||
const extractor = new Extractor();
|
||||
return new Promise((resolve, reject) => {
|
||||
extractor
|
||||
.execute(url, this._providerConfig.waitForSelector)
|
||||
.then(() => {
|
||||
const listings = extractor.parseResponseText(
|
||||
this._providerConfig.crawlContainer,
|
||||
this._providerConfig.crawlFields,
|
||||
url,
|
||||
);
|
||||
resolve(listings == null ? [] : listings);
|
||||
})
|
||||
.catch((err) => {
|
||||
reject(err);
|
||||
/* eslint-disable no-console */
|
||||
console.error(err);
|
||||
/* eslint-enable no-console */
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
_normalize(listings) {
|
||||
return listings.map(this._providerConfig.normalize);
|
||||
}
|
||||
_normalize(listings) {
|
||||
return listings.map(this._providerConfig.normalize);
|
||||
}
|
||||
|
||||
_filter(listings) {
|
||||
//only return those where all the fields have been found
|
||||
const keys = Object.keys(this._providerConfig.crawlFields);
|
||||
const filteredListings = listings.filter((item) => keys.every((key) => key in item));
|
||||
return filteredListings.filter(this._providerConfig.filter);
|
||||
}
|
||||
_filter(listings) {
|
||||
//only return those where all the fields have been found
|
||||
const keys = Object.keys(this._providerConfig.crawlFields);
|
||||
const filteredListings = listings.filter((item) => keys.every((key) => key in item));
|
||||
return filteredListings.filter(this._providerConfig.filter);
|
||||
}
|
||||
|
||||
_findNew(listings) {
|
||||
const newListings = listings.filter((o) => getKnownListings(this._jobKey, this._providerId)[o.id] == null);
|
||||
if (newListings.length === 0) {
|
||||
throw new NoNewListingsWarning();
|
||||
}
|
||||
return newListings;
|
||||
_findNew(listings) {
|
||||
const newListings = listings.filter((o) => getKnownListings(this._jobKey, this._providerId)[o.id] == null);
|
||||
if (newListings.length === 0) {
|
||||
throw new NoNewListingsWarning();
|
||||
}
|
||||
return newListings;
|
||||
}
|
||||
|
||||
_notify(newListings) {
|
||||
if (newListings.length === 0) {
|
||||
throw new NoNewListingsWarning();
|
||||
}
|
||||
const sendNotifications = notify.send(this._providerId, newListings, this._notificationConfig, this._jobKey);
|
||||
return Promise.all(sendNotifications).then(() => newListings);
|
||||
_notify(newListings) {
|
||||
if (newListings.length === 0) {
|
||||
throw new NoNewListingsWarning();
|
||||
}
|
||||
const sendNotifications = notify.send(this._providerId, newListings, this._notificationConfig, this._jobKey);
|
||||
return Promise.all(sendNotifications).then(() => newListings);
|
||||
}
|
||||
|
||||
_save(newListings) {
|
||||
const currentListings = getKnownListings(this._jobKey, this._providerId) || {};
|
||||
newListings.forEach((listing) => {
|
||||
currentListings[listing.id] = Date.now();
|
||||
});
|
||||
setKnownListings(this._jobKey, this._providerId, currentListings);
|
||||
return newListings;
|
||||
}
|
||||
_save(newListings) {
|
||||
const currentListings = getKnownListings(this._jobKey, this._providerId) || {};
|
||||
newListings.forEach((listing) => {
|
||||
currentListings[listing.id] = Date.now();
|
||||
});
|
||||
setKnownListings(this._jobKey, this._providerId, currentListings);
|
||||
return newListings;
|
||||
}
|
||||
|
||||
_filterBySimilarListings(listings) {
|
||||
const filteredList = listings.filter((listing) => {
|
||||
const similar = this._similarityCache.hasSimilarEntries(this._jobKey, listing.title);
|
||||
if (similar) {
|
||||
/* eslint-disable no-console */
|
||||
console.debug(`Filtering similar entry for job with id ${this._jobKey} with title: `, listing.title);
|
||||
/* eslint-enable no-console */
|
||||
}
|
||||
return !similar;
|
||||
});
|
||||
filteredList.forEach((filter) => this._similarityCache.addCacheEntry(this._jobKey, filter.title));
|
||||
return filteredList;
|
||||
}
|
||||
_filterBySimilarListings(listings) {
|
||||
const filteredList = listings.filter((listing) => {
|
||||
const similar = this._similarityCache.hasSimilarEntries(this._jobKey, listing.title);
|
||||
if (similar) {
|
||||
/* eslint-disable no-console */
|
||||
console.debug(`Filtering similar entry for job with id ${this._jobKey} with title: `, listing.title);
|
||||
/* eslint-enable no-console */
|
||||
}
|
||||
return !similar;
|
||||
});
|
||||
filteredList.forEach((filter) => this._similarityCache.addCacheEntry(this._jobKey, filter.title));
|
||||
return filteredList;
|
||||
}
|
||||
|
||||
_handleError(err) {
|
||||
if (err.name !== 'NoNewListingsWarning') console.error(err);
|
||||
}
|
||||
_handleError(err) {
|
||||
if (err.name !== 'NoNewListingsWarning') console.error(err);
|
||||
}
|
||||
}
|
||||
|
||||
export default FredyRuntime;
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
import restana from 'restana';
|
||||
import fetch from 'node-fetch';
|
||||
import * as jobStorage from '../../services/storage/jobStorage.js';
|
||||
import * as userStorage from '../../services/storage/userStorage.js';
|
||||
import * as immoscoutProvider from '../../provider/immoscout.js';
|
||||
import { config } from '../../utils.js';
|
||||
import { isAdmin } from '../security.js';
|
||||
import {trackDemoJobCreated} from '../../services/tracking/Tracker.js';
|
||||
import { trackDemoJobCreated } from '../../services/tracking/Tracker.js';
|
||||
const service = restana();
|
||||
const jobRouter = service.newRouter();
|
||||
function doesJobBelongsToUser(job, req) {
|
||||
@@ -17,7 +15,7 @@ function doesJobBelongsToUser(job, req) {
|
||||
if (user == null) {
|
||||
return false;
|
||||
}
|
||||
return user.isAdmin || job.userId === job.userId;
|
||||
return user.isAdmin || job.userId === user.id;
|
||||
}
|
||||
jobRouter.get('/', async (req, res) => {
|
||||
const isUserAdmin = isAdmin(req);
|
||||
@@ -28,7 +26,7 @@ jobRouter.get('/', async (req, res) => {
|
||||
jobRouter.get('/processingTimes', async (req, res) => {
|
||||
res.body = {
|
||||
interval: config.interval,
|
||||
lastRun: config.lastRun || null
|
||||
lastRun: config.lastRun || null,
|
||||
};
|
||||
res.send();
|
||||
});
|
||||
@@ -51,7 +49,7 @@ jobRouter.post('/', async (req, res) => {
|
||||
trackDemoJobCreated({
|
||||
name,
|
||||
provider,
|
||||
adapter: notificationAdapter
|
||||
adapter: notificationAdapter,
|
||||
});
|
||||
res.send();
|
||||
});
|
||||
|
||||
@@ -7,9 +7,11 @@ export const send = ({ serviceName, newListings, notificationConfig, jobKey }) =
|
||||
const job = getJob(jobKey);
|
||||
const jobName = job == null ? jobKey : job.name;
|
||||
const promises = newListings.map((newListing) => {
|
||||
const message = `Address: ${newListing.address} Size: ${newListing.size.replace(/2m/g, '$m^2$')} Price: ${
|
||||
newListing.price
|
||||
}`;
|
||||
const message = `
|
||||
Address: ${newListing.address}
|
||||
Size: ${newListing.size.replace(/2m/g, '$m^2$')}
|
||||
Price: ${newListing.price}
|
||||
Link: ${newListing.link}`;
|
||||
return fetch(server, {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({
|
||||
|
||||
@@ -1,50 +1,73 @@
|
||||
import { markdown2Html } from '../../services/markdown.js';
|
||||
import { getJob } from '../../services/storage/jobStorage.js';
|
||||
import {markdown2Html} from '../../services/markdown.js';
|
||||
import {getJob} from '../../services/storage/jobStorage.js';
|
||||
import fetch from 'node-fetch';
|
||||
|
||||
export const send = ({ serviceName, newListings, notificationConfig, jobKey }) => {
|
||||
const { token, user, device } = notificationConfig.find((adapter) => adapter.id === config.id).fields;
|
||||
const job = getJob(jobKey);
|
||||
const jobName = job == null ? jobKey : job.name;
|
||||
const promises = newListings.map((newListing) => {
|
||||
const title = `${jobName} at ${serviceName}: ${newListing.title}`;
|
||||
const message = `Address: ${newListing.address}\nSize: ${newListing.size}\nPrice: ${newListing.price}\nLink: ${newListing.link}`;
|
||||
return fetch('https://api.pushover.net/1/messages.json', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
token: token,
|
||||
user: user,
|
||||
message: message,
|
||||
device: device,
|
||||
title: title,
|
||||
}),
|
||||
export const send = ({serviceName, newListings, notificationConfig, jobKey}) => {
|
||||
const {token, user, device} = notificationConfig.find((adapter) => adapter.id === config.id).fields;
|
||||
const job = getJob(jobKey);
|
||||
const jobName = job == null ? jobKey : job.name;
|
||||
const promises = newListings.map((newListing) => {
|
||||
const title = `${jobName} at ${serviceName}: ${newListing.title}`;
|
||||
const message = `Address: ${newListing.address}\nSize: ${newListing.size}\nPrice: ${newListing.price}\nLink: ${newListing.link}`;
|
||||
return fetch('https://api.pushover.net/1/messages.json', {
|
||||
method: 'POST',
|
||||
headers: {'Content-Type': 'application/json'},
|
||||
body: JSON.stringify({
|
||||
token: token,
|
||||
user: user,
|
||||
message: message,
|
||||
device: device,
|
||||
title: title,
|
||||
}),
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
return Promise.all(promises);
|
||||
return Promise.all(promises)
|
||||
.then((responses) => {
|
||||
// Convert all responses to JSON
|
||||
return Promise.all(responses.map((response) => response.json()));
|
||||
})
|
||||
.then((data) => {
|
||||
// Check for errors in the data
|
||||
const error = data
|
||||
.map((item) => (item.errors != null && item.errors.length > 0 ? item.errors.join(', ') : null))
|
||||
.filter((err) => err !== null);
|
||||
|
||||
if (error.length > 0) {
|
||||
// Reject with the combined error messages
|
||||
return Promise.reject(error.join('; '));
|
||||
}
|
||||
|
||||
return data;
|
||||
})
|
||||
.then(() => {
|
||||
return Promise.resolve();
|
||||
})
|
||||
.catch((error) => {
|
||||
return Promise.reject(error);
|
||||
});
|
||||
};
|
||||
|
||||
export const config = {
|
||||
id: 'pushover',
|
||||
name: 'Pushover',
|
||||
readme: markdown2Html('lib/notification/adapter/pushover.md'),
|
||||
description: 'Fredy will send new listings to your mobile using Pushover.',
|
||||
fields: {
|
||||
token: {
|
||||
type: 'text',
|
||||
label: 'API token',
|
||||
description: 'Your application\'s API token.',
|
||||
id: 'pushover',
|
||||
name: 'Pushover',
|
||||
readme: markdown2Html('lib/notification/adapter/pushover.md'),
|
||||
description: 'Fredy will send new listings to your mobile using Pushover.',
|
||||
fields: {
|
||||
token: {
|
||||
type: 'text',
|
||||
label: 'API token',
|
||||
description: 'Your application\'s API token.',
|
||||
},
|
||||
user: {
|
||||
type: 'text',
|
||||
label: 'User key',
|
||||
description: 'Your user/group key.',
|
||||
},
|
||||
device: {
|
||||
type: 'text',
|
||||
label: 'Device name',
|
||||
description: 'The device name to send your notification to. Messages may be addressed to multiple specific devices by joining them with a comma.',
|
||||
},
|
||||
},
|
||||
user: {
|
||||
type: 'text',
|
||||
label: 'User key',
|
||||
description: 'Your user/group key.',
|
||||
},
|
||||
device: {
|
||||
type: 'text',
|
||||
label: 'Device name',
|
||||
description: 'The device name to send your notification to. Messages may be addressed to multiple specific devices by joining them with a comma.',
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
let appliedBlackList = [];
|
||||
|
||||
/**
|
||||
@@ -26,7 +26,7 @@ const config = {
|
||||
url: null,
|
||||
crawlContainer: 'div[data-testid="serp-core-classified-card-testid"]',
|
||||
sortByDateParam: 'sortby=19',
|
||||
waitForSelector: 'div[data-testid="serp-core-classified-card-testid"]',
|
||||
waitForSelector: 'div[data-testid="serp-gridcontainer-testid"]',
|
||||
crawlFields: {
|
||||
id: 'button@title |trim', // immonet is a piece of sh*t. See comment above
|
||||
title: 'button@title |trim',
|
||||
|
||||
@@ -1,37 +1,109 @@
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
/**
|
||||
* ImmoScout provider using the mobile API to retrieve listings.
|
||||
*
|
||||
* The mobile API provides the following endpoints:
|
||||
* - GET /search/total?{search parameters}: Returns the total number of listings for the given query
|
||||
* Example: `curl -H "User-Agent: ImmoScout24_1410_30_._" https://api.mobile.immobilienscout24.de/search/total?searchType=region&realestatetype=apartmentrent&pricetype=calculatedtotalrent&geocodes=%2Fde%2Fberlin%2Fberlin `
|
||||
*
|
||||
* - POST /search/list?{search parameters}: Actually retrieves the listings. Body is json encoded and contains
|
||||
* data specifying additional results (advertisements) to return. The format is as follows:
|
||||
* ```
|
||||
* {
|
||||
* "supportedResultListTypes": [],
|
||||
* "userData": {}
|
||||
* }
|
||||
* ```
|
||||
* It is not necessary to provide data for the specified keys.
|
||||
*
|
||||
* Example: `curl -X POST 'https://api.mobile.immobilienscout24.de/search/list?pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region&geocodes=%2Fde%2Fberlin%2Fberlin&pagenumber=1' -H "Connection: keep-alive" -H "User-Agent: ImmoScout24_1410_30_._" -H "Accept: application/json" -H "Content-Type: application/json" -d '{"supportedResultListType": [], "userData": {}}'`
|
||||
|
||||
* - GET /expose/{id} - Returns the details of a listing. The response contains additional details not included in the
|
||||
* listing response.
|
||||
*
|
||||
* Example: `curl -H "User-Agent: ImmoScout24_1410_30_._" "https://api.mobile.immobilienscout24.de/expose/158382494"`
|
||||
*
|
||||
*
|
||||
* It is necessary to set the correct User Agent (see `getListings`) in the request header.
|
||||
*
|
||||
* Note that the mobile API is not publicly documented. I've reverse-engineered
|
||||
* it by intercepting traffic from an android emulator running the immoscout app.
|
||||
* Moreover, the search parameters differ slightly from the web API. I've mapped them
|
||||
* to the web API parameters by comparing a search request with all parameters set between
|
||||
* the web and mobile API. The mobile API actually seems to be a superset of the web API,
|
||||
* but I have decided not to include new parameters as I wanted to keep the existing UX (i.e.,
|
||||
* users only have to provide a link to an existing search).
|
||||
*
|
||||
*/
|
||||
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { convertWebToMobile } from '../services/immoscout/immoscout-web-translater.js';
|
||||
let appliedBlackList = [];
|
||||
|
||||
async function getListings(url) {
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'User-Agent': 'ImmoScout24_1410_30_._',
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
supportedResultListTypes: [],
|
||||
userData: {},
|
||||
}),
|
||||
});
|
||||
if (!response.ok) {
|
||||
console.error('Error fetching data from ImmoScout Mobile API:', response.statusText);
|
||||
return [];
|
||||
}
|
||||
|
||||
const responseBody = await response.json();
|
||||
return responseBody.resultListItems
|
||||
.filter((item) => item.type === 'EXPOSE_RESULT')
|
||||
.map((expose) => {
|
||||
const item = expose.item;
|
||||
const [price, size] = item.attributes;
|
||||
return {
|
||||
id: item.id,
|
||||
price: price?.value,
|
||||
size: size?.value,
|
||||
title: item.title,
|
||||
link: `${metaInformation.baseUrl}expose/${item.id}`,
|
||||
address: item.address?.line,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
function nullOrEmpty(val) {
|
||||
return val == null || val.length === 0;
|
||||
}
|
||||
function normalize(o) {
|
||||
const title = nullOrEmpty(o.title) ? 'NO TITLE FOUND' : o.title.replace('NEU', '');
|
||||
const address = nullOrEmpty(o.address) ? 'NO ADDRESS FOUND' : (o.address || '').replace(/\(.*\),.*$/, '').trim();
|
||||
const link = nullOrEmpty(o.link) ? 'NO LINK' : `https://www.immobilienscout24.de${o.link.substring(o.link.indexOf('/expose'))}`;
|
||||
const id = buildHash(o.id, o.price);
|
||||
return Object.assign(o, { id, title, address, link });
|
||||
return Object.assign(o, { id, title, address });
|
||||
}
|
||||
function applyBlacklist(o) {
|
||||
return !utils.isOneOf(o.title, appliedBlackList);
|
||||
}
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#resultListItems li.result-list__listing',
|
||||
sortByDateParam: 'sorting=2',
|
||||
waitForSelector: 'body',
|
||||
crawlFields: {
|
||||
id: '.result-list-entry@data-obid | int',
|
||||
price: '.result-list-entry .result-list-entry__criteria .grid-item:first-child dd | removeNewline | trim',
|
||||
size: '.result-list-entry .result-list-entry__criteria .grid-item:nth-child(2) dd | removeNewline | trim',
|
||||
title: '.result-list-entry .result-list-entry__brand-title-container h2 | removeNewline | trim',
|
||||
link: '.result-list-entry .result-list-entry__brand-title-container@href',
|
||||
address: '.result-list-entry .result-list-entry__map-link',
|
||||
id: 'id',
|
||||
title: 'title',
|
||||
price: 'price',
|
||||
size: 'size',
|
||||
link: 'link',
|
||||
address: 'address',
|
||||
},
|
||||
// Not required - used by filter to remove and listings that failed to parse
|
||||
sortByDateParam: 'sorting=-firstactivation',
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
getListings: getListings,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
config.url = convertWebToMobile(sourceConfig.url);
|
||||
appliedBlackList = blacklist || [];
|
||||
};
|
||||
export const metaInformation = {
|
||||
@@ -39,4 +111,5 @@ export const metaInformation = {
|
||||
baseUrl: 'https://www.immobilienscout24.de/',
|
||||
id: 'immoscout',
|
||||
};
|
||||
|
||||
export { config };
|
||||
|
||||
@@ -1,48 +1,48 @@
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
function normalize(o) {
|
||||
const size = o.size || 'N/A m²';
|
||||
const price = (o.price || '--- €').replace('Preis auf Anfrage', '--- €');
|
||||
const title = o.title || 'No title available';
|
||||
const immoId = o.id.substring(o.id.indexOf('-') + 1, o.id.length);
|
||||
const link = `https://immo.swp.de/immobilien/${immoId}`;
|
||||
const description = o.description;
|
||||
const id = buildHash(immoId, price);
|
||||
return Object.assign(o, {id, price, size, title, link, description});
|
||||
const size = o.size || 'N/A m²';
|
||||
const price = (o.price || '--- €').replace('Preis auf Anfrage', '--- €');
|
||||
const title = o.title || 'No title available';
|
||||
const immoId = o.id.substring(o.id.indexOf('-') + 1, o.id.length);
|
||||
const link = `https://immo.swp.de/immobilien/${immoId}`;
|
||||
const description = o.description;
|
||||
const id = buildHash(immoId, price);
|
||||
return Object.assign(o, { id, price, size, title, link, description });
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '.js-serp-item',
|
||||
sortByDateParam: 's=most_recently_updated_first',
|
||||
waitForSelector: 'body',
|
||||
crawlFields: {
|
||||
id: '.js-bookmark-btn@data-id',
|
||||
price: 'div.align-items-start div:first-child | trim',
|
||||
size: 'div.align-items-start div:nth-child(3) | trim',
|
||||
title: '.card-title h2 | trim',
|
||||
link: '.ci-search-result__link@href',
|
||||
description: '.js-show-more-item-sm | removeNewline | trim',
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
url: null,
|
||||
crawlContainer: '.js-serp-item',
|
||||
sortByDateParam: 's=most_recently_updated_first',
|
||||
waitForSelector: 'body',
|
||||
crawlFields: {
|
||||
id: '.js-bookmark-btn@data-id',
|
||||
price: 'div.align-items-start div:first-child | trim',
|
||||
size: 'div.align-items-start div:nth-child(3) | trim',
|
||||
title: '.js-item-title-link@title | trim',
|
||||
link: '.ci-search-result__link@href',
|
||||
description: '.js-show-more-item-sm | removeNewline | trim',
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlackList = blacklist || [];
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlackList = blacklist || [];
|
||||
};
|
||||
export const metaInformation = {
|
||||
name: 'Immo Südwest Presse',
|
||||
baseUrl: 'https://immo.swp.de/',
|
||||
id: 'immoswp',
|
||||
name: 'Immo Südwest Presse',
|
||||
baseUrl: 'https://immo.swp.de/',
|
||||
id: 'immoswp',
|
||||
};
|
||||
export {config};
|
||||
export { config };
|
||||
|
||||
@@ -18,12 +18,12 @@ const config = {
|
||||
crawlContainer:
|
||||
'div[data-testid="serp-core-scrollablelistview-testid"]:not(div[data-testid="serp-enlargementlist-testid"] div[data-testid="serp-card-testid"]) div[data-testid="serp-core-classified-card-testid"]',
|
||||
sortByDateParam: 'order=DateDesc',
|
||||
waitForSelector: 'div[data-testid="cardmfe-price-testid"]',
|
||||
waitForSelector: 'div[data-testid="serp-gridcontainer-testid"]',
|
||||
crawlFields: {
|
||||
id: 'a@href',
|
||||
price: 'div[data-testid="cardmfe-price-testid"] | removeNewline | trim',
|
||||
size: 'div[data-testid="cardmfe-keyfacts-testid"] | removeNewline | trim',
|
||||
title: '.css-1cbj9xw',
|
||||
title: 'div[data-testid="cardmfe-description-box-text-test-id"] > div:nth-of-type(2)',
|
||||
link: 'a@href',
|
||||
address: 'div[data-testid="cardmfe-description-box-address"] | removeNewline | trim',
|
||||
},
|
||||
|
||||
@@ -15,7 +15,7 @@ function applyBlacklist(o) {
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const isBlacklistedDistrict =
|
||||
appliedBlacklistedDistricts.length === 0 ? false : utils.isOneOf(o.description, appliedBlacklistedDistricts);
|
||||
return !isBlacklistedDistrict && titleNotBlacklisted && descNotBlacklisted;
|
||||
return o.title != null && !isBlacklistedDistrict && titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
const config = {
|
||||
|
||||
@@ -4,7 +4,8 @@ let appliedBlackList = [];
|
||||
|
||||
function normalize(o) {
|
||||
const id = buildHash(o.id, o.price);
|
||||
return Object.assign(o, {id});
|
||||
const link = `https://www.wg-gesucht.de${o.link}`;
|
||||
return Object.assign(o, { id, link });
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
|
||||
@@ -1,29 +1,37 @@
|
||||
import { setInterval } from 'node:timers';
|
||||
import {removeJobsByUserName} from './storage/jobStorage.js';
|
||||
import {config} from '../utils.js';
|
||||
import { removeJobsByUserName } from './storage/jobStorage.js';
|
||||
import { config } from '../utils.js';
|
||||
import { getUsers } from './storage/userStorage.js';
|
||||
|
||||
/**
|
||||
* if we are running in demo environment, we have to cleanup the db files (specifically the jobs table)
|
||||
*/
|
||||
export function cleanupDemoAtMidnight() {
|
||||
const now = new Date();
|
||||
const millisUntilMidnightUTC = (24 - now.getUTCHours()) * 60 * 60 * 1000
|
||||
- now.getUTCMinutes() * 60 * 1000
|
||||
- now.getUTCSeconds() * 1000
|
||||
- now.getUTCMilliseconds();
|
||||
const now = new Date();
|
||||
const millisUntilMidnightUTC =
|
||||
(24 - now.getUTCHours()) * 60 * 60 * 1000 -
|
||||
now.getUTCMinutes() * 60 * 1000 -
|
||||
now.getUTCSeconds() * 1000 -
|
||||
now.getUTCMilliseconds();
|
||||
|
||||
setTimeout(() => {
|
||||
cleanup();
|
||||
setTimeout(() => {
|
||||
setInterval(
|
||||
() => {
|
||||
cleanup();
|
||||
|
||||
setInterval(() => {
|
||||
cleanup();
|
||||
}, 24 * 60 * 60 * 1000);
|
||||
|
||||
}, millisUntilMidnightUTC);
|
||||
},
|
||||
24 * 60 * 60 * 1000,
|
||||
);
|
||||
}, millisUntilMidnightUTC);
|
||||
}
|
||||
|
||||
function cleanup(){
|
||||
if(config.demoMode){
|
||||
removeJobsByUserName('demo');
|
||||
function cleanup() {
|
||||
if (config.demoMode) {
|
||||
const demoUser = getUsers(false).find((user) => user.username === 'demo');
|
||||
if (demoUser == null) {
|
||||
console.error('Demo user not found, cannot remove Jobs');
|
||||
return;
|
||||
}
|
||||
}
|
||||
removeJobsByUserName(demoUser.id);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,45 +1,43 @@
|
||||
import {setDebug} from './utils.js';
|
||||
import { setDebug } from './utils.js';
|
||||
import puppeteerExtractor from './puppeteerExtractor.js';
|
||||
import {loadParser, parse} from './parser/parser.js';
|
||||
import { loadParser, parse } from './parser/parser.js';
|
||||
|
||||
const DEFAULT_OPTIONS = {
|
||||
debug: false,
|
||||
puppeteerTimeout: 20_000,
|
||||
puppeteerHeadless: true
|
||||
|
||||
debug: false,
|
||||
puppeteerTimeout: 60_000,
|
||||
puppeteerHeadless: true,
|
||||
};
|
||||
|
||||
export default class Extractor {
|
||||
constructor(options) {
|
||||
this.options = {
|
||||
...DEFAULT_OPTIONS,
|
||||
...options
|
||||
};
|
||||
this.responseText = null;
|
||||
setDebug(this.options);
|
||||
constructor(options) {
|
||||
this.options = {
|
||||
...DEFAULT_OPTIONS,
|
||||
...options,
|
||||
};
|
||||
this.responseText = null;
|
||||
setDebug(this.options);
|
||||
}
|
||||
|
||||
/**
|
||||
* if you are extracting data from a SPA, you must provide a selector, otherwise
|
||||
* your response will never contain what you are really looking for
|
||||
* @param url
|
||||
* @param waitForSelector
|
||||
*/
|
||||
execute = async (url, waitForSelector = null) => {
|
||||
this.responseText = null;
|
||||
try {
|
||||
this.responseText = await puppeteerExtractor(url, waitForSelector, this.options);
|
||||
if (this.responseText != null) {
|
||||
loadParser(this.responseText);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error trying to load page.', error);
|
||||
}
|
||||
return this;
|
||||
};
|
||||
|
||||
/**
|
||||
* if you are extracting data from a SPA, you must provide a selector, otherwise
|
||||
* your response will never contain what you are really looking for
|
||||
* @param url
|
||||
* @param waitForSelector
|
||||
*/
|
||||
execute = async (url, waitForSelector = null) => {
|
||||
this.responseText = null;
|
||||
try {
|
||||
this.responseText = await puppeteerExtractor(url, waitForSelector, this.options);
|
||||
if(this.responseText != null) {
|
||||
loadParser(this.responseText);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error trying to load page.', error);
|
||||
}
|
||||
return this;
|
||||
};
|
||||
|
||||
|
||||
parseResponseText = (crawlContainer, crawlFields) => {
|
||||
return parse(crawlContainer, crawlFields, this.responseText);
|
||||
};
|
||||
parseResponseText = (crawlContainer, crawlFields, url) => {
|
||||
return parse(crawlContainer, crawlFields, this.responseText, url);
|
||||
};
|
||||
}
|
||||
|
||||
@@ -3,92 +3,95 @@ import * as cheerio from 'cheerio';
|
||||
let $ = null;
|
||||
|
||||
export function loadParser(text) {
|
||||
$ = cheerio.load(text);
|
||||
$ = cheerio.load(text);
|
||||
}
|
||||
|
||||
export function parse(crawlContainer, crawlFields, text) {
|
||||
if (!text) {
|
||||
console.warn('Cannot parse, text was empty.');
|
||||
return null;
|
||||
}
|
||||
export function parse(crawlContainer, crawlFields, text, url) {
|
||||
if (!text) {
|
||||
console.warn('No content found for ', url);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!crawlContainer || !crawlFields) {
|
||||
console.warn('Cannot parse, selector was empty.');
|
||||
return null;
|
||||
}
|
||||
if (!crawlContainer || !crawlFields) {
|
||||
console.warn('Cannot parse, selector was empty for url ', url);
|
||||
return null;
|
||||
}
|
||||
|
||||
const result = [];
|
||||
const result = [];
|
||||
|
||||
if ($(crawlContainer).length === 0) {
|
||||
console.error('No elements in crawl container found!');
|
||||
}
|
||||
if ($(crawlContainer).length === 0) {
|
||||
console.warn('No elements in crawl container found for url ', url);
|
||||
return null;
|
||||
}
|
||||
|
||||
$(crawlContainer).each((_, element) => {
|
||||
const container = $(element);
|
||||
const parsedObject = {};
|
||||
$(crawlContainer).each((_, element) => {
|
||||
const container = $(element);
|
||||
const parsedObject = {};
|
||||
|
||||
// Parse fields based on crawlFields
|
||||
for (const [key, fieldSelector] of Object.entries(crawlFields)) {
|
||||
let value;
|
||||
// Parse fields based on crawlFields
|
||||
for (const [key, fieldSelector] of Object.entries(crawlFields)) {
|
||||
let value;
|
||||
|
||||
try {
|
||||
try {
|
||||
const selector = fieldSelector.includes('|')
|
||||
? fieldSelector.substring(0, fieldSelector.indexOf('|')).trim()
|
||||
: fieldSelector;
|
||||
|
||||
const selector = fieldSelector.includes('|') ? fieldSelector.substring(0, fieldSelector.indexOf('|')).trim() : fieldSelector;
|
||||
|
||||
if (selector.includes('@')) {
|
||||
const [sel, attr] = selector.split('@');
|
||||
if (sel.length === 0) {
|
||||
value = container.attr(attr.trim());
|
||||
} else {
|
||||
value = container.find(sel.trim()).attr(attr.trim());
|
||||
}
|
||||
} else {
|
||||
value = container.find(selector.trim()).text();
|
||||
}
|
||||
|
||||
// Apply modifiers if specified
|
||||
if (fieldSelector.includes('|')) {
|
||||
const [_, ...modifiers] = fieldSelector.split('|').map(s => s.trim());
|
||||
value = applyModifiers(value, modifiers);
|
||||
}
|
||||
|
||||
parsedObject[key] = value || null;
|
||||
} catch (error) {
|
||||
console.error(`Error parsing field '${key}' with selector '${fieldSelector}':`, error);
|
||||
parsedObject[key] = null;
|
||||
}
|
||||
}
|
||||
|
||||
if (parsedObject.id != null) {
|
||||
result.push(parsedObject);
|
||||
if (selector.includes('@')) {
|
||||
const [sel, attr] = selector.split('@');
|
||||
if (sel.length === 0) {
|
||||
value = container.attr(attr.trim());
|
||||
} else {
|
||||
value = container.find(sel.trim()).attr(attr.trim());
|
||||
}
|
||||
} else {
|
||||
console.warn('ID not found. Not relaying object.');
|
||||
value = container.find(selector.trim()).text();
|
||||
}
|
||||
});
|
||||
|
||||
return result;
|
||||
// Apply modifiers if specified
|
||||
if (fieldSelector.includes('|')) {
|
||||
/* eslint-disable no-unused-vars */
|
||||
const [_, ...modifiers] = fieldSelector.split('|').map((s) => s.trim());
|
||||
/* eslint-disable no-unused-vars */
|
||||
value = applyModifiers(value, modifiers);
|
||||
}
|
||||
|
||||
parsedObject[key] = value || null;
|
||||
} catch (error) {
|
||||
console.error(`Error parsing field '${key}' with selector '${fieldSelector}':`, error);
|
||||
parsedObject[key] = null;
|
||||
}
|
||||
}
|
||||
|
||||
if (parsedObject.id != null) {
|
||||
result.push(parsedObject);
|
||||
} else {
|
||||
console.warn('ID not found. Not relaying object.');
|
||||
}
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Helper function to apply modifiers
|
||||
function applyModifiers(value, modifiers) {
|
||||
if (!value) return value;
|
||||
if (!value) return value;
|
||||
|
||||
modifiers.forEach(modifier => {
|
||||
switch (modifier) {
|
||||
case 'int':
|
||||
value = parseInt(value, 10);
|
||||
break;
|
||||
case 'trim':
|
||||
value = value.replace(/\s+/g, ' ').trim();
|
||||
break;
|
||||
case 'removeNewline':
|
||||
value = value.replace(/\n/g, ' ');
|
||||
break;
|
||||
default:
|
||||
console.warn(`Unknown modifier: ${modifier}`);
|
||||
}
|
||||
});
|
||||
modifiers.forEach((modifier) => {
|
||||
switch (modifier) {
|
||||
case 'int':
|
||||
value = parseInt(value, 10);
|
||||
break;
|
||||
case 'trim':
|
||||
value = value.replace(/\s+/g, ' ').trim();
|
||||
break;
|
||||
case 'removeNewline':
|
||||
value = value.replace(/\n/g, ' ');
|
||||
break;
|
||||
default:
|
||||
console.warn(`Unknown modifier: ${modifier}`);
|
||||
}
|
||||
});
|
||||
|
||||
return value;
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,48 +1,49 @@
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import {debug, DEFAULT_HEADER, botDetected} from './utils.js';
|
||||
import { debug, DEFAULT_HEADER, botDetected } from './utils.js';
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
export default async function execute(url, waitForSelector, options) {
|
||||
let browser;
|
||||
try {
|
||||
debug(`Sending request to ${url} using Puppeteer.`);
|
||||
let browser;
|
||||
try {
|
||||
debug(`Sending request to ${url} using Puppeteer.`);
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: options.puppeteerHeadless ?? true,
|
||||
args: ['--no-sandbox', '--disable-gpu', '--disable-setuid-sandbox']
|
||||
});
|
||||
let page = await browser.newPage();
|
||||
await page.setExtraHTTPHeaders(DEFAULT_HEADER);
|
||||
const response = await page.goto(url, {
|
||||
waitUntil: 'domcontentloaded'
|
||||
});
|
||||
let pageSource;
|
||||
//if we're extracting data from a spa, we must wait for the selector
|
||||
if (waitForSelector != null) {
|
||||
await page.waitForSelector(waitForSelector);
|
||||
pageSource = await page.evaluate(selector => {
|
||||
return document.querySelector(selector).innerHTML;
|
||||
}, waitForSelector);
|
||||
} else {
|
||||
pageSource = await page.content();
|
||||
}
|
||||
|
||||
const statusCode = response.status();
|
||||
|
||||
if (botDetected(pageSource, statusCode)) {
|
||||
console.warn('We have been detected as a bot :-/ Tried url: => ', url);
|
||||
return null;
|
||||
}
|
||||
|
||||
return await page.content();
|
||||
} catch (error) {
|
||||
console.error('Error executing with puppeteer executor', error);
|
||||
return null;
|
||||
} finally {
|
||||
if (browser != null) {
|
||||
await browser.close();
|
||||
}
|
||||
browser = await puppeteer.launch({
|
||||
headless: options.puppeteerHeadless ?? true,
|
||||
args: ['--no-sandbox', '--disable-gpu', '--disable-setuid-sandbox'],
|
||||
timeout: options.puppeteerTimeout || 30_000,
|
||||
});
|
||||
let page = await browser.newPage();
|
||||
await page.setExtraHTTPHeaders(DEFAULT_HEADER);
|
||||
const response = await page.goto(url, {
|
||||
waitUntil: 'domcontentloaded',
|
||||
});
|
||||
let pageSource;
|
||||
//if we're extracting data from a spa, we must wait for the selector
|
||||
if (waitForSelector != null) {
|
||||
await page.waitForSelector(waitForSelector);
|
||||
pageSource = await page.evaluate((selector) => {
|
||||
return document.querySelector(selector).innerHTML;
|
||||
}, waitForSelector);
|
||||
} else {
|
||||
pageSource = await page.content();
|
||||
}
|
||||
}
|
||||
|
||||
const statusCode = response.status();
|
||||
|
||||
if (botDetected(pageSource, statusCode)) {
|
||||
console.warn('We have been detected as a bot :-/ Tried url: => ', url);
|
||||
return null;
|
||||
}
|
||||
|
||||
return await page.content();
|
||||
} catch (error) {
|
||||
console.error('Error executing with puppeteer executor', error);
|
||||
return null;
|
||||
} finally {
|
||||
if (browser != null) {
|
||||
await browser.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,35 +1,32 @@
|
||||
let debuggingOn = false;
|
||||
|
||||
export const DEFAULT_HEADER = {
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
'Connection': 'keep-alive',
|
||||
'Upgrade-Insecure-Requests': '1',
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
|
||||
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
Connection: 'keep-alive',
|
||||
'Upgrade-Insecure-Requests': '1',
|
||||
'User-Agent':
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
|
||||
};
|
||||
|
||||
export const setDebug = options => {
|
||||
debuggingOn = !!options?.debug;
|
||||
export const setDebug = (options) => {
|
||||
debuggingOn = !!options?.debug;
|
||||
};
|
||||
|
||||
export const debug = (message) => {
|
||||
if(debuggingOn) {
|
||||
console.debug(message);
|
||||
}
|
||||
if (debuggingOn) {
|
||||
/* eslint-disable no-console */
|
||||
console.debug(message);
|
||||
/* eslint-enable no-console */
|
||||
}
|
||||
};
|
||||
|
||||
export const botDetected = (pageSource, statusCode) => {
|
||||
const suspiciousStatusCodes = [
|
||||
403, 429
|
||||
];
|
||||
const botDetectionPatterns = [
|
||||
/verify you are human/i,
|
||||
/access denied/i,
|
||||
/x-amz-cf-id/i,
|
||||
];
|
||||
const suspiciousStatusCodes = [403, 429];
|
||||
const botDetectionPatterns = [/verify you are human/i, /access denied/i, /x-amz-cf-id/i];
|
||||
|
||||
const detectedInSource = botDetectionPatterns.some(pattern => pattern.test(pageSource));
|
||||
const detectedByStatus = suspiciousStatusCodes.includes(statusCode);
|
||||
const detectedInSource = botDetectionPatterns.some((pattern) => pattern.test(pageSource));
|
||||
const detectedByStatus = suspiciousStatusCodes.includes(statusCode);
|
||||
|
||||
return detectedInSource || detectedByStatus;
|
||||
};
|
||||
return detectedInSource || detectedByStatus;
|
||||
};
|
||||
|
||||
195
lib/services/immoscout/immoscout-web-translater.js
Normal file
195
lib/services/immoscout/immoscout-web-translater.js
Normal file
@@ -0,0 +1,195 @@
|
||||
/*
|
||||
Rent a flat
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?numberofrooms=1.0-10000.0&price=1.0-10000.0&livingspace=10.0-10000.0&pricetype=rentpermonth&enteredFrom=result_list
|
||||
*/
|
||||
|
||||
/*
|
||||
Rent a flat:
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?enteredFrom=one_step_search
|
||||
Mobile:
|
||||
https://api.mobile.immobilienscout24.de/search/list?numberofrooms=1.5-&searchId=d7c127d8-6630-49e8-a1dd-5ae04dad454d&sorting=standard&pagesize=20&livingspace=10-500&pagenumber=1&realestatetype=apartmentrent&priceType=calculatedtotalrent&price=1-10000&publishedafter=2025-05-14T09:11:54&channel=is24&searchType=region&geocodes=/de/nordrhein-westfalen/duesseldorf&features=adKeysAndStringValues,virtualTour,contactDetails,viareporting,nextgen,calculatedTotalRent,listingsInListFirstSummary,xxlListingType,quickfilters,grouping,projectsInAllRealestateTypes,fairPrice
|
||||
*/
|
||||
|
||||
/*
|
||||
Rent a house:
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/haus-mieten?enteredFrom=one_step_search
|
||||
Mobile:
|
||||
https://api.mobile.immobilienscout24.de/search/map/v3?publishedafter=2025-05-14T09:12:49&pagenumber=1&searchType=region&geocodes=/de/nordrhein-westfalen/duesseldorf&realEstateType=houserent&pagesize=300&features=disableNHBGrouping,nextGen,fairPrice,listingsInListFirstSummary,xxlListingType,contactDetails&sorting=standard
|
||||
*/
|
||||
|
||||
/*
|
||||
buy a flat
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-kaufen?numberofrooms=1.0-10000.0&price=1.0-10000.0&livingspace=1.0-10000.0&enteredFrom=result_list
|
||||
Mobile:
|
||||
https://api.mobile.immobilienscout24.de/search/map/v3?features=disableNHBGrouping,nextGen,fairPrice,listingsInListFirstSummary,xxlListingType,contactDetails&sorting=standard&realEstateType=apartmentbuy&pagesize=300&pagenumber=1&geocodes=/de/nordrhein-westfalen/duesseldorf&publishedafter=2025-05-14T09:14:43&searchType=region
|
||||
*/
|
||||
|
||||
/*
|
||||
Buy a house
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/haus-kaufen?numberofrooms=1.0-10000.0&price=1.0-10000.0E7&livingspace=1.0-10000.0&enteredFrom=result_list
|
||||
Mobile:
|
||||
https://api.mobile.immobilienscout24.de/search/map/v3?geocodes=/de/nordrhein-westfalen/duesseldorf&features=disableNHBGrouping,nextGen,fairPrice,listingsInListFirstSummary,xxlListingType,contactDetails&searchType=region&realEstateType=housebuy&pagenumber=1&pagesize=300&sorting=standard&publishedafter=2025-05-14T09:16:28
|
||||
*/
|
||||
|
||||
/*
|
||||
Buy a house only in parts of a city
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/haus-kaufen?numberofrooms=1.0-10000.0&price=1.0-10000.0E7&livingspace=1.0-10000.0&geocodes=1276010037,1276010014,1276010012&enteredFrom=result_list
|
||||
Mobile:
|
||||
https://api.mobile.immobilienscout24.de/search/list?pagesize=20&pagenumber=1&features=adKeysAndStringValues,virtualTour,contactDetails,viareporting,grouping,nextgen,listingsInListFirstSummary,xxlListingType,quickfilters,fairPrice&sorting=standard&channel=is24&geocodes=/de/nordrhein-westfalen/duesseldorf/stadtbezirk-1&searchType=region&realestatetype=housebuy&publishedafter=2025-05-14T09:17:23
|
||||
*/
|
||||
|
||||
/*
|
||||
Buy a house with radius
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/radius/haus-kaufen?centerofsearchaddress=D%C3%BCsseldorf%3B%3B%3B%3B%3B%3B&numberofrooms=1.0-10000.0&price=1.0-1.0E7&livingspace=1.0-10000.0&geocoordinates=51.22496%3B6.77567%3B5.0&enteredFrom=result_list
|
||||
Mobile:
|
||||
https://api.mobile.immobilienscout24.de/home/search/total?pagenumber=1&pagesize=1&geocoordinates=51.224960;6.775670;4.0&sorting=standard&searchType=radius&features=adKeysAndStringValues,virtualTour,contactDetails,grouping,nextgen,listingsInListFirstSummary,xxlListingType,fairPrice&channel=is24&realestatetype=housebuy&publishedafter=2025-05-14T09:19:43
|
||||
*/
|
||||
|
||||
/*
|
||||
Buy a house with shape
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/shape/haus-kaufen?shape=eW1yd0hpZGloQGBJa1NfQWFsQG9Uc1ZvVmlDbHdAZ2BAaEBjfEB5U3NWY2NCa0RvWmpwQG1KYGdCeldqU3Z4QGBAbENvQmJWaGtA&numberofrooms=1.0-100000.0&price=1.0-1.0E7&livingspace=1.0-100000.0&enteredFrom=result_list#/
|
||||
Mobile:
|
||||
https://api.mobile.immobilienscout24.de/search/map/v3?features=disableNHBGrouping,nextGen,fairPrice,listingsInListFirstSummary,xxlListingType,contactDetails&publishedafter=2025-05-14T09:19:43&sorting=standard&pagesize=300&searchType=shape&realEstateType=housebuy&pagenumber=1&shape=%7D%7BjwHy%7Cqh@jCKdCgAvB_BdB%7DBzAaCjAqCfAqC~@uCt@iCh@eCZkCLyC?_EO%7DEa@%7DEa@iE_@%7BD%5DaDe@gDi@gDo@uCu@kBcB_AeDOiE?iDCgCMuBOkDCkG?yFRgD%60@cB%5C%7BA%60@eBx@aB%7C@kAbAy@rAe@bBUxCAhE?dFh@fGlAzGbBbHlBxGdB%60FrAhDz@xBh@nAf@l@RNNXkCkMJR~B%7CEnCpErCnDtClCvC~ApCh@rCJpC?
|
||||
*/
|
||||
import queryString from 'query-string';
|
||||
|
||||
const PARAM_NAME_MAP = {
|
||||
heatingtypes: 'heatingtypes',
|
||||
haspromotion: 'haspromotion',
|
||||
numberofrooms: 'numberofrooms',
|
||||
livingspace: 'livingspace',
|
||||
energyefficiencyclasses: 'energyefficiencyclasses',
|
||||
exclusioncriteria: 'exclusioncriteria',
|
||||
equipment: 'equipment',
|
||||
petsallowedtypes: 'petsallowedtypes',
|
||||
price: 'price',
|
||||
constructionyear: 'constructionyear',
|
||||
apartmenttypes: 'apartmenttypes',
|
||||
pricetype: 'pricetype',
|
||||
floor: 'floor',
|
||||
geocodes: 'geocodes',
|
||||
geocoordinates: 'geocoordinates',
|
||||
shape: 'shape',
|
||||
sorting: 'sorting',
|
||||
newbuilding: 'newbuilding',
|
||||
};
|
||||
|
||||
const EQUIPMENT_MAP = {
|
||||
parking: 'parking',
|
||||
cellar: 'cellar',
|
||||
builtinkitchen: 'builtInKitchen',
|
||||
lift: 'lift',
|
||||
garden: 'garden',
|
||||
guesttoilet: 'guestToilet',
|
||||
balcony: 'balcony',
|
||||
handicappedaccessible: 'handicappedAccessible',
|
||||
};
|
||||
|
||||
const REAL_ESTATE_TYPE = {
|
||||
'haus-mieten': 'houserent',
|
||||
'wohnung-mieten': 'apartmentrent',
|
||||
'wohnung-kaufen': 'apartmentbuy',
|
||||
'haus-kaufen': 'housebuy',
|
||||
};
|
||||
|
||||
const WEB_PATH_TO_APARTMENT_EQUIPMENT_MAP = {
|
||||
// Category "Balkon/Terrasse"
|
||||
'wohnung-mit-balkon-mieten': { equipment: ['balcony'] },
|
||||
'wohnung-mit-garten-mieten': { equipment: ['garden'] },
|
||||
// Category "Wohnungstyp"
|
||||
'souterrainwohnung-mieten': { apartmenttypes: ['halfbasement'] },
|
||||
'erdgeschosswohnung-mieten': { apartmenttypes: ['groundfloor'] },
|
||||
'hochparterrewohnung-mieten': { apartmenttypes: ['raisedgroundfloor'] },
|
||||
'etagenwohnung-mieten': { apartmenttypes: ['apartment'] },
|
||||
'loft-mieten': { apartmenttypes: ['loft'] },
|
||||
'maisonette-mieten': { apartmenttypes: ['maisonette'] },
|
||||
'terrassenwohnung-mieten': { apartmenttypes: ['terracedflat'] },
|
||||
'penthouse-mieten': { apartmenttypes: ['penthouse'] },
|
||||
'dachgeschosswohnung-mieten': { apartmenttypes: ['roofstorey'] },
|
||||
// Category "Ausstattung"
|
||||
'wohnung-mit-garage-mieten': { equipment: ['parking'] },
|
||||
'wohnung-mit-einbaukueche-mieten': { equipment: ['builtinkitchen'] },
|
||||
'wohnung-mit-keller-mieten': { equipment: ['cellar'] },
|
||||
// Category "Merkmale"
|
||||
'neubauwohnung-mieten': { newbuilding: true },
|
||||
'barrierefreie-wohnung-mieten': { equipment: ['handicappedaccessible'] },
|
||||
};
|
||||
|
||||
export function convertWebToMobile(webUrl) {
|
||||
let url;
|
||||
try {
|
||||
url = new URL(webUrl);
|
||||
} catch {
|
||||
throw new Error(`Invalid URL: ${webUrl}`);
|
||||
}
|
||||
|
||||
const segments = url.pathname.split('/');
|
||||
if (segments[1] !== 'Suche') {
|
||||
throw new Error(`Unexpected path format: ${url.pathname}. We're expecting to see "/Suche" in the path.`);
|
||||
}
|
||||
|
||||
const realTypeKey = segments.at(-1);
|
||||
let realType = REAL_ESTATE_TYPE[realTypeKey];
|
||||
let additionalParamsFromWebPath;
|
||||
|
||||
if (!realType) {
|
||||
// Test for seo optimized apartment path (only used on the ImmoScout web app)
|
||||
if (WEB_PATH_TO_APARTMENT_EQUIPMENT_MAP[realTypeKey]) {
|
||||
additionalParamsFromWebPath = WEB_PATH_TO_APARTMENT_EQUIPMENT_MAP[realTypeKey];
|
||||
realType = REAL_ESTATE_TYPE['wohnung-mieten'];
|
||||
} else {
|
||||
throw new Error(`Real estate type not found: ${realTypeKey}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (segments.includes('shape')) {
|
||||
throw new Error('Shape is currently not supported using Immoscout');
|
||||
}
|
||||
|
||||
const { query: rawParams } = queryString.parseUrl(webUrl, { arrayFormat: 'comma' });
|
||||
const webParams = Object.fromEntries(
|
||||
Object.entries(rawParams).filter(([key]) => key !== 'enteredFrom' && PARAM_NAME_MAP[key]),
|
||||
);
|
||||
|
||||
const geocodes = `/${segments.slice(2, 5).join('/')}`;
|
||||
const isRadius = segments.includes('radius');
|
||||
const mobileParams = {
|
||||
searchType: isRadius ? 'radius' : 'region',
|
||||
realestatetype: realType,
|
||||
...(isRadius ? {} : { geocodes }),
|
||||
...additionalParamsFromWebPath,
|
||||
};
|
||||
|
||||
if (webParams.geocoordinates) {
|
||||
mobileParams.geocoordinates = webParams.geocoordinates;
|
||||
}
|
||||
|
||||
for (const [key, val] of Object.entries(webParams)) {
|
||||
if (key === 'equipment') {
|
||||
const items = [].concat(val).flatMap((v) => `${v}`.split(','));
|
||||
const currentEquipmentParams = mobileParams[PARAM_NAME_MAP[key]];
|
||||
mobileParams[PARAM_NAME_MAP[key]] = [
|
||||
...(currentEquipmentParams ?? []),
|
||||
...items.map((item) => EQUIPMENT_MAP[item.toLowerCase()]).filter(Boolean),
|
||||
];
|
||||
} else {
|
||||
mobileParams[PARAM_NAME_MAP[key]] = val;
|
||||
}
|
||||
}
|
||||
|
||||
const mobileQuery = queryString.stringify(mobileParams, {
|
||||
arrayFormat: 'comma',
|
||||
encode: true,
|
||||
skipEmptyString: true,
|
||||
});
|
||||
|
||||
return `https://api.mobile.immobilienscout24.de/search/list?${mobileQuery}`;
|
||||
}
|
||||
@@ -11,7 +11,6 @@ const db = new LowdashAdapter(adapter, { jobs: [] });
|
||||
|
||||
db.read();
|
||||
|
||||
|
||||
export const upsertJob = ({ jobId, name, blacklist = [], enabled = true, provider, notificationAdapter, userId }) => {
|
||||
const currentJob =
|
||||
jobId == null
|
||||
@@ -77,16 +76,25 @@ export const removeJobsByUserId = (userId) => {
|
||||
.value();
|
||||
db.write();
|
||||
};
|
||||
export const removeJobsByUserName = (userName) => {
|
||||
export const removeJobsByUserName = (userId) => {
|
||||
let removedDemoJobs = 0;
|
||||
db.chain
|
||||
.get('jobs')
|
||||
.filter((job) => job.username === userName)
|
||||
.forEach((job) => listingStorage.removeListings(job.id));
|
||||
.get('jobs')
|
||||
.filter((job) => job.userId === userId)
|
||||
.forEach((job) => {
|
||||
removedDemoJobs++;
|
||||
listingStorage.removeListings(job.id);
|
||||
});
|
||||
db.chain
|
||||
.get('jobs')
|
||||
.remove((job) => job.username === userName)
|
||||
.value();
|
||||
.get('jobs')
|
||||
.remove((job) => job.userId === userId)
|
||||
.value();
|
||||
db.write();
|
||||
if (removedDemoJobs > 0) {
|
||||
/* eslint-disable no-console */
|
||||
console.log(`Removed ${removedDemoJobs} demo jobs`);
|
||||
/* eslint-enable no-console */
|
||||
}
|
||||
};
|
||||
export const getJobs = () => {
|
||||
return db.chain
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { JSONFileSync } from 'lowdb/node';
|
||||
import {config, getDirName} from '../../utils.js';
|
||||
import { config, getDirName } from '../../utils.js';
|
||||
import * as hasher from '../security/hash.js';
|
||||
import { nanoid } from 'nanoid';
|
||||
import * as jobStorage from './jobStorage.js';
|
||||
@@ -7,23 +7,23 @@ import path from 'path';
|
||||
import LowdashAdapter from './LowDashAdapter.js';
|
||||
|
||||
const defaultData = {
|
||||
user: [
|
||||
//you probably want to change the default password ;)
|
||||
{
|
||||
id: nanoid(),
|
||||
lastLogin: Date.now(),
|
||||
username: 'admin',
|
||||
password: hasher.hash('admin'),
|
||||
isAdmin: true,
|
||||
},
|
||||
{
|
||||
id: nanoid(),
|
||||
lastLogin: Date.now(),
|
||||
username: 'demo',
|
||||
password: hasher.hash('demo'),
|
||||
isAdmin: true,
|
||||
},
|
||||
],
|
||||
user: [
|
||||
//you probably want to change the default password ;)
|
||||
{
|
||||
id: nanoid(),
|
||||
lastLogin: Date.now(),
|
||||
username: 'admin',
|
||||
password: hasher.hash('admin'),
|
||||
isAdmin: true,
|
||||
},
|
||||
{
|
||||
id: nanoid(),
|
||||
lastLogin: Date.now(),
|
||||
username: 'demo',
|
||||
password: hasher.hash('demo'),
|
||||
isAdmin: true,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const file = path.join(getDirName(), '../', 'db/users.json');
|
||||
@@ -86,34 +86,38 @@ export const removeUser = (userId) => {
|
||||
db.chain
|
||||
.set(
|
||||
'user',
|
||||
user.filter((u) => u.id !== userId)
|
||||
user.filter((u) => u.id !== userId),
|
||||
)
|
||||
.value();
|
||||
db.write();
|
||||
};
|
||||
|
||||
export const handleDemoUser = () => {
|
||||
if(!config.demoMode){
|
||||
const user = db.chain.get('user').value();
|
||||
db.chain.get('user').value();
|
||||
db.chain.set('user', user.filter((u) => u.username !== 'demo')).value();
|
||||
db.write();
|
||||
}else {
|
||||
const demoUser = db.chain
|
||||
.get('user')
|
||||
.filter((u) => u.username === 'demo')
|
||||
.value();
|
||||
if (demoUser == null || demoUser.length === 0) {
|
||||
db.chain.get('user')
|
||||
.value()
|
||||
.push({
|
||||
id: nanoid(),
|
||||
username: 'demo',
|
||||
password: hasher.hash('demo'),
|
||||
isAdmin: true,
|
||||
});
|
||||
db.write();
|
||||
}
|
||||
if (!config.demoMode) {
|
||||
const user = db.chain.get('user').value();
|
||||
db.chain
|
||||
.set(
|
||||
'user',
|
||||
user.filter((u) => u.username !== 'demo'),
|
||||
)
|
||||
.value();
|
||||
db.write();
|
||||
} else {
|
||||
const demoUser = db.chain
|
||||
.get('user')
|
||||
.filter((u) => u.username === 'demo')
|
||||
.value();
|
||||
if (demoUser == null || demoUser.length === 0) {
|
||||
db.chain
|
||||
.get('user')
|
||||
.value()
|
||||
.push({
|
||||
id: nanoid(),
|
||||
username: 'demo',
|
||||
password: hasher.hash('demo'),
|
||||
isAdmin: true,
|
||||
});
|
||||
db.write();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -9,12 +9,9 @@ function inDevMode(){
|
||||
}
|
||||
|
||||
function isOneOf(word, arr) {
|
||||
if (arr == null || arr.length === 0) {
|
||||
return false;
|
||||
}
|
||||
const expression = String.raw`\b(${arr.join('|')})\b`;
|
||||
const blacklist = new RegExp(expression, 'ig');
|
||||
return blacklist.test(word);
|
||||
if (!arr || arr.length === 0 || word == null) return false;
|
||||
const lowerWord = word.toLowerCase();
|
||||
return arr.some(item => lowerWord.indexOf(item.toLowerCase()) !== -1);
|
||||
}
|
||||
|
||||
function nullOrEmpty(val) {
|
||||
|
||||
54
package.json
54
package.json
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "fredy",
|
||||
"version": "11.0.0",
|
||||
"version": "11.2.4",
|
||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||
"scripts": {
|
||||
"start": "node prod.js",
|
||||
@@ -50,30 +50,30 @@
|
||||
"Firefox ESR"
|
||||
],
|
||||
"dependencies": {
|
||||
"@douyinfe/semi-ui": "2.71.3",
|
||||
"@douyinfe/semi-ui": "2.80.0",
|
||||
"@rematch/core": "2.2.0",
|
||||
"@rematch/loading": "2.1.2",
|
||||
"@sendgrid/mail": "8.1.4",
|
||||
"@vitejs/plugin-react": "4.3.4",
|
||||
"better-sqlite3": "^11.7.0",
|
||||
"body-parser": "1.20.3",
|
||||
"cheerio": "^1.0.0",
|
||||
"@sendgrid/mail": "8.1.5",
|
||||
"@vitejs/plugin-react": "4.5.2",
|
||||
"better-sqlite3": "^11.10.0",
|
||||
"body-parser": "2.2.0",
|
||||
"cheerio": "^1.1.0",
|
||||
"cookie-session": "2.1.0",
|
||||
"handlebars": "4.7.8",
|
||||
"highcharts": "12.1.0",
|
||||
"highcharts-react-official": "3.2.1",
|
||||
"highcharts": "12.2.0",
|
||||
"highcharts-react-official": "3.2.2",
|
||||
"lodash": "4.17.21",
|
||||
"lowdb": "6.0.1",
|
||||
"markdown": "^0.5.0",
|
||||
"mixpanel": "^0.18.0",
|
||||
"nanoid": "5.0.9",
|
||||
"mixpanel": "^0.18.1",
|
||||
"nanoid": "5.1.5",
|
||||
"node-fetch": "3.3.2",
|
||||
"node-mailjet": "6.0.6",
|
||||
"node-mailjet": "6.0.8",
|
||||
"package-up": "^5.0.0",
|
||||
"puppeteer": "^23.10.4",
|
||||
"puppeteer": "^24.10.1",
|
||||
"puppeteer-extra": "^3.3.6",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||
"query-string": "9.1.1",
|
||||
"query-string": "9.2.0",
|
||||
"react": "18.3.1",
|
||||
"react-dom": "18.3.1",
|
||||
"react-redux": "9.2.0",
|
||||
@@ -81,28 +81,28 @@
|
||||
"react-router-dom": "5.3.0",
|
||||
"redux": "5.0.1",
|
||||
"redux-thunk": "3.1.0",
|
||||
"restana": "4.9.9",
|
||||
"serve-static": "1.16.2",
|
||||
"restana": "5.0.0",
|
||||
"serve-static": "2.2.0",
|
||||
"slack": "11.0.2",
|
||||
"string-similarity": "^4.0.4",
|
||||
"vite": "5.4.11"
|
||||
"vite": "6.3.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/core": "7.26.0",
|
||||
"@babel/eslint-parser": "7.25.9",
|
||||
"@babel/preset-env": "7.26.0",
|
||||
"@babel/preset-react": "7.26.3",
|
||||
"chai": "5.1.2",
|
||||
"@babel/core": "7.27.3",
|
||||
"@babel/eslint-parser": "7.27.5",
|
||||
"@babel/preset-env": "7.27.2",
|
||||
"@babel/preset-react": "7.27.1",
|
||||
"chai": "5.2.0",
|
||||
"eslint": "8.56.0",
|
||||
"eslint-config-prettier": "8.8.0",
|
||||
"eslint-plugin-react": "7.37.2",
|
||||
"esmock": "2.6.9",
|
||||
"eslint-plugin-react": "7.37.5",
|
||||
"esmock": "2.7.0",
|
||||
"history": "5.3.0",
|
||||
"husky": "9.1.7",
|
||||
"less": "4.2.1",
|
||||
"lint-staged": "15.2.11",
|
||||
"less": "4.3.0",
|
||||
"lint-staged": "15.5.2",
|
||||
"mocha": "10.8.2",
|
||||
"prettier": "3.4.2",
|
||||
"prettier": "3.5.3",
|
||||
"redux-logger": "3.0.6"
|
||||
}
|
||||
}
|
||||
|
||||
80
reverse-engineered-immoscout.md
Normal file
80
reverse-engineered-immoscout.md
Normal file
@@ -0,0 +1,80 @@
|
||||
# Reverse Engineered Immoscout24's Mobile API
|
||||
|
||||
## What is Immoscout24?
|
||||
|
||||
Immobilienscout24 (commonly known as Immoscout) is one of Germany's largest and most popular real estate platforms. It serves as a marketplace where property owners, real estate agents, and property management companies can list apartments, houses, and commercial properties for rent or sale. For people searching for a new home in Germany, Immoscout is often one of the first platforms they check.
|
||||
|
||||
The platform allows users to filter properties based on various criteria such as location, price, size, number of rooms, and additional features like balconies or built-in kitchens. Immoscout24 is available both as a website and as a mobile application, making it accessible across different devices.
|
||||
|
||||
## Why do we do this?
|
||||
|
||||
Crawling Immoscout24 the oldschool way has become virtually impossible due to their extensive bot detection mechanisms. Immoscout has implemented various anti-scraping measures to prevent automated access to their platform. These measures can include:
|
||||
|
||||
1. IP-based rate limiting
|
||||
2. Browser fingerprinting
|
||||
3. CAPTCHA challenges
|
||||
4. Behavior analysis to detect non-human patterns
|
||||
5. JavaScript-based challenges that must be solved before content is displayed
|
||||
|
||||
These protections make it extremely difficult to reliably extract data from Immoscout using conventional web scraping approaches. Even with techniques like rotating proxies or mimicking human behavior, the bot detection systems have become increasingly effective at identifying and blocking automated access attempts.
|
||||
|
||||
## Mobile API Reverse Engineering
|
||||
|
||||
To work around these limitations, we are in the progress of reverse-engineering Immoscout24's mobile API. The mobile applications need to communicate with Immoscout's servers to retrieve listing data, and these API endpoints typically have fewer anti-bot protections than the web interface.
|
||||
|
||||
The mobile API provides several key endpoints:
|
||||
- Search total endpoint: Returns the total number of listings for a given query
|
||||
- Search list endpoint: Retrieves the actual listings with details
|
||||
- Expose endpoint: Returns detailed information about a specific listing
|
||||
|
||||
Challenges:
|
||||
1. Identifying the necessary endpoints and parameters required to perform searches
|
||||
2. Mapping the mobile API parameters to their web counterparts to maintain compatibility with existing search URLs
|
||||
|
||||
|
||||
## Api Specs
|
||||
|
||||
#### Search for Listings
|
||||
|
||||
`GET /search/total?{search parameters}`
|
||||
*Returns the total number of listings for the given query.*
|
||||
```
|
||||
curl -H "User-Agent: ImmoScout24_1410_30_._" \
|
||||
-H "Accept: application/json" \
|
||||
"https://api.mobile.immobilienscout24.de/search/total?searchType=region&realestatetype=apartmentrent&pricetype=calculatedtotalrent&geocodes=%2Fde%2Fberlin%2Fberlin"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### Retrieve the listings
|
||||
`POST /search/list?{search parameters}`
|
||||
*The body is json encoded and contains data specifying additional results (advertisements) to return. The format is as follows (It is not necessary to provide data for the specified keys.)*
|
||||
```
|
||||
{
|
||||
"supportedResultListTypes": [],
|
||||
"userData": {}
|
||||
}
|
||||
```
|
||||
```
|
||||
curl -X POST 'https://api.mobile.immobilienscout24.de/search/list?pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region&geocodes=%2Fde%2Fberlin%2Fberlin&pagenumber=1' \
|
||||
-H "Connection: keep-alive" \
|
||||
-H "User-Agent: ImmoScout24_1410_30_._" \
|
||||
-H "Accept: application/json" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"supportedResultListType":[],"userData":{}}'
|
||||
|
||||
```
|
||||
|
||||
---
|
||||
#### Get details of listings
|
||||
`GET /expose/{id}`
|
||||
The response contains additional details not included in the listing response.
|
||||
```
|
||||
curl -H "User-Agent: ImmoScout24_1410_30_._" \
|
||||
-H "Accept: application/json" \
|
||||
"https://api.mobile.immobilienscout24.de/expose/158382494"
|
||||
```
|
||||
|
||||
|
||||
## Parameters
|
||||
The parameters between web and mobile are very different which is why we have to translate them. Please see `immoscout-web-translator.js`.
|
||||
@@ -1,40 +1,38 @@
|
||||
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
||||
import { get } from '../mocks/mockNotification.js';
|
||||
import { mockFredy, providerConfig } from '../utils.js';
|
||||
import { expect } from 'chai';
|
||||
import {get} from '../mocks/mockNotification.js';
|
||||
import {mockFredy, providerConfig} from '../utils.js';
|
||||
import {expect} from 'chai';
|
||||
import * as provider from '../../lib/provider/immonet.js';
|
||||
|
||||
describe('#immonet testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
provider.init(providerConfig.immonet, [], []);
|
||||
it('should test immonet provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
return await new Promise((resolve) => {
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'immonet', similarityCache);
|
||||
fredy.execute().then((listing) => {
|
||||
expect(listing).to.be.a('array');
|
||||
const notificationObj = get();
|
||||
expect(notificationObj).to.be.a('object');
|
||||
expect(notificationObj.serviceName).to.equal('immonet');
|
||||
notificationObj.payload.forEach((notify) => {
|
||||
/** check the actual structure **/
|
||||
expect(notify.id).to.be.a('string');
|
||||
expect(notify.price).to.be.a('string');
|
||||
expect(notify.size).to.be.a('string');
|
||||
expect(notify.title).to.be.a('string');
|
||||
expect(notify.link).to.be.a('string');
|
||||
expect(notify.address).to.be.a('string');
|
||||
|
||||
/** check the values if possible **/
|
||||
expect(notify.price).that.does.include('€');
|
||||
expect(notify.size).that.does.include('m²');
|
||||
expect(notify.title).to.be.not.empty;
|
||||
expect(notify.address).to.be.not.empty;
|
||||
});
|
||||
resolve();
|
||||
});
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
provider.init(providerConfig.immonet, [], []);
|
||||
it('should test immonet provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
return await new Promise((resolve) => {
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'immonet', similarityCache);
|
||||
fredy.execute().then((listing) => {
|
||||
expect(listing).to.be.a('array');
|
||||
const notificationObj = get();
|
||||
expect(notificationObj).to.be.a('object');
|
||||
expect(notificationObj.serviceName).to.equal('immonet');
|
||||
notificationObj.payload.forEach((notify) => {
|
||||
/** check the actual structure **/
|
||||
expect(notify.id).to.be.a('string');
|
||||
expect(notify.price).to.be.a('string');
|
||||
expect(notify.size).to.be.a('string');
|
||||
expect(notify.title).to.be.a('string');
|
||||
expect(notify.link).to.be.a('string');
|
||||
expect(notify.address).to.be.a('string');
|
||||
|
||||
expect(notify.size).that.does.include('m²');
|
||||
expect(notify.title).to.be.not.empty;
|
||||
expect(notify.address).to.be.not.empty;
|
||||
});
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,43 +1,39 @@
|
||||
import { expect } from 'chai';
|
||||
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
||||
//import {get} from '../mocks/mockNotification.js';
|
||||
import {/*mockFredy, */providerConfig} from '../utils.js';
|
||||
//import {expect} from 'chai';
|
||||
import { mockFredy, providerConfig } from '../utils.js';
|
||||
import { get } from '../mocks/mockNotification.js';
|
||||
import * as provider from '../../lib/provider/immoscout.js';
|
||||
|
||||
describe('#immoscout testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
provider.init(providerConfig.immoscout, [], []);
|
||||
it('should test immoscout provider', async () => {
|
||||
//const Fredy = await mockFredy();
|
||||
return await new Promise((resolve) => {
|
||||
/* eslint-disable no-console */
|
||||
console.info('Skipping Immoscout test for now until we figured out how to surpass bot detection.');
|
||||
/* eslint-enable no-console */
|
||||
resolve();
|
||||
/*
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'immoscout', similarityCache);
|
||||
fredy.execute().then((listing) => {
|
||||
expect(listing).to.be.a('array');
|
||||
const notificationObj = get();
|
||||
expect(notificationObj).to.be.a('object');
|
||||
expect(notificationObj.serviceName).to.equal('immoscout');
|
||||
notificationObj.payload.forEach((notify) => {
|
||||
expect(notify.id).to.be.a('number');
|
||||
expect(notify.price).to.be.a('string');
|
||||
expect(notify.size).to.be.a('string');
|
||||
expect(notify.title).to.be.a('string');
|
||||
expect(notify.link).to.be.a('string');
|
||||
expect(notify.address).to.be.a('string');
|
||||
expect(notify.price).that.does.include('€');
|
||||
expect(notify.size).that.does.include('m²');
|
||||
expect(notify.title).to.be.not.empty;
|
||||
expect(notify.link).that.does.include('https://www.immobilienscout24.de');
|
||||
expect(notify.address).to.be.not.empty;
|
||||
});
|
||||
resolve();
|
||||
});*/
|
||||
describe('#immoscout provider testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
|
||||
provider.init(providerConfig.immoscout, [], []);
|
||||
it('should test immoscout provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
return await new Promise((resolve) => {
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, '', similarityCache);
|
||||
fredy.execute().then((listings) => {
|
||||
expect(listings).to.be.a('array');
|
||||
const notificationObj = get();
|
||||
expect(notificationObj).to.be.a('object');
|
||||
expect(notificationObj.serviceName).to.equal('immoscout');
|
||||
notificationObj.payload.forEach((notify) => {
|
||||
/** check the actual structure **/
|
||||
expect(notify.id).to.be.a('string');
|
||||
expect(notify.price).to.be.a('string');
|
||||
expect(notify.size).to.be.a('string');
|
||||
expect(notify.title).to.be.a('string');
|
||||
expect(notify.link).to.be.a('string');
|
||||
expect(notify.address).to.be.a('string');
|
||||
/** check the values if possible **/
|
||||
expect(notify.size).to.be.not.empty;
|
||||
expect(notify.title).to.be.not.empty;
|
||||
expect(notify.link).that.does.include('https://www.immobilienscout24.de/');
|
||||
});
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -37,7 +37,7 @@
|
||||
"enabled": true
|
||||
},
|
||||
"wgGesucht": {
|
||||
"url": "https://www.wg-gesucht.de/wg-zimmer-in-Duesseldorf.30.0.1.0.html?offer_filter=1&noDeact=1&city_id=30&category=0&rent_type=0&rMax=5000",
|
||||
"url": "https://www.wg-gesucht.de/wg-zimmer-in-Duesseldorf.30.0.1.0.html",
|
||||
"enabled": true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,14 +20,14 @@
|
||||
"shouldBecome": "https://www.immonet.de/immobiliensuche/sel.do?sortby=19&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
|
||||
"id": "immonet"
|
||||
},
|
||||
{
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten",
|
||||
"shouldBecome": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?sorting=2",
|
||||
"id": "immoscout"
|
||||
},
|
||||
{
|
||||
"url": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/",
|
||||
"shouldBecome": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/?Sortierung=Id&Richtung=DESC",
|
||||
"id": "neubauKompass"
|
||||
},
|
||||
{
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?numberofrooms=1.5-&price=1.0-1000000.0&livingspace=1.0-10000.0&pricetype=rentpermonth&enteredFrom=result_list",
|
||||
"shouldBecome": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?numberofrooms=1.5-&price=1.0-1000000.0&livingspace=1.0-10000.0&pricetype=rentpermonth&enteredFrom=result_list&sorting=-firstactivation",
|
||||
"id": "immoscout"
|
||||
}
|
||||
]
|
||||
76
test/services/immoscout/immoscout-web-translater.test.js
Normal file
76
test/services/immoscout/immoscout-web-translater.test.js
Normal file
@@ -0,0 +1,76 @@
|
||||
import { convertWebToMobile } from '../../../lib/services/immoscout/immoscout-web-translater.js';
|
||||
import { expect } from 'chai';
|
||||
import { readFile } from 'fs/promises';
|
||||
|
||||
export const testData = JSON.parse(await readFile(new URL('./testdata.json', import.meta.url)));
|
||||
|
||||
describe('#immoscout-mobile URL conversion', () => {
|
||||
// Test URL conversion
|
||||
it('should convert a full web URL to mobile URL', () => {
|
||||
const webUrl =
|
||||
'https://www.immobilienscout24.de/Suche/de/berlin/berlin/wohnung-mieten?heatingtypes=central,selfcontainedcentral&haspromotion=false&numberofrooms=2.0-5.0&livingspace=10.0-25.0&energyefficiencyclasses=a,b,c,d,e,f,g,h,a_plus&exclusioncriteria=projectlisting,swapflat&equipment=parking,cellar,builtinkitchen,lift,garden,guesttoilet,balcony&petsallowedtypes=no,yes,negotiable&price=10.0-100.0&constructionyear=1920-2026&apartmenttypes=halfbasement,penthouse,other,loft,groundfloor,terracedflat,raisedgroundfloor,roofstorey,apartment,maisonette&pricetype=calculatedtotalrent&floor=2-7&enteredFrom=result_list';
|
||||
const expectedMobileUrl =
|
||||
'https://api.mobile.immobilienscout24.de/search/list?apartmenttypes=halfbasement,penthouse,other,loft,groundfloor,terracedflat,raisedgroundfloor,roofstorey,apartment,maisonette&constructionyear=1920-2026&energyefficiencyclasses=a,b,c,d,e,f,g,h,a_plus&equipment=parking,cellar,builtInKitchen,lift,garden,guestToilet,balcony&exclusioncriteria=projectlisting,swapflat&floor=2-7&geocodes=%2Fde%2Fberlin%2Fberlin&haspromotion=false&heatingtypes=central,selfcontainedcentral&livingspace=10.0-25.0&numberofrooms=2.0-5.0&petsallowedtypes=no,yes,negotiable&price=10.0-100.0&pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region';
|
||||
|
||||
const actualMobileUrl = convertWebToMobile(webUrl);
|
||||
expect(actualMobileUrl).to.equal(expectedMobileUrl);
|
||||
});
|
||||
|
||||
// Test URL conversion of web-only SEO path
|
||||
it('should convert a SEO web path to the correct query params', () => {
|
||||
const webUrl = 'https://www.immobilienscout24.de/Suche/de/berlin/berlin/wohnung-mit-balkon-mieten?equipment=garden';
|
||||
|
||||
const converted = convertWebToMobile(webUrl);
|
||||
const queryParams = new URL(converted).searchParams;
|
||||
expect(queryParams.get('equipment').split(',')).to.include.members(['garden', 'balcony']);
|
||||
});
|
||||
|
||||
// Test URL conversion with unsupported query parameters
|
||||
it('should remove unsupported query parameters', () => {
|
||||
const webUrl = 'https://www.immobilienscout24.de/Suche/de/berlin/berlin/wohnung-mieten?minimuminternetspeed=100000';
|
||||
const converted = convertWebToMobile(webUrl);
|
||||
expect(converted).that.does.not.include('minimuminternetspeed');
|
||||
});
|
||||
|
||||
// Test URL conversion with invalid URL
|
||||
it('should throw an error for invalid URL', () => {
|
||||
const invalidUrl = 'invalid-url';
|
||||
|
||||
expect(() => convertWebToMobile(invalidUrl)).to.throw('Invalid URL: invalid-url');
|
||||
});
|
||||
|
||||
// Test URL conversion with unexpected path format
|
||||
it('should throw an error for unexpected path format', () => {
|
||||
const webUrl = 'https://www.immobilienscout24.de/invalid/path/format';
|
||||
expect(() => convertWebToMobile(webUrl)).to.throw('Unexpected path format: /invalid/path/format');
|
||||
});
|
||||
|
||||
it('shouldFindResultsForEveryTestData', async () => {
|
||||
for (const webUrlKey of Object.keys(testData)) {
|
||||
const url = convertWebToMobile(testData[webUrlKey].url);
|
||||
const type = testData[webUrlKey].type;
|
||||
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'User-Agent': 'ImmoScout24_1410_30_._',
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
supportedResultListTypes: [],
|
||||
userData: {},
|
||||
}),
|
||||
});
|
||||
if (!response.ok) {
|
||||
console.error('Error fetching data from ImmoScout Mobile API:', response.statusText);
|
||||
}
|
||||
|
||||
expect([null, true]).to.include(response.ok);
|
||||
const responseBody = await response.json();
|
||||
expect(responseBody.totalResults).to.be.greaterThan(0);
|
||||
expect(responseBody.totalResults).to.be.greaterThan(0);
|
||||
expect(responseBody.resultListItems.length).to.greaterThan(0);
|
||||
expect(responseBody.resultListItems[0].item.realEstateType).to.equal(type);
|
||||
}
|
||||
});
|
||||
});
|
||||
22
test/services/immoscout/testdata.json
Normal file
22
test/services/immoscout/testdata.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"buyHouseInParts": {
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/haus-kaufen?numberofrooms=1.0-10000.0&price=1.0-1000000.0E7&livingspace=1.0-10000.0&geocodes=1276010037,1276010014,1276010012&enteredFrom=result_list",
|
||||
"type": "housebuy"
|
||||
},
|
||||
"buyHouse": {
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/haus-kaufen?numberofrooms=1.0-10000.0&price=1.0-1000000.0E7&livingspace=1.0-10000.0&enteredFrom=result_list",
|
||||
"type": "housebuy"
|
||||
},
|
||||
"rentApartment": {
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?numberofrooms=1.5-&price=1.0-1000000.0&livingspace=1.0-10000.0&pricetype=rentpermonth&enteredFrom=result_list",
|
||||
"type": "apartmentrent"
|
||||
},
|
||||
"buyApartment": {
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-kaufen?numberofrooms=1.5-10000.0&price=1.0-1000000.0&livingspace=1.0-10000.0&enteredFrom=result_list",
|
||||
"type": "apartmentbuy"
|
||||
},
|
||||
"rentHouse": {
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/haus-mieten?enteredFrom=one_step_search",
|
||||
"type": "houserent"
|
||||
}
|
||||
}
|
||||
@@ -34,6 +34,7 @@ describe('similarityCheck', () => {
|
||||
check.setCacheEntry(
|
||||
'where |X| and |Y| are the cardinalities of the two sets (i.e. the number of elements in each set). The Sørensen index equals twice the number of elements common to both sets divided by the sum of the number of elements in each set.',
|
||||
);
|
||||
expect(check.hasSimilarEntries('unrelated text')).to.be.false;
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -94,7 +94,7 @@ export default function JobMutator() {
|
||||
<form>
|
||||
<SegmentPart name="Name">
|
||||
<Input
|
||||
autofocus
|
||||
autoFocus
|
||||
type="text"
|
||||
maxLength={40}
|
||||
placeholder="Name"
|
||||
|
||||
@@ -101,10 +101,7 @@ export default function ProviderMutator({ onVisibilityChanged, visible = false,
|
||||
description={
|
||||
<div>
|
||||
<p>
|
||||
Immoscout will not work at the moment due to advanced bot detection. I'm currently working on a fix.
|
||||
</p>
|
||||
<p>
|
||||
Until a fix has been released, Immoscout won't yield any results.
|
||||
Currently, our Immoscout implementation does not drawing shapes on a map. Use a radius instead.
|
||||
</p>
|
||||
</div>
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user