mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b113621202 | ||
|
|
030e0ca169 | ||
|
|
3aae81ca19 | ||
|
|
f1effe941f | ||
|
|
cd3631f910 | ||
|
|
8f490f2426 | ||
|
|
48e2ca942f | ||
|
|
b9e4bca244 | ||
|
|
a138dafc31 |
20
.github/workflows/test.yml
vendored
20
.github/workflows/test.yml
vendored
@@ -1,23 +1,23 @@
|
||||
name: Test
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
branches: [master]
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
branches: [master]
|
||||
schedule:
|
||||
- cron: '0 12 * * *'
|
||||
- cron: '0 12 * * *'
|
||||
|
||||
jobs:
|
||||
test:
|
||||
name: Test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v2.5.1
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'yarn'
|
||||
|
||||
- run: yarn install
|
||||
- run: yarn run test
|
||||
- run: yarn test
|
||||
|
||||
@@ -82,7 +82,7 @@ yarn run test
|
||||

|
||||
|
||||
### Immoscout
|
||||
Immoscout has implemented advanced bot detection. I’m actively working on bypassing these measures, but until then, selecting Immoscout as a provider will not return any results. I apologize for the inconvenience. 😉
|
||||
Immoscout has implemented advanced bot detection. To work around this, we are using a reverse-engineered version of their mobile API. See the [Immoscout Reverse Engineering Documentation](https://github.com/orangecoding/fredy/blob/master/reverse-engineered-immoscout.md).
|
||||
|
||||
# Analytics
|
||||
Fredy is completely free (and will always remain free). However, it would be a huge help if you’d allow me to collect some analytical data.
|
||||
|
||||
@@ -26,7 +26,7 @@ class FredyRuntime {
|
||||
//modify the url to make sure search order is correctly set
|
||||
Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam))
|
||||
//scraping the site and try finding new listings
|
||||
.then(this._getListings.bind(this))
|
||||
.then(this._providerConfig.getListings?.bind(this) ?? this._getListings.bind(this))
|
||||
//bring them in a proper form (dictated by the provider)
|
||||
.then(this._normalize.bind(this))
|
||||
//filter listings with stuff tagged by the blacklist of the provider
|
||||
|
||||
@@ -26,7 +26,7 @@ const config = {
|
||||
url: null,
|
||||
crawlContainer: 'div[data-testid="serp-core-classified-card-testid"]',
|
||||
sortByDateParam: 'sortby=19',
|
||||
waitForSelector: 'div[data-testid="serp-resultscount-testid"]',
|
||||
waitForSelector: 'div[data-testid="serp-gridcontainer-testid"]',
|
||||
crawlFields: {
|
||||
id: 'button@title |trim', // immonet is a piece of sh*t. See comment above
|
||||
title: 'button@title |trim',
|
||||
|
||||
@@ -1,37 +1,109 @@
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
/**
|
||||
* ImmoScout provider using the mobile API to retrieve listings.
|
||||
*
|
||||
* The mobile API provides the following endpoints:
|
||||
* - GET /search/total?{search parameters}: Returns the total number of listings for the given query
|
||||
* Example: `curl -H "User-Agent: ImmoScout24_1410_30_._" https://api.mobile.immobilienscout24.de/search/total?searchType=region&realestatetype=apartmentrent&pricetype=calculatedtotalrent&geocodes=%2Fde%2Fberlin%2Fberlin `
|
||||
*
|
||||
* - POST /search/list?{search parameters}: Actually retrieves the listings. Body is json encoded and contains
|
||||
* data specifying additional results (advertisements) to return. The format is as follows:
|
||||
* ```
|
||||
* {
|
||||
* "supportedResultListTypes": [],
|
||||
* "userData": {}
|
||||
* }
|
||||
* ```
|
||||
* It is not necessary to provide data for the specified keys.
|
||||
*
|
||||
* Example: `curl -X POST 'https://api.mobile.immobilienscout24.de/search/list?pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region&geocodes=%2Fde%2Fberlin%2Fberlin&pagenumber=1' -H "Connection: keep-alive" -H "User-Agent: ImmoScout24_1410_30_._" -H "Accept: application/json" -H "Content-Type: application/json" -d '{"supportedResultListTypes": [], "userData": {}}'`
|
||||
|
||||
* - GET /expose/{id} - Returns the details of a listing. The response contains additional details not included in the
|
||||
* listing response.
|
||||
*
|
||||
* Example: `curl -H "User-Agent: ImmoScout24_1410_30_._" "https://api.mobile.immobilienscout24.de/expose/158382494"`
|
||||
*
|
||||
*
|
||||
* It is necessary to set the correct User Agent (see `getListings`) in the request header.
|
||||
*
|
||||
* Note that the mobile API is not publicly documented. I've reverse-engineered
|
||||
* it by intercepting traffic from an android emulator running the immoscout app.
|
||||
* Moreover, the search parameters differ slightly from the web API. I've mapped them
|
||||
* to the web API parameters by comparing a search request with all parameters set between
|
||||
* the web and mobile API. The mobile API actually seems to be a superset of the web API,
|
||||
* but I have decided not to include new parameters as I wanted to keep the existing UX (i.e.,
|
||||
* users only have to provide a link to an existing search).
|
||||
*
|
||||
*/
|
||||
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { convertWebToMobile } from '../services/immoscout/immoscout-web-translater.js';
|
||||
let appliedBlackList = [];
|
||||
|
||||
async function getListings(url) {
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'User-Agent': 'ImmoScout24_1410_30_._',
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
supportedResultListTypes: [],
|
||||
userData: {},
|
||||
}),
|
||||
});
|
||||
if (!response.ok) {
|
||||
console.error('Error fetching data from ImmoScout Mobile API:', response.statusText);
|
||||
return [];
|
||||
}
|
||||
|
||||
const responseBody = await response.json();
|
||||
return responseBody.resultListItems
|
||||
.filter((item) => item.type === 'EXPOSE_RESULT')
|
||||
.map((expose) => {
|
||||
const item = expose.item;
|
||||
const [price, size] = item.attributes;
|
||||
return {
|
||||
id: item.id,
|
||||
price: price?.value,
|
||||
size: size?.value,
|
||||
title: item.title,
|
||||
link: `${metaInformation.baseUrl}expose/${item.id}`,
|
||||
address: item.address?.line,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
function nullOrEmpty(val) {
|
||||
return val == null || val.length === 0;
|
||||
}
|
||||
function normalize(o) {
|
||||
const title = nullOrEmpty(o.title) ? 'NO TITLE FOUND' : o.title.replace('NEU', '');
|
||||
const address = nullOrEmpty(o.address) ? 'NO ADDRESS FOUND' : (o.address || '').replace(/\(.*\),.*$/, '').trim();
|
||||
const link = nullOrEmpty(o.link) ? 'NO LINK' : `https://www.immobilienscout24.de${o.link.substring(o.link.indexOf('/expose'))}`;
|
||||
const id = buildHash(o.id, o.price);
|
||||
return Object.assign(o, { id, title, address, link });
|
||||
return Object.assign(o, { id, title, address });
|
||||
}
|
||||
function applyBlacklist(o) {
|
||||
return !utils.isOneOf(o.title, appliedBlackList);
|
||||
}
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#resultListItems li.result-list__listing',
|
||||
sortByDateParam: 'sorting=2',
|
||||
waitForSelector: 'body',
|
||||
crawlFields: {
|
||||
id: '.result-list-entry@data-obid | int',
|
||||
price: '.result-list-entry .result-list-entry__criteria .grid-item:first-child dd | removeNewline | trim',
|
||||
size: '.result-list-entry .result-list-entry__criteria .grid-item:nth-child(2) dd | removeNewline | trim',
|
||||
title: '.result-list-entry .result-list-entry__brand-title-container h2 | removeNewline | trim',
|
||||
link: '.result-list-entry .result-list-entry__brand-title-container@href',
|
||||
address: '.result-list-entry .result-list-entry__map-link',
|
||||
id: 'id',
|
||||
title: 'title',
|
||||
price: 'price',
|
||||
size: 'size',
|
||||
link: 'link',
|
||||
address: 'address',
|
||||
},
|
||||
// Not required - used by filter to remove any listings that failed to parse
|
||||
sortByDateParam: 'sorting=-firstactivation',
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
getListings: getListings,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
config.url = convertWebToMobile(sourceConfig.url);
|
||||
appliedBlackList = blacklist || [];
|
||||
};
|
||||
export const metaInformation = {
|
||||
@@ -39,4 +111,5 @@ export const metaInformation = {
|
||||
baseUrl: 'https://www.immobilienscout24.de/',
|
||||
id: 'immoscout',
|
||||
};
|
||||
|
||||
export { config };
|
||||
|
||||
@@ -1,48 +1,48 @@
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
function normalize(o) {
|
||||
const size = o.size || 'N/A m²';
|
||||
const price = (o.price || '--- €').replace('Preis auf Anfrage', '--- €');
|
||||
const title = o.title || 'No title available';
|
||||
const immoId = o.id.substring(o.id.indexOf('-') + 1, o.id.length);
|
||||
const link = `https://immo.swp.de/immobilien/${immoId}`;
|
||||
const description = o.description;
|
||||
const id = buildHash(immoId, price);
|
||||
return Object.assign(o, {id, price, size, title, link, description});
|
||||
const size = o.size || 'N/A m²';
|
||||
const price = (o.price || '--- €').replace('Preis auf Anfrage', '--- €');
|
||||
const title = o.title || 'No title available';
|
||||
const immoId = o.id.substring(o.id.indexOf('-') + 1, o.id.length);
|
||||
const link = `https://immo.swp.de/immobilien/${immoId}`;
|
||||
const description = o.description;
|
||||
const id = buildHash(immoId, price);
|
||||
return Object.assign(o, { id, price, size, title, link, description });
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '.js-serp-item',
|
||||
sortByDateParam: 's=most_recently_updated_first',
|
||||
waitForSelector: 'body',
|
||||
crawlFields: {
|
||||
id: '.js-bookmark-btn@data-id',
|
||||
price: 'div.align-items-start div:first-child | trim',
|
||||
size: 'div.align-items-start div:nth-child(3) | trim',
|
||||
title: '.card-title h2 | trim',
|
||||
link: '.ci-search-result__link@href',
|
||||
description: '.js-show-more-item-sm | removeNewline | trim',
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
url: null,
|
||||
crawlContainer: '.js-serp-item',
|
||||
sortByDateParam: 's=most_recently_updated_first',
|
||||
waitForSelector: 'body',
|
||||
crawlFields: {
|
||||
id: '.js-bookmark-btn@data-id',
|
||||
price: 'div.align-items-start div:first-child | trim',
|
||||
size: 'div.align-items-start div:nth-child(3) | trim',
|
||||
title: '.js-item-title-link@title | trim',
|
||||
link: '.ci-search-result__link@href',
|
||||
description: '.js-show-more-item-sm | removeNewline | trim',
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlackList = blacklist || [];
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlackList = blacklist || [];
|
||||
};
|
||||
export const metaInformation = {
|
||||
name: 'Immo Südwest Presse',
|
||||
baseUrl: 'https://immo.swp.de/',
|
||||
id: 'immoswp',
|
||||
name: 'Immo Südwest Presse',
|
||||
baseUrl: 'https://immo.swp.de/',
|
||||
id: 'immoswp',
|
||||
};
|
||||
export {config};
|
||||
export { config };
|
||||
|
||||
@@ -23,7 +23,7 @@ const config = {
|
||||
id: 'a@href',
|
||||
price: 'div[data-testid="cardmfe-price-testid"] | removeNewline | trim',
|
||||
size: 'div[data-testid="cardmfe-keyfacts-testid"] | removeNewline | trim',
|
||||
title: '.css-1cbj9xw',
|
||||
title: '.css-jv3zx6',
|
||||
link: 'a@href',
|
||||
address: 'div[data-testid="cardmfe-description-box-address"] | removeNewline | trim',
|
||||
},
|
||||
|
||||
157
lib/services/immoscout/immoscout-web-translater.js
Normal file
157
lib/services/immoscout/immoscout-web-translater.js
Normal file
@@ -0,0 +1,157 @@
|
||||
/*
|
||||
Rent a flat
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?numberofrooms=1.0-10000.0&price=1.0-10000.0&livingspace=10.0-10000.0&pricetype=rentpermonth&enteredFrom=result_list
|
||||
*/
|
||||
|
||||
/*
|
||||
Rent a flat:
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?enteredFrom=one_step_search
|
||||
Mobile:
|
||||
https://api.mobile.immobilienscout24.de/search/list?numberofrooms=1.5-&searchId=d7c127d8-6630-49e8-a1dd-5ae04dad454d&sorting=standard&pagesize=20&livingspace=10-500&pagenumber=1&realestatetype=apartmentrent&priceType=calculatedtotalrent&price=1-10000&publishedafter=2025-05-14T09:11:54&channel=is24&searchType=region&geocodes=/de/nordrhein-westfalen/duesseldorf&features=adKeysAndStringValues,virtualTour,contactDetails,viareporting,nextgen,calculatedTotalRent,listingsInListFirstSummary,xxlListingType,quickfilters,grouping,projectsInAllRealestateTypes,fairPrice
|
||||
*/
|
||||
|
||||
/*
|
||||
Rent a house:
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/haus-mieten?enteredFrom=one_step_search
|
||||
Mobile:
|
||||
https://api.mobile.immobilienscout24.de/search/map/v3?publishedafter=2025-05-14T09:12:49&pagenumber=1&searchType=region&geocodes=/de/nordrhein-westfalen/duesseldorf&realEstateType=houserent&pagesize=300&features=disableNHBGrouping,nextGen,fairPrice,listingsInListFirstSummary,xxlListingType,contactDetails&sorting=standard
|
||||
*/
|
||||
|
||||
/*
|
||||
buy a flat
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-kaufen?numberofrooms=1.0-10000.0&price=1.0-10000.0&livingspace=1.0-10000.0&enteredFrom=result_list
|
||||
Mobile:
|
||||
https://api.mobile.immobilienscout24.de/search/map/v3?features=disableNHBGrouping,nextGen,fairPrice,listingsInListFirstSummary,xxlListingType,contactDetails&sorting=standard&realEstateType=apartmentbuy&pagesize=300&pagenumber=1&geocodes=/de/nordrhein-westfalen/duesseldorf&publishedafter=2025-05-14T09:14:43&searchType=region
|
||||
*/
|
||||
|
||||
/*
|
||||
Buy a house
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/haus-kaufen?numberofrooms=1.0-10000.0&price=1.0-10000.0E7&livingspace=1.0-10000.0&enteredFrom=result_list
|
||||
Mobile:
|
||||
https://api.mobile.immobilienscout24.de/search/map/v3?geocodes=/de/nordrhein-westfalen/duesseldorf&features=disableNHBGrouping,nextGen,fairPrice,listingsInListFirstSummary,xxlListingType,contactDetails&searchType=region&realEstateType=housebuy&pagenumber=1&pagesize=300&sorting=standard&publishedafter=2025-05-14T09:16:28
|
||||
*/
|
||||
|
||||
/*
|
||||
Buy a house only in parts of a city
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/haus-kaufen?numberofrooms=1.0-10000.0&price=1.0-10000.0E7&livingspace=1.0-10000.0&geocodes=1276010037,1276010014,1276010012&enteredFrom=result_list
|
||||
Mobile:
|
||||
https://api.mobile.immobilienscout24.de/search/list?pagesize=20&pagenumber=1&features=adKeysAndStringValues,virtualTour,contactDetails,viareporting,grouping,nextgen,listingsInListFirstSummary,xxlListingType,quickfilters,fairPrice&sorting=standard&channel=is24&geocodes=/de/nordrhein-westfalen/duesseldorf/stadtbezirk-1&searchType=region&realestatetype=housebuy&publishedafter=2025-05-14T09:17:23
|
||||
*/
|
||||
|
||||
/*
|
||||
Buy a house with radius
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/radius/haus-kaufen?centerofsearchaddress=D%C3%BCsseldorf%3B%3B%3B%3B%3B%3B&numberofrooms=1.0-10000.0&price=1.0-1.0E7&livingspace=1.0-10000.0&geocoordinates=51.22496%3B6.77567%3B5.0&enteredFrom=result_list
|
||||
Mobile:
|
||||
https://api.mobile.immobilienscout24.de/home/search/total?pagenumber=1&pagesize=1&geocoordinates=51.224960;6.775670;4.0&sorting=standard&searchType=radius&features=adKeysAndStringValues,virtualTour,contactDetails,grouping,nextgen,listingsInListFirstSummary,xxlListingType,fairPrice&channel=is24&realestatetype=housebuy&publishedafter=2025-05-14T09:19:43
|
||||
*/
|
||||
|
||||
/*
|
||||
Buy a house with shape
|
||||
Web:
|
||||
https://www.immobilienscout24.de/Suche/shape/haus-kaufen?shape=eW1yd0hpZGloQGBJa1NfQWFsQG9Uc1ZvVmlDbHdAZ2BAaEBjfEB5U3NWY2NCa0RvWmpwQG1KYGdCeldqU3Z4QGBAbENvQmJWaGtA&numberofrooms=1.0-100000.0&price=1.0-1.0E7&livingspace=1.0-100000.0&enteredFrom=result_list#/
|
||||
Mobile:
|
||||
https://api.mobile.immobilienscout24.de/search/map/v3?features=disableNHBGrouping,nextGen,fairPrice,listingsInListFirstSummary,xxlListingType,contactDetails&publishedafter=2025-05-14T09:19:43&sorting=standard&pagesize=300&searchType=shape&realEstateType=housebuy&pagenumber=1&shape=%7D%7BjwHy%7Cqh@jCKdCgAvB_BdB%7DBzAaCjAqCfAqC~@uCt@iCh@eCZkCLyC?_EO%7DEa@%7DEa@iE_@%7BD%5DaDe@gDi@gDo@uCu@kBcB_AeDOiE?iDCgCMuBOkDCkG?yFRgD%60@cB%5C%7BA%60@eBx@aB%7C@kAbAy@rAe@bBUxCAhE?dFh@fGlAzGbBbHlBxGdB%60FrAhDz@xBh@nAf@l@RNNXkCkMJR~B%7CEnCpErCnDtClCvC~ApCh@rCJpC?
|
||||
*/
|
||||
import queryString from 'query-string';
|
||||
|
||||
const PARAM_NAME_MAP = {
|
||||
heatingtypes: 'heatingtypes',
|
||||
haspromotion: 'haspromotion',
|
||||
numberofrooms: 'numberofrooms',
|
||||
livingspace: 'livingspace',
|
||||
energyefficiencyclasses: 'energyefficiencyclasses',
|
||||
exclusioncriteria: 'exclusioncriteria',
|
||||
equipment: 'equipment',
|
||||
petsallowedtypes: 'petsallowedtypes',
|
||||
price: 'price',
|
||||
constructionyear: 'constructionyear',
|
||||
apartmenttypes: 'apartmenttypes',
|
||||
pricetype: 'pricetype',
|
||||
floor: 'floor',
|
||||
geocodes: 'geocodes',
|
||||
geocoordinates: 'geocoordinates',
|
||||
shape: 'shape',
|
||||
sorting: 'sorting',
|
||||
};
|
||||
|
||||
const EQUIPMENT_MAP = {
|
||||
parking: 'parking',
|
||||
cellar: 'cellar',
|
||||
builtinkitchen: 'builtInKitchen',
|
||||
lift: 'lift',
|
||||
garden: 'garden',
|
||||
guesttoilet: 'guestToilet',
|
||||
balcony: 'balcony',
|
||||
};
|
||||
|
||||
const REAL_ESTATE_TYPE = {
|
||||
'haus-mieten': 'houserent',
|
||||
'wohnung-mieten': 'apartmentrent',
|
||||
'wohnung-kaufen': 'apartmentbuy',
|
||||
'haus-kaufen': 'housebuy',
|
||||
};
|
||||
|
||||
export function convertWebToMobile(webUrl) {
|
||||
let url;
|
||||
try {
|
||||
url = new URL(webUrl);
|
||||
} catch {
|
||||
throw new Error(`Invalid URL: ${webUrl}`);
|
||||
}
|
||||
|
||||
const segments = url.pathname.split('/');
|
||||
if (segments[1] !== 'Suche') {
|
||||
throw new Error(`Unexpected path format: ${url.pathname}. We're expecting to see "/Suche" in the path.`);
|
||||
}
|
||||
|
||||
const realTypeKey = segments.at(-1);
|
||||
const realType = REAL_ESTATE_TYPE[realTypeKey];
|
||||
if (!realType) {
|
||||
throw new Error(`Real estate type not found: ${realTypeKey}`);
|
||||
}
|
||||
|
||||
if (segments.includes('shape')) {
|
||||
throw new Error('Shape is currently not supported using Immoscout');
|
||||
}
|
||||
|
||||
const { query: rawParams } = queryString.parseUrl(webUrl, { arrayFormat: 'comma' });
|
||||
const webParams = Object.fromEntries(
|
||||
Object.entries(rawParams).filter(([key]) => key !== 'enteredFrom' && PARAM_NAME_MAP[key]),
|
||||
);
|
||||
|
||||
const geocodes = `/${segments.slice(2, 5).join('/')}`;
|
||||
const isRadius = segments.includes('radius');
|
||||
const mobileParams = {
|
||||
searchType: isRadius ? 'radius' : 'region',
|
||||
realestatetype: realType,
|
||||
...(isRadius ? {} : { geocodes }),
|
||||
};
|
||||
|
||||
if (webParams.geocoordinates) {
|
||||
mobileParams.geocoordinates = webParams.geocoordinates;
|
||||
}
|
||||
|
||||
for (const [key, val] of Object.entries(webParams)) {
|
||||
if (key === 'equipment') {
|
||||
const items = [].concat(val).flatMap((v) => `${v}`.split(','));
|
||||
mobileParams[PARAM_NAME_MAP[key]] = items.map((item) => EQUIPMENT_MAP[item.toLowerCase()]).filter(Boolean);
|
||||
} else {
|
||||
mobileParams[PARAM_NAME_MAP[key]] = val;
|
||||
}
|
||||
}
|
||||
|
||||
const mobileQuery = queryString.stringify(mobileParams, {
|
||||
arrayFormat: 'comma',
|
||||
encode: true,
|
||||
skipEmptyString: true,
|
||||
});
|
||||
|
||||
return `https://api.mobile.immobilienscout24.de/search/list?${mobileQuery}`;
|
||||
}
|
||||
40
package.json
40
package.json
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "fredy",
|
||||
"version": "11.0.4",
|
||||
"version": "11.2.0",
|
||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||
"scripts": {
|
||||
"start": "node prod.js",
|
||||
@@ -50,30 +50,30 @@
|
||||
"Firefox ESR"
|
||||
],
|
||||
"dependencies": {
|
||||
"@douyinfe/semi-ui": "2.75.0",
|
||||
"@douyinfe/semi-ui": "2.79.0",
|
||||
"@rematch/core": "2.2.0",
|
||||
"@rematch/loading": "2.1.2",
|
||||
"@sendgrid/mail": "8.1.4",
|
||||
"@vitejs/plugin-react": "4.3.4",
|
||||
"better-sqlite3": "^11.8.1",
|
||||
"body-parser": "1.20.3",
|
||||
"@sendgrid/mail": "8.1.5",
|
||||
"@vitejs/plugin-react": "4.4.1",
|
||||
"better-sqlite3": "^11.10.0",
|
||||
"body-parser": "2.2.0",
|
||||
"cheerio": "^1.0.0",
|
||||
"cookie-session": "2.1.0",
|
||||
"handlebars": "4.7.8",
|
||||
"highcharts": "12.1.2",
|
||||
"highcharts-react-official": "3.2.1",
|
||||
"highcharts": "12.2.0",
|
||||
"highcharts-react-official": "3.2.2",
|
||||
"lodash": "4.17.21",
|
||||
"lowdb": "6.0.1",
|
||||
"markdown": "^0.5.0",
|
||||
"mixpanel": "^0.18.0",
|
||||
"nanoid": "5.1.2",
|
||||
"mixpanel": "^0.18.1",
|
||||
"nanoid": "5.1.5",
|
||||
"node-fetch": "3.3.2",
|
||||
"node-mailjet": "6.0.6",
|
||||
"node-mailjet": "6.0.8",
|
||||
"package-up": "^5.0.0",
|
||||
"puppeteer": "^24.2.1",
|
||||
"puppeteer": "^24.8.2",
|
||||
"puppeteer-extra": "^3.3.6",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||
"query-string": "9.1.1",
|
||||
"query-string": "9.1.2",
|
||||
"react": "18.3.1",
|
||||
"react-dom": "18.3.1",
|
||||
"react-redux": "9.2.0",
|
||||
@@ -88,10 +88,10 @@
|
||||
"vite": "5.4.11"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/core": "7.26.9",
|
||||
"@babel/eslint-parser": "7.26.8",
|
||||
"@babel/preset-env": "7.26.9",
|
||||
"@babel/preset-react": "7.26.3",
|
||||
"@babel/core": "7.27.1",
|
||||
"@babel/eslint-parser": "7.27.1",
|
||||
"@babel/preset-env": "7.27.2",
|
||||
"@babel/preset-react": "7.27.1",
|
||||
"chai": "5.2.0",
|
||||
"eslint": "8.56.0",
|
||||
"eslint-config-prettier": "8.8.0",
|
||||
@@ -99,10 +99,10 @@
|
||||
"esmock": "2.7.0",
|
||||
"history": "5.3.0",
|
||||
"husky": "9.1.7",
|
||||
"less": "4.2.2",
|
||||
"lint-staged": "15.4.3",
|
||||
"less": "4.3.0",
|
||||
"lint-staged": "15.5.2",
|
||||
"mocha": "10.8.2",
|
||||
"prettier": "3.5.2",
|
||||
"prettier": "3.5.3",
|
||||
"redux-logger": "3.0.6"
|
||||
}
|
||||
}
|
||||
|
||||
80
reverse-engineered-immoscout.md
Normal file
80
reverse-engineered-immoscout.md
Normal file
@@ -0,0 +1,80 @@
|
||||
# Reverse Engineered Immoscout24's Mobile API
|
||||
|
||||
## What is Immoscout24?
|
||||
|
||||
Immobilienscout24 (commonly known as Immoscout) is one of Germany's largest and most popular real estate platforms. It serves as a marketplace where property owners, real estate agents, and property management companies can list apartments, houses, and commercial properties for rent or sale. For people searching for a new home in Germany, Immoscout is often one of the first platforms they check.
|
||||
|
||||
The platform allows users to filter properties based on various criteria such as location, price, size, number of rooms, and additional features like balconies or built-in kitchens. Immoscout24 is available both as a website and as a mobile application, making it accessible across different devices.
|
||||
|
||||
## Why do we do this?
|
||||
|
||||
Crawling Immoscout24 the old-school way has become virtually impossible due to their extensive bot detection mechanisms. Immoscout has implemented various anti-scraping measures to prevent automated access to their platform. These measures can include:
|
||||
|
||||
1. IP-based rate limiting
|
||||
2. Browser fingerprinting
|
||||
3. CAPTCHA challenges
|
||||
4. Behavior analysis to detect non-human patterns
|
||||
5. JavaScript-based challenges that must be solved before content is displayed
|
||||
|
||||
These protections make it extremely difficult to reliably extract data from Immoscout using conventional web scraping approaches. Even with techniques like rotating proxies or mimicking human behavior, the bot detection systems have become increasingly effective at identifying and blocking automated access attempts.
|
||||
|
||||
## Mobile API Reverse Engineering
|
||||
|
||||
To work around these limitations, we are in the process of reverse-engineering Immoscout24's mobile API. The mobile applications need to communicate with Immoscout's servers to retrieve listing data, and these API endpoints typically have fewer anti-bot protections than the web interface.
|
||||
|
||||
The mobile API provides several key endpoints:
|
||||
- Search total endpoint: Returns the total number of listings for a given query
|
||||
- Search list endpoint: Retrieves the actual listings with details
|
||||
- Expose endpoint: Returns detailed information about a specific listing
|
||||
|
||||
Challenges:
|
||||
1. Identifying the necessary endpoints and parameters required to perform searches
|
||||
2. Mapping the mobile API parameters to their web counterparts to maintain compatibility with existing search URLs
|
||||
|
||||
|
||||
## Api Specs
|
||||
|
||||
#### Search for Listings
|
||||
|
||||
`GET /search/total?{search parameters}`
|
||||
*Returns the total number of listings for the given query.*
|
||||
```
|
||||
curl -H "User-Agent: ImmoScout24_1410_30_._" \
|
||||
-H "Accept: application/json" \
|
||||
"https://api.mobile.immobilienscout24.de/search/total?searchType=region&realestatetype=apartmentrent&pricetype=calculatedtotalrent&geocodes=%2Fde%2Fberlin%2Fberlin"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### Retrieve the listings
|
||||
`POST /search/list?{search parameters}`
|
||||
*The body is json encoded and contains data specifying additional results (advertisements) to return. The format is as follows (It is not necessary to provide data for the specified keys.)*
|
||||
```
|
||||
{
|
||||
"supportedResultListTypes": [],
|
||||
"userData": {}
|
||||
}
|
||||
```
|
||||
```
|
||||
curl -X POST 'https://api.mobile.immobilienscout24.de/search/list?pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region&geocodes=%2Fde%2Fberlin%2Fberlin&pagenumber=1' \
|
||||
-H "Connection: keep-alive" \
|
||||
-H "User-Agent: ImmoScout24_1410_30_._" \
|
||||
-H "Accept: application/json" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"supportedResultListTypes":[],"userData":{}}'
|
||||
|
||||
```
|
||||
|
||||
---
|
||||
#### Get details of listings
|
||||
`GET /expose/{id}`
|
||||
The response contains additional details not included in the listing response.
|
||||
```
|
||||
curl -H "User-Agent: ImmoScout24_1410_30_._" \
|
||||
-H "Accept: application/json" \
|
||||
"https://api.mobile.immobilienscout24.de/expose/158382494"
|
||||
```
|
||||
|
||||
|
||||
## Parameters
|
||||
The parameters of the web and mobile APIs differ considerably, which is why we have to translate them. Please see `lib/services/immoscout/immoscout-web-translater.js`.
|
||||
@@ -1,43 +1,39 @@
|
||||
import { expect } from 'chai';
|
||||
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
||||
//import {get} from '../mocks/mockNotification.js';
|
||||
import {/*mockFredy, */providerConfig} from '../utils.js';
|
||||
//import {expect} from 'chai';
|
||||
import { mockFredy, providerConfig } from '../utils.js';
|
||||
import { get } from '../mocks/mockNotification.js';
|
||||
import * as provider from '../../lib/provider/immoscout.js';
|
||||
|
||||
describe('#immoscout testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
provider.init(providerConfig.immoscout, [], []);
|
||||
it('should test immoscout provider', async () => {
|
||||
//const Fredy = await mockFredy();
|
||||
return await new Promise((resolve) => {
|
||||
/* eslint-disable no-console */
|
||||
console.info('Skipping Immoscout test for now until we figured out how to surpass bot detection.');
|
||||
/* eslint-enable no-console */
|
||||
resolve();
|
||||
/*
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'immoscout', similarityCache);
|
||||
fredy.execute().then((listing) => {
|
||||
expect(listing).to.be.a('array');
|
||||
const notificationObj = get();
|
||||
expect(notificationObj).to.be.a('object');
|
||||
expect(notificationObj.serviceName).to.equal('immoscout');
|
||||
notificationObj.payload.forEach((notify) => {
|
||||
expect(notify.id).to.be.a('number');
|
||||
expect(notify.price).to.be.a('string');
|
||||
expect(notify.size).to.be.a('string');
|
||||
expect(notify.title).to.be.a('string');
|
||||
expect(notify.link).to.be.a('string');
|
||||
expect(notify.address).to.be.a('string');
|
||||
expect(notify.price).that.does.include('€');
|
||||
expect(notify.size).that.does.include('m²');
|
||||
expect(notify.title).to.be.not.empty;
|
||||
expect(notify.link).that.does.include('https://www.immobilienscout24.de');
|
||||
expect(notify.address).to.be.not.empty;
|
||||
});
|
||||
resolve();
|
||||
});*/
|
||||
describe('#immoscout provider testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
|
||||
provider.init(providerConfig.immoscout, [], []);
|
||||
it('should test immoscout provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
return await new Promise((resolve) => {
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, '', similarityCache);
|
||||
fredy.execute().then((listings) => {
|
||||
expect(listings).to.be.a('array');
|
||||
const notificationObj = get();
|
||||
expect(notificationObj).to.be.a('object');
|
||||
expect(notificationObj.serviceName).to.equal('immoscout');
|
||||
notificationObj.payload.forEach((notify) => {
|
||||
/** check the actual structure **/
|
||||
expect(notify.id).to.be.a('string');
|
||||
expect(notify.price).to.be.a('string');
|
||||
expect(notify.size).to.be.a('string');
|
||||
expect(notify.title).to.be.a('string');
|
||||
expect(notify.link).to.be.a('string');
|
||||
expect(notify.address).to.be.a('string');
|
||||
/** check the values if possible **/
|
||||
expect(notify.size).to.be.not.empty;
|
||||
expect(notify.title).to.be.not.empty;
|
||||
expect(notify.link).that.does.include('https://www.immobilienscout24.de/');
|
||||
});
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -20,14 +20,14 @@
|
||||
"shouldBecome": "https://www.immonet.de/immobiliensuche/sel.do?sortby=19&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
|
||||
"id": "immonet"
|
||||
},
|
||||
{
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten",
|
||||
"shouldBecome": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?sorting=2",
|
||||
"id": "immoscout"
|
||||
},
|
||||
{
|
||||
"url": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/",
|
||||
"shouldBecome": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/?Sortierung=Id&Richtung=DESC",
|
||||
"id": "neubauKompass"
|
||||
},
|
||||
{
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?numberofrooms=1.5-&price=1.0-1000000.0&livingspace=1.0-10000.0&pricetype=rentpermonth&enteredFrom=result_list",
|
||||
"shouldBecome": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?numberofrooms=1.5-&price=1.0-1000000.0&livingspace=1.0-10000.0&pricetype=rentpermonth&enteredFrom=result_list&sorting=-firstactivation",
|
||||
"id": "immoscout"
|
||||
}
|
||||
]
|
||||
67
test/services/immoscout/immoscout-web-translater.test.js
Normal file
67
test/services/immoscout/immoscout-web-translater.test.js
Normal file
@@ -0,0 +1,67 @@
|
||||
import { convertWebToMobile } from '../../../lib/services/immoscout/immoscout-web-translater.js';
|
||||
import { expect } from 'chai';
|
||||
import { readFile } from 'fs/promises';
|
||||
|
||||
// Fixture file located next to this test module; it is read and parsed once at module load.
const testDataUrl = new URL('./testdata.json', import.meta.url);
export const testData = JSON.parse(await readFile(testDataUrl));
|
||||
|
||||
describe('#immoscout-mobile URL conversion', () => {
|
||||
// A fully parameterised web search URL must map to the expected mobile API URL.
it('should convert a full web URL to mobile URL', () => {
  const sourceUrl =
    'https://www.immobilienscout24.de/Suche/de/berlin/berlin/wohnung-mieten?heatingtypes=central,selfcontainedcentral&haspromotion=false&numberofrooms=2.0-5.0&livingspace=10.0-25.0&energyefficiencyclasses=a,b,c,d,e,f,g,h,a_plus&exclusioncriteria=projectlisting,swapflat&equipment=parking,cellar,builtinkitchen,lift,garden,guesttoilet,balcony&petsallowedtypes=no,yes,negotiable&price=10.0-100.0&constructionyear=1920-2026&apartmenttypes=halfbasement,penthouse,other,loft,groundfloor,terracedflat,raisedgroundfloor,roofstorey,apartment,maisonette&pricetype=calculatedtotalrent&floor=2-7&enteredFrom=result_list';
  const wantedUrl =
    'https://api.mobile.immobilienscout24.de/search/list?apartmenttypes=halfbasement,penthouse,other,loft,groundfloor,terracedflat,raisedgroundfloor,roofstorey,apartment,maisonette&constructionyear=1920-2026&energyefficiencyclasses=a,b,c,d,e,f,g,h,a_plus&equipment=parking,cellar,builtInKitchen,lift,garden,guestToilet,balcony&exclusioncriteria=projectlisting,swapflat&floor=2-7&geocodes=%2Fde%2Fberlin%2Fberlin&haspromotion=false&heatingtypes=central,selfcontainedcentral&livingspace=10.0-25.0&numberofrooms=2.0-5.0&petsallowedtypes=no,yes,negotiable&price=10.0-100.0&pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region';

  expect(convertWebToMobile(sourceUrl)).to.equal(wantedUrl);
});
|
||||
|
||||
// A query parameter not expected in the mobile URL must be absent from the converted URL.
it('should remove unsupported query parameters', () => {
  const mobileUrl = convertWebToMobile(
    'https://www.immobilienscout24.de/Suche/de/berlin/berlin/wohnung-mieten?minimuminternetspeed=100000',
  );

  expect(mobileUrl).to.not.include('minimuminternetspeed');
});
|
||||
|
||||
// Input that is not a URL must be rejected with a descriptive error.
it('should throw an error for invalid URL', () => {
  const convertInvalidUrl = () => convertWebToMobile('invalid-url');

  expect(convertInvalidUrl).to.throw('Invalid URL: invalid-url');
});
|
||||
|
||||
// A valid URL whose path is not a search path must be rejected as well.
it('should throw an error for unexpected path format', () => {
  const convertUnexpectedPath = () => convertWebToMobile('https://www.immobilienscout24.de/invalid/path/format');

  expect(convertUnexpectedPath).to.throw('Unexpected path format: /invalid/path/format');
});
|
||||
|
||||
// Live smoke test: converts every web URL from the test data, POSTs to the
// resulting mobile API URL and checks that matching listings come back.
// Requests run one after another on purpose (one `await` per iteration).
it('shouldFindResultsForEveryTestData', async () => {
  for (const [name, { url: webUrl, type }] of Object.entries(testData)) {
    const response = await fetch(convertWebToMobile(webUrl), {
      method: 'POST',
      headers: {
        'User-Agent': 'ImmoScout24_1410_30_._',
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        supportedResultListTypes: [],
        userData: {},
      }),
    });

    // Carry the HTTP status in the assertion message so a failing request is
    // reported together with the test-data entry that triggered it.
    expect(
      response.ok,
      `${name}: error fetching data from ImmoScout Mobile API: ${response.status} ${response.statusText}`,
    ).to.equal(true);

    const responseBody = await response.json();
    expect(responseBody.totalResults, `${name}: totalResults`).to.be.greaterThan(0);
    expect(responseBody.resultListItems.length, `${name}: resultListItems.length`).to.be.greaterThan(0);
    expect(responseBody.resultListItems[0].item.realEstateType, `${name}: realEstateType`).to.equal(type);
  }
});
|
||||
});
|
||||
22
test/services/immoscout/testdata.json
Normal file
22
test/services/immoscout/testdata.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"buyHouseInParts": {
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/haus-kaufen?numberofrooms=1.0-10000.0&price=1.0-1000000.0E7&livingspace=1.0-10000.0&geocodes=1276010037,1276010014,1276010012&enteredFrom=result_list",
|
||||
"type": "housebuy"
|
||||
},
|
||||
"buyHouse": {
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/haus-kaufen?numberofrooms=1.0-10000.0&price=1.0-1000000.0E7&livingspace=1.0-10000.0&enteredFrom=result_list",
|
||||
"type": "housebuy"
|
||||
},
|
||||
"rentApartment": {
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?numberofrooms=1.5-&price=1.0-1000000.0&livingspace=1.0-10000.0&pricetype=rentpermonth&enteredFrom=result_list",
|
||||
"type": "apartmentrent"
|
||||
},
|
||||
"buyApartment": {
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-kaufen?numberofrooms=1.5-10000.0&price=1.0-1000000.0&livingspace=1.0-10000.0&enteredFrom=result_list",
|
||||
"type": "apartmentbuy"
|
||||
},
|
||||
"rentHouse": {
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/haus-mieten?enteredFrom=one_step_search",
|
||||
"type": "houserent"
|
||||
}
|
||||
}
|
||||
@@ -101,10 +101,7 @@ export default function ProviderMutator({ onVisibilityChanged, visible = false,
|
||||
description={
|
||||
<div>
|
||||
<p>
|
||||
Immoscout will not work at the moment due to advanced bot detection. I'm currently working on a fix.
|
||||
</p>
|
||||
<p>
|
||||
Until a fix has been released, Immoscout won't yield any results.
|
||||
Currently, our Immoscout implementation does not support drawing shapes on a map. Use a radius instead.
|
||||
</p>
|
||||
</div>
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user