fixing immoscout scraper

This commit is contained in:
orangecoding
2026-01-26 19:52:37 +01:00
parent 7879d0e94a
commit 3117044139
5 changed files with 17 additions and 13 deletions

View File

@@ -8,7 +8,7 @@
* *
* The mobile API provides the following endpoints: * The mobile API provides the following endpoints:
* - GET /search/total?{search parameters}: Returns the total number of listings for the given query * - GET /search/total?{search parameters}: Returns the total number of listings for the given query
* Example: `curl -H "User-Agent: ImmoScout_27.3_26.0_._" "https://api.mobile.immobilienscout24.de/search/total?searchType=region&realestatetype=apartmentrent&pricetype=calculatedtotalrent&geocodes=%2Fde%2Fberlin%2Fberlin"` * Example: `curl -H "User-Agent: ImmoScout_27.12_26.2_._" "https://api.mobile.immobilienscout24.de/search/total?searchType=region&realestatetype=apartmentrent&pricetype=calculatedtotalrent&geocodes=%2Fde%2Fberlin%2Fberlin"`
* *
* - POST /search/list?{search parameters}: Actually retrieves the listings. Body is json encoded and contains * - POST /search/list?{search parameters}: Actually retrieves the listings. Body is json encoded and contains
* data specifying additional results (advertisements) to return. The format is as follows: * data specifying additional results (advertisements) to return. The format is as follows:
@@ -20,12 +20,12 @@
* ``` * ```
* It is not necessary to provide data for the specified keys. * It is not necessary to provide data for the specified keys.
* *
* Example: `curl -X POST 'https://api.mobile.immobilienscout24.de/search/list?pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region&geocodes=%2Fde%2Fberlin%2Fberlin&pagenumber=1' -H "Connection: keep-alive" -H "User-Agent: ImmoScout_27.3_26.0_._" -H "Accept: application/json" -H "Content-Type: application/json" -d '{"supportedResultListType": [], "userData": {}}'` * Example: `curl -X POST 'https://api.mobile.immobilienscout24.de/search/list?pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region&geocodes=%2Fde%2Fberlin%2Fberlin&pagenumber=1' -H "Connection: keep-alive" -H "User-Agent: ImmoScout_27.12_26.2_._" -H "Accept: application/json" -H "Content-Type: application/json" -d '{"supportedResultListType": [], "userData": {}}'`
* - GET /expose/{id} - Returns the details of a listing. The response contains additional details not included in the * - GET /expose/{id} - Returns the details of a listing. The response contains additional details not included in the
* listing response. * listing response.
* *
* Example: `curl -H "User-Agent: ImmoScout_27.3_26.0_._" "https://api.mobile.immobilienscout24.de/expose/158382494"` * Example: `curl -H "User-Agent: ImmoScout_27.12_26.2_._" "https://api.mobile.immobilienscout24.de/expose/158382494"`
* *
* *
* It is necessary to set the correct User Agent (see `getListings`) in the request header. * It is necessary to set the correct User Agent (see `getListings`) in the request header.
@@ -52,7 +52,7 @@ async function getListings(url) {
const response = await fetch(url, { const response = await fetch(url, {
method: 'POST', method: 'POST',
headers: { headers: {
'User-Agent': 'ImmoScout_27.3_26.0_._', 'User-Agent': 'ImmoScout_27.12_26.2_._',
'Content-Type': 'application/json', 'Content-Type': 'application/json',
}, },
body: JSON.stringify({ body: JSON.stringify({
@@ -88,7 +88,7 @@ async function getListings(url) {
async function isListingActive(link) { async function isListingActive(link) {
const result = await fetch(convertImmoscoutListingToMobileListing(link), { const result = await fetch(convertImmoscoutListingToMobileListing(link), {
headers: { headers: {
'User-Agent': 'ImmoScout_27.3_26.0_._', 'User-Agent': 'ImmoScout_27.12_26.2_._',
}, },
}); });

View File

@@ -103,6 +103,8 @@ const REAL_ESTATE_TYPE = {
'haus-mieten': 'houserent', 'haus-mieten': 'houserent',
'wohnung-mieten': 'apartmentrent', 'wohnung-mieten': 'apartmentrent',
'wohnung-kaufen': 'apartmentbuy', 'wohnung-kaufen': 'apartmentbuy',
'wohnung-kaufen-mit-balkon': 'apartmentbuy',
'eigentumswohnung-mit-garten': 'apartmentbuy',
'haus-kaufen': 'housebuy', 'haus-kaufen': 'housebuy',
}; };
@@ -146,7 +148,7 @@ export function convertWebToMobile(webUrl) {
const realTypeKey = segments.at(-1); const realTypeKey = segments.at(-1);
let realType = REAL_ESTATE_TYPE[realTypeKey]; let realType = REAL_ESTATE_TYPE[realTypeKey];
let additionalParamsFromWebPath; let additionalParamsFromWebPath = WEB_PATH_TO_APARTMENT_EQUIPMENT_MAP[realTypeKey] || null;
if (!realType) { if (!realType) {
// Test for seo optimized apartment path (only used on the ImmoScout web app) // Test for seo optimized apartment path (only used on the ImmoScout web app)
@@ -167,7 +169,7 @@ export function convertWebToMobile(webUrl) {
Object.entries(rawParams).filter(([key]) => key !== 'enteredFrom' && PARAM_NAME_MAP[key]), Object.entries(rawParams).filter(([key]) => key !== 'enteredFrom' && PARAM_NAME_MAP[key]),
); );
const geocodes = `/${segments.slice(2, 5).join('/')}`; const geocodes = `/${segments.slice(2, segments.length - 1).join('/')}`;
const isRadius = segments.includes('radius'); const isRadius = segments.includes('radius');
const mobileParams = { const mobileParams = {
searchType: isRadius ? 'radius' : 'region', searchType: isRadius ? 'radius' : 'region',

View File

@@ -1,6 +1,6 @@
{ {
"name": "fredy", "name": "fredy",
"version": "19.2.1", "version": "19.2.2",
"description": "[F]ind [R]eal [E]states [d]amn eas[y].", "description": "[F]ind [R]eal [E]states [d]amn eas[y].",
"scripts": { "scripts": {
"prepare": "husky", "prepare": "husky",

View File

@@ -41,7 +41,7 @@ Challenges:
_Returns the total number of listings for the given query._ _Returns the total number of listings for the given query._
``` ```
curl -H "User-Agent: ImmoScout_27.3_26.0_._" \ curl -H "User-Agent: ImmoScout_27.12_26.2_._" \
-H "Accept: application/json" \ -H "Accept: application/json" \
"https://api.mobile.immobilienscout24.de/search/total?searchType=region&realestatetype=apartmentrent&pricetype=calculatedtotalrent&geocodes=%2Fde%2Fberlin%2Fberlin" "https://api.mobile.immobilienscout24.de/search/total?searchType=region&realestatetype=apartmentrent&pricetype=calculatedtotalrent&geocodes=%2Fde%2Fberlin%2Fberlin"
``` ```
@@ -63,7 +63,7 @@ _The body is json encoded and contains data specifying additional results (adver
``` ```
curl -X POST 'https://api.mobile.immobilienscout24.de/search/list?pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region&geocodes=%2Fde%2Fberlin%2Fberlin&pagenumber=1' \ curl -X POST 'https://api.mobile.immobilienscout24.de/search/list?pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region&geocodes=%2Fde%2Fberlin%2Fberlin&pagenumber=1' \
-H "Connection: keep-alive" \ -H "Connection: keep-alive" \
-H "User-Agent: ImmoScout_27.3_26.0_._" \ -H "User-Agent: ImmoScout_27.12_26.2_._" \
-H "Accept: application/json" \ -H "Accept: application/json" \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{"supportedResultListType":[],"userData":{}}' -d '{"supportedResultListType":[],"userData":{}}'
@@ -78,7 +78,7 @@ curl -X POST 'https://api.mobile.immobilienscout24.de/search/list?pricetype=calc
The response contains additional details not included in the listing response. The response contains additional details not included in the listing response.
``` ```
curl -H "User-Agent: ImmoScout_27.3_26.0_._" \ curl -H "User-Agent: ImmoScout_27.12_26.2_._" \
-H "Accept: application/json" \ -H "Accept: application/json" \
"https://api.mobile.immobilienscout24.de/expose/158382494" "https://api.mobile.immobilienscout24.de/expose/158382494"
``` ```

View File

@@ -58,7 +58,7 @@ describe('#immoscout-mobile URL conversion', () => {
const response = await fetch(url, { const response = await fetch(url, {
method: 'POST', method: 'POST',
headers: { headers: {
'User-Agent': 'ImmoScout_27.3_26.0_._', 'User-Agent': 'ImmoScout_27.12_26.2_._',
'Content-Type': 'application/json', 'Content-Type': 'application/json',
}, },
body: JSON.stringify({ body: JSON.stringify({
@@ -75,7 +75,9 @@ describe('#immoscout-mobile URL conversion', () => {
expect(responseBody.totalResults).to.be.greaterThan(0); expect(responseBody.totalResults).to.be.greaterThan(0);
expect(responseBody.totalResults).to.be.greaterThan(0); expect(responseBody.totalResults).to.be.greaterThan(0);
expect(responseBody.resultListItems.length).to.greaterThan(0); expect(responseBody.resultListItems.length).to.greaterThan(0);
expect(responseBody.resultListItems[0].item.realEstateType).to.equal(type); expect(responseBody.resultListItems.filter((r) => r.type === 'EXPOSE_RESULT')[0].item.realEstateType).to.equal(
type,
);
} }
}); });
}); });