Add immoscout mobile API provider to avoid failing bot checks (#125)

* Add provider that uses the immoscout mobile API to avoid failing bot checks.
This commit is contained in:
Patrick Klein
2025-05-09 08:13:52 +01:00
committed by GitHub
parent a138dafc31
commit b9e4bca244
3 changed files with 276 additions and 1 deletions

View File

@@ -26,7 +26,7 @@ class FredyRuntime {
//modify the url to make sure search order is correctly set
Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam))
//scraping the site and try finding new listings
.then(this._getListings.bind(this))
.then(this._providerConfig.getListings?.bind(this) ?? this._getListings.bind(this))
//bring them in a proper form (dictated by the provider)
.then(this._normalize.bind(this))
//filter listings with stuff tagged by the blacklist of the provider

View File

@@ -0,0 +1,203 @@
/**
* ImmoScout provider using the mobile API to retrieve listings.
*
* The mobile API provides the following endpoints:
* - GET /search/total?{search parameters}: Returns the total number of listings for the given query
* Example: `curl -H "User-Agent: ImmoScout24_1410_30_._" "https://api.mobile.immobilienscout24.de/search/total?searchType=region&realestatetype=apartmentrent&pricetype=calculatedtotalrent&geocodes=%2Fde%2Fberlin%2Fberlin"`
*
* - POST /search/list?{search parameters}: Actually retrieves the listings. Body is json encoded and contains
* data specifying additional results (advertisements) to return. The format is as follows:
* ```
* {
* "supportedResultListTypes": [],
* "userData": {}
* }
* ```
* It is not necessary to provide data for the specified keys.
*
* Example: `curl -X POST 'https://api.mobile.immobilienscout24.de/search/list?pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region&geocodes=%2Fde%2Fberlin%2Fberlin&pagenumber=1' -H "Connection: keep-alive" -H "User-Agent: ImmoScout24_1410_30_._" -H "Accept: application/json" -H "Content-Type: application/json" -d '{"supportedResultListTypes": [], "userData": {}}'`
* - GET /expose/{id} - Returns the details of a listing. The response contains additional details not included in the
* listing response.
*
* Example: `curl -H "User-Agent: ImmoScout24_1410_30_._" "https://api.mobile.immobilienscout24.de/expose/158382494"`
*
*
* It is necessary to set the correct User Agent (see `getListings`) in the request header.
*
* Note that the mobile API is not publicly documented. I've reverse-engineered
* it by intercepting traffic from an Android emulator running the ImmoScout app.
* Moreover, the search parameters differ slightly from the web API. I've mapped them
* to the web API parameters by comparing a search request with all parameters set between
* the web and mobile API. The mobile API actually seems to be a superset of the web API,
* but I have decided not to include new parameters as I wanted to keep the existing UX (i.e.,
* users only have to provide a link to an existing search).
*
* Limitations:
* - The current implementation of this provider *does not* support non-rental properties,
* although the same approach can be used to implement support. It's just a matter of
* mapping the web search URL to the corresponding mobile API URL.
* - Pagination support is not implemented.
*/
import utils, {buildHash} from '../utils.js';
import queryString from 'query-string';
// Blacklist terms consulted by `applyBlacklist`; reassigned on every `init` call.
let appliedBlackList = [];
/**
 * Fetches listings from the ImmoScout mobile API and flattens them into listing objects.
 *
 * Only result items of type `EXPOSE_RESULT` are kept. The first two entries of an
 * item's `attributes` array are read as price and size.
 *
 * @param {string} url - Mobile API search URL (as produced by `convertWebToMobile`).
 * @returns {Promise<Array<{id: *, price: *, size: *, title: *, link: string, address: *}>>}
 *   The mapped listings, or an empty array when the request is not OK or the
 *   response contains no result items.
 */
async function getListings(url) {
  const response = await fetch(url, {
    method: 'POST',
    headers: {
      // The header documentation of this module notes that this User-Agent must be set.
      'User-Agent': 'ImmoScout24_1410_30_._',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      supportedResultListTypes: [],
      userData: {},
    }),
  });
  if (!response.ok) {
    console.error('Error fetching data from ImmoScout Mobile API:', response.statusText);
    return [];
  }
  const responseBody = await response.json();
  // A response without result items is treated as "no listings" instead of crashing.
  const resultItems = responseBody?.resultListItems ?? [];
  return resultItems
    .filter((entry) => entry.type === 'EXPOSE_RESULT')
    .map(({ item }) => {
      const [price, size] = item.attributes ?? [];
      return {
        id: item.id,
        price: price?.value,
        size: size?.value,
        title: item.title,
        // Resolve against the base URL so its trailing slash does not produce "//expose/...".
        link: new URL(`expose/${item.id}`, metaInformation.baseUrl).href,
        address: item.address?.line,
      };
    });
}
/**
 * Reports whether a value is missing or has a length of zero.
 *
 * @param {*} val - Value to inspect (typically a string or an array).
 * @returns {boolean} `true` for `null`, `undefined`, or anything whose `length` is `0`.
 */
function nullOrEmpty(val) {
  if (val === null || val === undefined) {
    return true;
  }
  return val.length === 0;
}
/**
 * Brings a raw listing into its final form, updating the passed object in place.
 *
 * - `title`: first occurrence of `NEU` removed, or a placeholder when missing.
 * - `address`: a trailing "(...), ..." part removed and trimmed, or a placeholder when missing.
 * - `id`: replaced by the hash built from the original id and the price.
 *
 * @param {object} o - Listing as returned by `getListings`.
 * @returns {object} The same object `o` with `id`, `title` and `address` overwritten.
 */
function normalize(o) {
  let title = 'NO TITLE FOUND';
  if (!nullOrEmpty(o.title)) {
    title = o.title.replace('NEU', '');
  }

  let address = 'NO ADDRESS FOUND';
  if (!nullOrEmpty(o.address)) {
    address = (o.address || '').replace(/\(.*\),.*$/, '').trim();
  }

  const id = buildHash(o.id, o.price);
  return Object.assign(o, { id, title, address });
}
/**
 * Filter predicate: keeps a listing unless `utils.isOneOf` matches its title
 * against the currently applied blacklist.
 *
 * @param {object} o - Listing whose `title` is checked.
 * @returns {boolean} `false` when the title is blacklisted, `true` otherwise.
 */
function applyBlacklist(o) {
  const isBlacklisted = utils.isOneOf(o.title, appliedBlackList);
  return !isBlacklisted;
}
/**
 * Provider configuration handed to the runtime. `url` and `enabled` are filled in by `init`.
 */
const config = {
  url: null,
  sortByDateParam: 'sorting=-firstactivation',
  // Not actually required - used by filter to remove any listings that failed to parse
  crawlFields: {
    id: 'id',
    title: 'title',
    price: 'price',
    size: 'size',
    link: 'link',
    address: 'address',
  },
  normalize,
  filter: applyBlacklist,
  getListings,
};
/**
 * Initialises the provider from a job's source configuration.
 *
 * Stores the enabled flag, converts the configured web search URL into the
 * mobile API URL and replaces the applied blacklist.
 *
 * @param {{enabled: boolean, url: string}} sourceConfig - Provider settings of the job.
 * @param {string[]} [blacklist] - Blacklist terms; any falsy value results in an empty list.
 */
export const init = (sourceConfig, blacklist) => {
  const { enabled, url } = sourceConfig;
  config.enabled = enabled;
  config.url = convertWebToMobile(url);
  appliedBlackList = blacklist ? blacklist : [];
};
/**
 * Static description of this provider.
 * - `name`: display name of the provider.
 * - `baseUrl`: web base URL; `getListings` uses it to build the listing links.
 * - `id`: identifier of this provider.
 */
export const metaInformation = {
name: 'Immoscout',
baseUrl: 'https://www.immobilienscout24.de/',
id: 'immoscout-mobile',
};
/**
 * Converts an ImmoScout web search URL into the corresponding mobile API search URL.
 *
 * The web path must start with `/Suche/` and have at least five segments, e.g.
 * `/Suche/de/berlin/berlin/wohnung-mieten`; the three segments after `Suche`
 * become the `geocodes` parameter. The result always targets
 * `realestatetype=apartmentrent` with `searchType=region`. The `enteredFrom`
 * query parameter is dropped; every other query parameter must be a known one.
 *
 * @param {string} webUrl - Web search URL.
 * @returns {string} Mobile API `/search/list` URL with the mapped query parameters.
 * @throws {Error} When `webUrl` is not a valid URL, its path has an unexpected format,
 *   it carries an unsupported query parameter, or it lists an unknown equipment type.
 */
export function convertWebToMobile(webUrl) {
  let url;
  try {
    url = new URL(webUrl);
  } catch (err) {
    // Keep the original parse error reachable for debugging.
    throw new Error(`Invalid URL: ${webUrl}`, { cause: err });
  }

  const segments = url.pathname.split('/');
  if (segments.length < 6 || segments[1] !== 'Suche') {
    throw new Error(`Unexpected path format: ${url.pathname}`);
  }
  const geocodes = `/${segments[2]}/${segments[3]}/${segments[4]}`;

  // Web parameter name -> mobile parameter name.
  const paramNameMap = {
    heatingtypes: 'heatingtypes',
    haspromotion: 'haspromotion',
    numberofrooms: 'numberofrooms',
    livingspace: 'livingspace',
    energyefficiencyclasses: 'energyefficiencyclasses',
    exclusioncriteria: 'exclusioncriteria',
    equipment: 'equipment',
    petsallowedtypes: 'petsallowedtypes',
    price: 'price',
    constructionyear: 'constructionyear',
    apartmenttypes: 'apartmenttypes',
    pricetype: 'pricetype',
    floor: 'floor',
  };
  // Lower-case web equipment value -> camelCase mobile equipment value.
  const equipmentValueMap = {
    parking: 'parking',
    cellar: 'cellar',
    builtinkitchen: 'builtInKitchen',
    lift: 'lift',
    garden: 'garden',
    guesttoilet: 'guestToilet',
    balcony: 'balcony',
  };

  const { query: webParams } = queryString.parseUrl(webUrl, { arrayFormat: 'comma' });
  const webEntries = Object.entries(webParams).filter(([key]) => key !== 'enteredFrom');

  // Check for unsupported parameters. Own-property checks are required here:
  // a plain truthiness lookup would accept inherited names such as "toString".
  for (const [key] of webEntries) {
    if (!Object.hasOwn(paramNameMap, key)) {
      throw new Error(`Unsupported Web-API parameter: "${key}"`);
    }
  }

  // Build mobile params
  const mobileParams = {
    searchType: 'region',
    geocodes,
    realestatetype: 'apartmentrent',
  };
  for (const [webKey, webVal] of webEntries) {
    let value = webVal;
    if (webKey === 'equipment') {
      // Map equipment list to camelCase values
      const tokens = Array.isArray(value) ? value : String(value).split(',');
      value = tokens.map((token) => {
        const lower = token.toLowerCase();
        if (!Object.hasOwn(equipmentValueMap, lower)) {
          throw new Error(`Unknown equipment type: "${token}"`);
        }
        return equipmentValueMap[lower];
      });
    }
    mobileParams[paramNameMap[webKey]] = value;
  }

  const mobileQuery = queryString.stringify(mobileParams, {
    arrayFormat: 'comma',
    encode: true,
    skipEmptyString: true,
  });
  return `https://api.mobile.immobilienscout24.de/search/list?${mobileQuery}`;
}
export { config };

View File

@@ -0,0 +1,72 @@
import {expect} from 'chai';
import {convertWebToMobile} from '../../lib/provider/immoscout-mobile.js';
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
import {mockFredy, providerConfig} from '../utils.js';
import {get} from '../mocks/mockNotification.js';
import * as provider from '../../lib/provider/immoscout-mobile.js';
// End-to-end check of the provider: runs a mocked Fredy instance against the
// provider config and verifies the shape of the notified listings.
describe('#immoscout-mobile provider testsuite()', () => {
  after(() => {
    similarityCache.stopCacheCleanup();
  });
  provider.init(providerConfig.immoscout, [], []);
  it('should test immoscout-mobile provider', async () => {
    const Fredy = await mockFredy();
    const fredy = new Fredy(provider.config, null, provider.metaInformation.id, '', similarityCache);
    // Await execute() directly: a failed expectation or a rejected run now fails
    // the test immediately instead of leaving a never-settled promise that times out.
    const listings = await fredy.execute();
    expect(listings).to.be.a('array');
    const notificationObj = get();
    expect(notificationObj).to.be.a('object');
    expect(notificationObj.serviceName).to.equal('immoscout-mobile');
    notificationObj.payload.forEach((notify) => {
      /** check the actual structure **/
      expect(notify.id).to.be.a('string');
      expect(notify.price).to.be.a('string');
      expect(notify.size).to.be.a('string');
      expect(notify.title).to.be.a('string');
      expect(notify.link).to.be.a('string');
      expect(notify.address).to.be.a('string');
      /** check the values if possible **/
      expect(notify.size).to.be.not.empty;
      expect(notify.title).to.be.not.empty;
      expect(notify.link).to.include('https://www.immobilienscout24.de/');
    });
  });
});
// Unit tests for convertWebToMobile: one successful conversion and the three error paths.
describe('#immoscout-mobile URL conversion', () => {
  // Every supported web parameter is set; `enteredFrom` is expected to be dropped.
  it('should convert a full web URL to mobile URL', () => {
    const fullWebSearch = 'https://www.immobilienscout24.de/Suche/de/berlin/berlin/wohnung-mieten?heatingtypes=central,selfcontainedcentral&haspromotion=false&numberofrooms=2.0-5.0&livingspace=10.0-25.0&energyefficiencyclasses=a,b,c,d,e,f,g,h,a_plus&exclusioncriteria=projectlisting,swapflat&equipment=parking,cellar,builtinkitchen,lift,garden,guesttoilet,balcony&petsallowedtypes=no,yes,negotiable&price=10.0-100.0&constructionyear=1920-2026&apartmenttypes=halfbasement,penthouse,other,loft,groundfloor,terracedflat,raisedgroundfloor,roofstorey,apartment,maisonette&pricetype=calculatedtotalrent&floor=2-7&enteredFrom=result_list';
    const expected = 'https://api.mobile.immobilienscout24.de/search/list?apartmenttypes=halfbasement,penthouse,other,loft,groundfloor,terracedflat,raisedgroundfloor,roofstorey,apartment,maisonette&constructionyear=1920-2026&energyefficiencyclasses=a,b,c,d,e,f,g,h,a_plus&equipment=parking,cellar,builtInKitchen,lift,garden,guestToilet,balcony&exclusioncriteria=projectlisting,swapflat&floor=2-7&geocodes=%2Fde%2Fberlin%2Fberlin&haspromotion=false&heatingtypes=central,selfcontainedcentral&livingspace=10.0-25.0&numberofrooms=2.0-5.0&petsallowedtypes=no,yes,negotiable&price=10.0-100.0&pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region';
    expect(convertWebToMobile(fullWebSearch)).to.equal(expected);
  });

  // A query parameter outside the supported set must be rejected.
  it('should throw an error for unsupported query parameters', () => {
    const convert = () =>
      convertWebToMobile('https://www.immobilienscout24.de/Suche/de/berlin/berlin/wohnung-mieten?minimuminternetspeed=100000');
    expect(convert).to.throw('Unsupported Web-API parameter: "minimuminternetspeed"');
  });

  // Input that is not a URL at all.
  it('should throw an error for invalid URL', () => {
    const convert = () => convertWebToMobile('invalid-url');
    expect(convert).to.throw('Invalid URL: invalid-url');
  });

  // A valid URL whose path is not a search path.
  it('should throw an error for unexpected path format', () => {
    const convert = () => convertWebToMobile('https://www.immobilienscout24.de/invalid/path/format');
    expect(convert).to.throw('Unexpected path format: /invalid/path/format');
  });
});