From b9e4bca24408bae5d048d3f72a4277dc196b45a2 Mon Sep 17 00:00:00 2001
From: Patrick Klein <42714034+libklein@users.noreply.github.com>
Date: Fri, 9 May 2025 08:13:52 +0100
Subject: [PATCH] Add immoscout mobile API provider to avoid failing bot checks (#125)

* Add provider that uses the immoscout mobile API to avoid failing bot checks.
---
 lib/FredyRuntime.js                    |   2 +-
 lib/provider/immoscout-mobile.js       | 253 +++++++++++++++++++++++++
 test/provider/immoscout-mobile.test.js |  77 ++++++++
 3 files changed, 331 insertions(+), 1 deletion(-)
 create mode 100644 lib/provider/immoscout-mobile.js
 create mode 100644 test/provider/immoscout-mobile.test.js

diff --git a/lib/FredyRuntime.js b/lib/FredyRuntime.js
index 639c74d..f9ece09 100755
--- a/lib/FredyRuntime.js
+++ b/lib/FredyRuntime.js
@@ -26,7 +26,7 @@ class FredyRuntime {
       //modify the url to make sure search order is correctly set
       Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam))
         //scraping the site and try finding new listings
-        .then(this._getListings.bind(this))
+        .then(this._providerConfig.getListings?.bind(this) ?? this._getListings.bind(this))
         //bring them in a proper form (dictated by the provider)
         .then(this._normalize.bind(this))
         //filter listings with stuff tagged by the blacklist of the provider
diff --git a/lib/provider/immoscout-mobile.js b/lib/provider/immoscout-mobile.js
new file mode 100644
index 0000000..c49c250
--- /dev/null
+++ b/lib/provider/immoscout-mobile.js
@@ -0,0 +1,253 @@
+/**
+ * ImmoScout provider using the mobile API to retrieve listings.
+ *
+ * The mobile API provides the following endpoints:
+ * - GET /search/total?{search parameters}: Returns the total number of listings for the given query
+ *   Example: `curl -H "User-Agent: ImmoScout24_1410_30_._" https://api.mobile.immobilienscout24.de/search/total?searchType=region&realestatetype=apartmentrent&pricetype=calculatedtotalrent&geocodes=%2Fde%2Fberlin%2Fberlin `
+ *
+ * - POST /search/list?{search parameters}: Actually retrieves the listings. Body is json encoded and contains
+ *   data specifying additional results (advertisements) to return. The format is as follows:
+ *   ```
+ *   {
+ *     "supportedResultListTypes": [],
+ *     "userData": {}
+ *   }
+ *   ```
+ *   It is not necessary to provide data for the specified keys.
+ *
+ *   Example: `curl -X POST 'https://api.mobile.immobilienscout24.de/search/list?pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region&geocodes=%2Fde%2Fberlin%2Fberlin&pagenumber=1' -H "Connection: keep-alive" -H "User-Agent: ImmoScout24_1410_30_._" -H "Accept: application/json" -H "Content-Type: application/json" -d '{"supportedResultListTypes": [], "userData": {}}'`
+ *
+ * - GET /expose/{id} - Returns the details of a listing. The response contains additional details not included in the
+ *   listing response.
+ *
+ *   Example: `curl -H "User-Agent: ImmoScout24_1410_30_._" "https://api.mobile.immobilienscout24.de/expose/158382494"`
+ *
+ *
+ * It is necessary to set the correct User Agent (see `getListings`) in the request header.
+ *
+ * Note that the mobile API is not publicly documented. I've reverse-engineered
+ * it by intercepting traffic from an android emulator running the immoscout app.
+ * Moreover, the search parameters differ slightly from the web API. I've mapped them
+ * to the web API parameters by comparing a search request with all parameters set between
+ * the web and mobile API. The mobile API actually seems to be a superset of the web API,
+ * but I have decided not to include new parameters as I wanted to keep the existing UX (i.e.,
+ * users only have to provide a link to an existing search).
+ *
+ * Limitations:
+ * - The current implementation of this provider *does not* support non-rental properties,
+ *   although the same approach can be used to implement support. It's just a matter of
+ *   mapping the web search URL to the corresponding mobile API URL.
+ * - Pagination support is not implemented.
+ */
+
+import utils, { buildHash } from '../utils.js';
+import queryString from 'query-string';
+
+let appliedBlackList = [];
+
+/**
+ * Requests the given mobile API search URL and maps every expose entry of the
+ * response onto the flat listing shape declared in `config.crawlFields`.
+ *
+ * @param {string} url Mobile API search URL, see `convertWebToMobile`.
+ * @returns {Promise<object[]>} The listings; an empty array when the response status is not ok.
+ */
+async function getListings(url) {
+  const response = await fetch(url, {
+    method: 'POST',
+    headers: {
+      'User-Agent': 'ImmoScout24_1410_30_._',
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify({
+      supportedResultListTypes: [],
+      userData: {},
+    }),
+  });
+  if (!response.ok) {
+    console.error('Error fetching data from ImmoScout Mobile API:', response.statusText);
+    return [];
+  }
+
+  const responseBody = await response.json();
+  // Tolerate a body without `resultListItems` instead of failing with a TypeError.
+  const resultListItems = responseBody?.resultListItems ?? [];
+  return resultListItems
+    .filter((entry) => entry.type === 'EXPOSE_RESULT')
+    .map(({ item }) => {
+      // The first two attributes are taken as price and size; the list may be missing.
+      const [price, size] = item.attributes ?? [];
+      return {
+        id: item.id,
+        price: price?.value,
+        size: size?.value,
+        title: item.title,
+        // baseUrl ends with a slash: resolve against it rather than concatenating "//expose".
+        link: new URL(`expose/${item.id}`, metaInformation.baseUrl).href,
+        address: item.address?.line,
+      };
+    });
+}
+
+/**
+ * @param {*} val Value to check.
+ * @returns {boolean} True for null, undefined and values whose `length` is 0.
+ */
+function nullOrEmpty(val) {
+  return val == null || val.length === 0;
+}
+
+/**
+ * Brings a listing into its final form: drops the first "NEU" from the title, cuts the
+ * address from a "(...)," part to its end and replaces the id by a hash of id and price.
+ *
+ * @param {object} o Listing as produced by `getListings`; it is modified in place.
+ * @returns {object} The same listing object.
+ */
+function normalize(o) {
+  const title = nullOrEmpty(o.title) ? 'NO TITLE FOUND' : o.title.replace('NEU', '');
+  const address = nullOrEmpty(o.address) ? 'NO ADDRESS FOUND' : (o.address || '').replace(/\(.*\),.*$/, '').trim();
+  const id = buildHash(o.id, o.price);
+  return Object.assign(o, { id, title, address });
+}
+
+/**
+ * @param {object} o Normalized listing.
+ * @returns {boolean} False when `utils.isOneOf` reports a blacklist hit for the title.
+ */
+function applyBlacklist(o) {
+  return !utils.isOneOf(o.title, appliedBlackList);
+}
+
+const config = {
+  url: null,
+  sortByDateParam: 'sorting=-firstactivation',
+  // Not actually required - used by filter to remove any listings that failed to parse
+  crawlFields: {
+    id: 'id',
+    title: 'title',
+    price: 'price',
+    size: 'size',
+    link: 'link',
+    address: 'address',
+  },
+  normalize: normalize,
+  filter: applyBlacklist,
+  // Used by FredyRuntime instead of its own listing retrieval
+  getListings: getListings,
+};
+
+/**
+ * Applies the provider settings.
+ *
+ * @param {object} sourceConfig Provider settings; `enabled` and `url` (a web search URL) are read.
+ * @param {Array} [blacklist] Blacklist used by the listing filter.
+ * @throws {Error} When the URL cannot be converted, see `convertWebToMobile`.
+ */
+export const init = (sourceConfig, blacklist) => {
+  config.enabled = sourceConfig.enabled;
+  config.url = convertWebToMobile(sourceConfig.url);
+  appliedBlackList = blacklist || [];
+};
+
+export const metaInformation = {
+  name: 'Immoscout',
+  baseUrl: 'https://www.immobilienscout24.de/',
+  id: 'immoscout-mobile',
+};
+
+/**
+ * Translates an ImmoScout web search URL into the corresponding mobile API search URL.
+ *
+ * @param {string} webUrl Web search URL, e.g. `https://www.immobilienscout24.de/Suche/de/berlin/berlin/wohnung-mieten`.
+ * @returns {string} URL of the mobile API `/search/list` endpoint.
+ * @throws {Error} On an invalid URL, an unexpected path, an unsupported parameter or an unknown equipment type.
+ */
+export function convertWebToMobile(webUrl) {
+  let url;
+  try {
+    url = new URL(webUrl);
+  } catch (err) {
+    throw new Error(`Invalid URL: ${webUrl}`, { cause: err });
+  }
+  const segments = url.pathname.split('/');
+  if (segments.length < 6 || segments[1] !== 'Suche') {
+    throw new Error(`Unexpected path format: ${url.pathname}`);
+  }
+  const geocodes = `/${segments[2]}/${segments[3]}/${segments[4]}`;
+
+  // Web parameter name -> mobile parameter name (currently identical)
+  const paramNameMap = {
+    heatingtypes: 'heatingtypes',
+    haspromotion: 'haspromotion',
+    numberofrooms: 'numberofrooms',
+    livingspace: 'livingspace',
+    energyefficiencyclasses: 'energyefficiencyclasses',
+    exclusioncriteria: 'exclusioncriteria',
+    equipment: 'equipment',
+    petsallowedtypes: 'petsallowedtypes',
+    price: 'price',
+    constructionyear: 'constructionyear',
+    apartmenttypes: 'apartmenttypes',
+    pricetype: 'pricetype',
+    floor: 'floor',
+  };
+
+  // Lower-cased web equipment value -> spelling used by the mobile API
+  const equipmentValueMap = {
+    parking: 'parking',
+    cellar: 'cellar',
+    builtinkitchen: 'builtInKitchen',
+    lift: 'lift',
+    garden: 'garden',
+    guesttoilet: 'guestToilet',
+    balcony: 'balcony',
+  };
+
+  const { query: webParams } = queryString.parseUrl(webUrl, { arrayFormat: 'comma' });
+  delete webParams['enteredFrom'];
+
+  // Check for unsupported parameters; Object.hasOwn rejects inherited keys such as "constructor"
+  Object.keys(webParams).forEach((key) => {
+    if (!Object.hasOwn(paramNameMap, key)) {
+      throw new Error(`Unsupported Web-API parameter: "${key}"`);
+    }
+  });
+
+  // Build mobile params
+  const mobileParams = {
+    searchType: 'region',
+    geocodes,
+    realestatetype: 'apartmentrent',
+  };
+
+  Object.entries(webParams).forEach(([webKey, webVal]) => {
+    let value = webVal;
+
+    if (webKey === 'equipment') {
+      // Map equipment list to camelCase values
+      if (!Array.isArray(value)) {
+        value = String(value).split(',');
+      }
+      value = value.map((token) => {
+        const lower = token.toLowerCase();
+        if (!Object.hasOwn(equipmentValueMap, lower)) {
+          throw new Error(`Unknown equipment type: "${token}"`);
+        }
+        return equipmentValueMap[lower];
+      });
+    }
+
+    mobileParams[paramNameMap[webKey]] = value;
+  });
+
+  const mobileQuery = queryString.stringify(mobileParams, {
+    arrayFormat: 'comma',
+    encode: true,
+    skipEmptyString: true,
+  });
+
+  return `https://api.mobile.immobilienscout24.de/search/list?${mobileQuery}`;
+}
+
+export { config };
diff --git a/test/provider/immoscout-mobile.test.js b/test/provider/immoscout-mobile.test.js
new file mode 100644
index 0000000..d608f30
--- /dev/null
+++ b/test/provider/immoscout-mobile.test.js
@@ -0,0 +1,77 @@
+import { expect } from 'chai';
+import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
+import { mockFredy, providerConfig } from '../utils.js';
+import { get } from '../mocks/mockNotification.js';
+import * as provider from '../../lib/provider/immoscout-mobile.js';
+
+const { convertWebToMobile } = provider;
+
+describe('#immoscout-mobile provider testsuite()', () => {
+  after(() => {
+    similarityCache.stopCacheCleanup();
+  });
+
+  provider.init(providerConfig.immoscout, [], []);
+  it('should test immoscout-mobile provider', async () => {
+    const Fredy = await mockFredy();
+    const fredy = new Fredy(provider.config, null, provider.metaInformation.id, '', similarityCache);
+    // Awaiting directly lets a failed expectation fail the test instead of leaving a promise pending.
+    const listings = await fredy.execute();
+    expect(listings).to.be.a('array');
+    const notificationObj = get();
+    expect(notificationObj).to.be.a('object');
+    expect(notificationObj.serviceName).to.equal('immoscout-mobile');
+    notificationObj.payload.forEach((notify) => {
+      /** check the actual structure **/
+      expect(notify.id).to.be.a('string');
+      expect(notify.price).to.be.a('string');
+      expect(notify.size).to.be.a('string');
+      expect(notify.title).to.be.a('string');
+      expect(notify.link).to.be.a('string');
+      expect(notify.address).to.be.a('string');
+      /** check the values if possible **/
+      expect(notify.size).to.be.not.empty;
+      expect(notify.title).to.be.not.empty;
+      expect(notify.link).that.does.include('https://www.immobilienscout24.de/');
+    });
+  });
+});
+
+describe('#immoscout-mobile URL conversion', () => {
+  // Test URL conversion
+  it('should convert a full web URL to mobile URL', () => {
+    const webUrl = 'https://www.immobilienscout24.de/Suche/de/berlin/berlin/wohnung-mieten?heatingtypes=central,selfcontainedcentral&haspromotion=false&numberofrooms=2.0-5.0&livingspace=10.0-25.0&energyefficiencyclasses=a,b,c,d,e,f,g,h,a_plus&exclusioncriteria=projectlisting,swapflat&equipment=parking,cellar,builtinkitchen,lift,garden,guesttoilet,balcony&petsallowedtypes=no,yes,negotiable&price=10.0-100.0&constructionyear=1920-2026&apartmenttypes=halfbasement,penthouse,other,loft,groundfloor,terracedflat,raisedgroundfloor,roofstorey,apartment,maisonette&pricetype=calculatedtotalrent&floor=2-7&enteredFrom=result_list';
+    const expectedMobileUrl = 'https://api.mobile.immobilienscout24.de/search/list?apartmenttypes=halfbasement,penthouse,other,loft,groundfloor,terracedflat,raisedgroundfloor,roofstorey,apartment,maisonette&constructionyear=1920-2026&energyefficiencyclasses=a,b,c,d,e,f,g,h,a_plus&equipment=parking,cellar,builtInKitchen,lift,garden,guestToilet,balcony&exclusioncriteria=projectlisting,swapflat&floor=2-7&geocodes=%2Fde%2Fberlin%2Fberlin&haspromotion=false&heatingtypes=central,selfcontainedcentral&livingspace=10.0-25.0&numberofrooms=2.0-5.0&petsallowedtypes=no,yes,negotiable&price=10.0-100.0&pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region';
+
+    const actualMobileUrl = convertWebToMobile(webUrl);
+    expect(actualMobileUrl).to.equal(expectedMobileUrl);
+  });
+
+  // Test URL conversion with unsupported query parameters
+  it('should throw an error for unsupported query parameters', () => {
+    const webUrl = 'https://www.immobilienscout24.de/Suche/de/berlin/berlin/wohnung-mieten?minimuminternetspeed=100000';
+
+    expect(() => convertWebToMobile(webUrl)).to.throw('Unsupported Web-API parameter: "minimuminternetspeed"');
+  });
+
+  // Test URL conversion with invalid URL
+  it('should throw an error for invalid URL', () => {
+    const invalidUrl = 'invalid-url';
+
+    expect(() => convertWebToMobile(invalidUrl)).to.throw('Invalid URL: invalid-url');
+  });
+
+  // Test URL conversion with unexpected path format
+  it('should throw an error for unexpected path format', () => {
+    const webUrl = 'https://www.immobilienscout24.de/invalid/path/format';
+
+    expect(() => convertWebToMobile(webUrl)).to.throw('Unexpected path format: /invalid/path/format');
+  });
+
+  // Test URL conversion with a parameter named like an inherited object property
+  it('should throw an error for parameters matching inherited object keys', () => {
+    const webUrl = 'https://www.immobilienscout24.de/Suche/de/berlin/berlin/wohnung-mieten?constructor=1';
+
+    expect(() => convertWebToMobile(webUrl)).to.throw('Unsupported Web-API parameter: "constructor"');
+  });
+});