mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
Add immoscout mobile API provider to avoid failing bot checks (#125)
* Add provider that uses the immoscout mobile API to avoid failing bot checks.
This commit is contained in:
@@ -26,7 +26,7 @@ class FredyRuntime {
|
||||
//modify the url to make sure search order is correctly set
|
||||
Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam))
|
||||
//scraping the site and try finding new listings
|
||||
.then(this._getListings.bind(this))
|
||||
.then(this._providerConfig.getListings?.bind(this) ?? this._getListings.bind(this))
|
||||
//bring them in a proper form (dictated by the provider)
|
||||
.then(this._normalize.bind(this))
|
||||
//filter listings with stuff tagged by the blacklist of the provider
|
||||
|
||||
203
lib/provider/immoscout-mobile.js
Normal file
203
lib/provider/immoscout-mobile.js
Normal file
@@ -0,0 +1,203 @@
|
||||
/**
|
||||
* ImmoScout provider using the mobile API to retrieve listings.
|
||||
*
|
||||
* The mobile API provides the following endpoints:
|
||||
* - GET /search/total?{search parameters}: Returns the total number of listings for the given query
|
||||
* Example: `curl -H "User-Agent: ImmoScout24_1410_30_._" https://api.mobile.immobilienscout24.de/search/total?searchType=region&realestatetype=apartmentrent&pricetype=calculatedtotalrent&geocodes=%2Fde%2Fberlin%2Fberlin `
|
||||
*
|
||||
* - POST /search/list?{search parameters}: Actually retrieves the listings. Body is json encoded and contains
|
||||
* data specifying additional results (advertisements) to return. The format is as follows:
|
||||
* ```
|
||||
* {
|
||||
* "supportedResultListTypes": [],
|
||||
* "userData": {}
|
||||
* }
|
||||
* ```
|
||||
* It is not necessary to provide data for the specified keys.
|
||||
*
|
||||
* Example: `curl -X POST 'https://api.mobile.immobilienscout24.de/search/list?pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region&geocodes=%2Fde%2Fberlin%2Fberlin&pagenumber=1' -H "Connection: keep-alive" -H "User-Agent: ImmoScout24_1410_30_._" -H "Accept: application/json" -H "Content-Type: application/json" -d '{"supportedResultListTypes": [], "userData": {}}'`
|
||||
|
||||
* - GET /expose/{id} - Returns the details of a listing. The response contains additional details not included in the
|
||||
* listing response.
|
||||
*
|
||||
* Example: `curl -H "User-Agent: ImmoScout24_1410_30_._" "https://api.mobile.immobilienscout24.de/expose/158382494"`
|
||||
*
|
||||
*
|
||||
* It is necessary to set the correct User Agent (see `getListings`) in the request header.
|
||||
*
|
||||
* Note that the mobile API is not publicly documented. I've reverse-engineered
|
||||
* it by intercepting traffic from an android emulator running the immoscout app.
|
||||
* Moreover, the search parameters differ slightly from the web API. I've mapped them
|
||||
* to the web API parameters by comparing a search request with all parameters set between
|
||||
* the web and mobile API. The mobile API actually seems to be a superset of the web API,
|
||||
* but I have decided not to include new parameters as I wanted to keep the existing UX (i.e.,
|
||||
* users only have to provide a link to an existing search).
|
||||
*
|
||||
* Limitations:
|
||||
* - The current implementation of this provider *does not* support non-rental properties,
|
||||
* although the same approach can be used to implement support. It's just a matter of
|
||||
* mapping the web search URL to the corresponding mobile API URL.
|
||||
* - Pagination support is not implemented.
|
||||
*/
|
||||
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
import queryString from 'query-string';
|
||||
let appliedBlackList = [];
|
||||
|
||||
/**
 * Fetches listings from the ImmoScout mobile API.
 *
 * Sends a POST request to `url` with the mobile app's User-Agent and an empty
 * "additional results" body, then maps every `EXPOSE_RESULT` entry of the
 * response into a flat listing object.
 *
 * @param {string} url - Fully built mobile API search URL.
 * @returns {Promise<Array<{id: *, price: *, size: *, title: *, link: string, address: *}>>}
 *   The listings found; an empty array when the request is not successful or
 *   the response carries no result list.
 */
async function getListings(url) {
  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'User-Agent': 'ImmoScout24_1410_30_._',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      supportedResultListTypes: [],
      userData: {},
    }),
  });

  if (!response.ok) {
    // statusText may be empty, so the numeric status is logged as well.
    console.error('Error fetching data from ImmoScout Mobile API:', response.status, response.statusText);
    return [];
  }

  const responseBody = await response.json();
  // Guard against responses without a result list instead of throwing on `.filter`.
  const resultItems = Array.isArray(responseBody?.resultListItems) ? responseBody.resultListItems : [];

  return resultItems
    .filter((entry) => entry.type === 'EXPOSE_RESULT')
    .map(({ item }) => {
      // The first two attributes are read as price and size; either may be absent.
      const [price, size] = item.attributes ?? [];
      return {
        id: item.id,
        price: price?.value,
        size: size?.value,
        title: item.title,
        // Resolve against baseUrl so its trailing slash does not produce "//expose/...".
        link: new URL(`expose/${item.id}`, metaInformation.baseUrl).href,
        address: item.address?.line,
      };
    });
}
|
||||
|
||||
/**
 * Tells whether a value is missing or has zero length.
 *
 * @param {*} val - Value to inspect (typically a string or an array).
 * @returns {boolean} `true` for `null`, `undefined` and anything whose
 *   `length` is exactly 0; `false` otherwise.
 */
function nullOrEmpty(val) {
  // `== null` intentionally matches both null and undefined.
  return val == null || val.length === 0;
}
|
||||
/**
 * Brings a raw listing into its final form.
 *
 * - Replaces a missing/empty title or address with a placeholder text.
 * - Strips the first "NEU" occurrence from the title and trims the result.
 * - Cuts a trailing "(...), ..." part off the address and trims the result.
 * - Replaces the id with a hash built from the original id and the price.
 *
 * @param {object} o - Listing to normalize; it is updated in place.
 * @returns {object} The same listing object with `id`, `title` and `address` overwritten.
 */
function normalize(o) {
  const title = nullOrEmpty(o.title) ? 'NO TITLE FOUND' : o.title.replace('NEU', '').trim();
  const address = nullOrEmpty(o.address) ? 'NO ADDRESS FOUND' : o.address.replace(/\(.*\),.*$/, '').trim();
  const id = buildHash(o.id, o.price);
  return Object.assign(o, { id, title, address });
}
|
||||
/**
 * Filter predicate that keeps a listing only when `utils.isOneOf` reports no
 * match between its title and the currently applied blacklist.
 *
 * @param {object} o - Listing to check; only `o.title` is inspected.
 * @returns {boolean} `true` when the listing should be kept.
 */
function applyBlacklist(o) {
  return !utils.isOneOf(o.title, appliedBlackList);
}
|
||||
/**
 * Provider configuration.
 *
 * `url` stays `null` until `init` stores the converted mobile API URL.
 * `getListings` supplies this provider's own fetch routine.
 */
const config = {
  url: null,
  sortByDateParam: 'sorting=-firstactivation',
  // Not actually required - used by filter to remove any listings that failed to parse
  crawlFields: {
    id: 'id',
    title: 'title',
    price: 'price',
    size: 'size',
    link: 'link',
    address: 'address',
  },
  normalize,
  filter: applyBlacklist,
  getListings,
};
|
||||
/**
 * Initializes the provider for one search job.
 *
 * Copies the `enabled` flag, converts the configured web search URL into the
 * mobile API URL and stores the blacklist used by the filter.
 *
 * @param {{enabled: boolean, url: string}} sourceConfig - Job specific provider settings.
 * @param {string[]} [blacklist] - Blacklist entries; any falsy value is treated as an empty list.
 * @throws {Error} When `convertWebToMobile` rejects the configured URL.
 */
export const init = (sourceConfig, blacklist) => {
  config.enabled = sourceConfig.enabled;
  config.url = convertWebToMobile(sourceConfig.url);
  appliedBlackList = blacklist || [];
};
|
||||
/**
 * Static description of this provider.
 *
 * `baseUrl` ends with a slash and is the base for the expose links built in
 * `getListings`; `id` is the identifier of this provider.
 */
export const metaInformation = {
  name: 'Immoscout',
  baseUrl: 'https://www.immobilienscout24.de/',
  id: 'immoscout-mobile',
};
|
||||
|
||||
/** Supported web search parameters mapped to their mobile API parameter name. */
const WEB_TO_MOBILE_PARAM_NAMES = Object.freeze({
  heatingtypes: 'heatingtypes',
  haspromotion: 'haspromotion',
  numberofrooms: 'numberofrooms',
  livingspace: 'livingspace',
  energyefficiencyclasses: 'energyefficiencyclasses',
  exclusioncriteria: 'exclusioncriteria',
  equipment: 'equipment',
  petsallowedtypes: 'petsallowedtypes',
  price: 'price',
  constructionyear: 'constructionyear',
  apartmenttypes: 'apartmenttypes',
  pricetype: 'pricetype',
  floor: 'floor',
});

/** Lower-case web `equipment` values mapped to the camelCase values of the mobile API. */
const WEB_TO_MOBILE_EQUIPMENT = Object.freeze({
  parking: 'parking',
  cellar: 'cellar',
  builtinkitchen: 'builtInKitchen',
  lift: 'lift',
  garden: 'garden',
  guesttoilet: 'guestToilet',
  balcony: 'balcony',
});

/**
 * Converts an ImmoScout web search URL into the matching mobile API
 * `/search/list` URL.
 *
 * The path must look like `/Suche/{a}/{b}/{c}/{type}`; segments a-c become the
 * `geocodes` parameter. The real estate type is always sent as
 * `apartmentrent`, whatever the last path segment says. Query parameters are
 * translated through a whitelist; `enteredFrom` is dropped silently.
 *
 * @param {string} webUrl - Search URL copied from the ImmoScout website.
 * @returns {string} The mobile API search URL.
 * @throws {Error} When the URL cannot be parsed, the path has an unexpected
 *   format, a query parameter is not supported or an equipment value is unknown.
 */
export function convertWebToMobile(webUrl) {
  let url;
  try {
    url = new URL(webUrl);
  } catch (err) {
    throw new Error(`Invalid URL: ${webUrl}`, { cause: err });
  }

  // A leading slash yields an empty first segment, hence 6 segments for 5 path parts.
  const segments = url.pathname.split('/');
  if (segments.length < 6 || segments[1] !== 'Suche') {
    throw new Error(`Unexpected path format: ${url.pathname}`);
  }
  const geocodes = `/${segments[2]}/${segments[3]}/${segments[4]}`;

  const { query: webParams } = queryString.parseUrl(webUrl, { arrayFormat: 'comma' });
  delete webParams['enteredFrom'];

  // Reject unsupported parameters first. Own-property checks are required:
  // a plain lookup would accept inherited names such as "constructor".
  for (const key of Object.keys(webParams)) {
    if (!Object.hasOwn(WEB_TO_MOBILE_PARAM_NAMES, key)) {
      throw new Error(`Unsupported Web-API parameter: "${key}"`);
    }
  }

  const mobileParams = {
    searchType: 'region',
    geocodes,
    realestatetype: 'apartmentrent',
  };

  for (const [webKey, webVal] of Object.entries(webParams)) {
    let value = webVal;

    if (webKey === 'equipment') {
      // A single equipment entry is parsed as a plain string, several as an array.
      const tokens = Array.isArray(value) ? value : String(value).split(',');
      value = tokens.map((token) => {
        const lower = token.toLowerCase();
        if (!Object.hasOwn(WEB_TO_MOBILE_EQUIPMENT, lower)) {
          throw new Error(`Unknown equipment type: "${token}"`);
        }
        return WEB_TO_MOBILE_EQUIPMENT[lower];
      });
    }

    mobileParams[WEB_TO_MOBILE_PARAM_NAMES[webKey]] = value;
  }

  const mobileQuery = queryString.stringify(mobileParams, {
    arrayFormat: 'comma',
    encode: true,
    skipEmptyString: true,
  });

  return `https://api.mobile.immobilienscout24.de/search/list?${mobileQuery}`;
}
|
||||
|
||||
export { config };
|
||||
72
test/provider/immoscout-mobile.test.js
Normal file
72
test/provider/immoscout-mobile.test.js
Normal file
@@ -0,0 +1,72 @@
|
||||
import {expect} from 'chai';
|
||||
import {convertWebToMobile} from '../../lib/provider/immoscout-mobile.js';
|
||||
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
||||
import {mockFredy, providerConfig} from '../utils.js';
|
||||
import {get} from '../mocks/mockNotification.js';
|
||||
import * as provider from '../../lib/provider/immoscout-mobile.js';
|
||||
|
||||
// End-to-end check of the provider through the mocked Fredy runtime.
describe('#immoscout-mobile provider testsuite()', () => {
  after(() => {
    similarityCache.stopCacheCleanup();
  });

  provider.init(providerConfig.immoscout, [], []);

  it('should test immoscout-mobile provider', async () => {
    const Fredy = await mockFredy();
    const fredy = new Fredy(provider.config, null, provider.metaInformation.id, '', similarityCache);

    // Await the run directly: a failed expectation or a rejected run now fails
    // the test with its real error instead of leaving a wrapper promise pending.
    const listings = await fredy.execute();

    expect(listings).to.be.a('array');
    const notificationObj = get();
    expect(notificationObj).to.be.a('object');
    expect(notificationObj.serviceName).to.equal('immoscout-mobile');
    notificationObj.payload.forEach((notify) => {
      /** check the actual structure **/
      expect(notify.id).to.be.a('string');
      expect(notify.price).to.be.a('string');
      expect(notify.size).to.be.a('string');
      expect(notify.title).to.be.a('string');
      expect(notify.link).to.be.a('string');
      expect(notify.address).to.be.a('string');
      /** check the values if possible **/
      expect(notify.size).to.be.not.empty;
      expect(notify.title).to.be.not.empty;
      expect(notify.link).that.does.include('https://www.immobilienscout24.de/');
    });
  });
});
|
||||
|
||||
// Unit tests for the web URL -> mobile API URL conversion.
describe('#immoscout-mobile URL conversion', () => {
  // Every supported parameter at once; `enteredFrom` must be dropped.
  it('should convert a full web URL to mobile URL', () => {
    const webUrl = 'https://www.immobilienscout24.de/Suche/de/berlin/berlin/wohnung-mieten?heatingtypes=central,selfcontainedcentral&haspromotion=false&numberofrooms=2.0-5.0&livingspace=10.0-25.0&energyefficiencyclasses=a,b,c,d,e,f,g,h,a_plus&exclusioncriteria=projectlisting,swapflat&equipment=parking,cellar,builtinkitchen,lift,garden,guesttoilet,balcony&petsallowedtypes=no,yes,negotiable&price=10.0-100.0&constructionyear=1920-2026&apartmenttypes=halfbasement,penthouse,other,loft,groundfloor,terracedflat,raisedgroundfloor,roofstorey,apartment,maisonette&pricetype=calculatedtotalrent&floor=2-7&enteredFrom=result_list';
    const expectedMobileUrl = 'https://api.mobile.immobilienscout24.de/search/list?apartmenttypes=halfbasement,penthouse,other,loft,groundfloor,terracedflat,raisedgroundfloor,roofstorey,apartment,maisonette&constructionyear=1920-2026&energyefficiencyclasses=a,b,c,d,e,f,g,h,a_plus&equipment=parking,cellar,builtInKitchen,lift,garden,guestToilet,balcony&exclusioncriteria=projectlisting,swapflat&floor=2-7&geocodes=%2Fde%2Fberlin%2Fberlin&haspromotion=false&heatingtypes=central,selfcontainedcentral&livingspace=10.0-25.0&numberofrooms=2.0-5.0&petsallowedtypes=no,yes,negotiable&price=10.0-100.0&pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region';

    const actualMobileUrl = convertWebToMobile(webUrl);
    expect(actualMobileUrl).to.equal(expectedMobileUrl);
  });

  // Parameters without a known mobile equivalent are rejected.
  it('should throw an error for unsupported query parameters', () => {
    const webUrl = 'https://www.immobilienscout24.de/Suche/de/berlin/berlin/wohnung-mieten?minimuminternetspeed=100000';

    expect(() => convertWebToMobile(webUrl)).to.throw('Unsupported Web-API parameter: "minimuminternetspeed"');
  });

  // Input that is not a URL at all.
  it('should throw an error for invalid URL', () => {
    const invalidUrl = 'invalid-url';

    expect(() => convertWebToMobile(invalidUrl)).to.throw('Invalid URL: invalid-url');
  });

  // A valid URL whose path is not a search path.
  it('should throw an error for unexpected path format', () => {
    const webUrl = 'https://www.immobilienscout24.de/invalid/path/format';

    expect(() => convertWebToMobile(webUrl)).to.throw('Unexpected path format: /invalid/path/format');
  });
});
|
||||
Reference in New Issue
Block a user