bringing back immonet by using scrapingant

This commit is contained in:
weakmap@gmail.com
2023-04-15 18:24:51 +02:00
parent 6fbee3e7c6
commit 4ba098e0b6
10 changed files with 47 additions and 30 deletions

View File

@@ -45,15 +45,15 @@ class FredyRuntime {
_getListings(url) {
return new Promise((resolve, reject) => {
const id = this._providerId;
if (scrapingAnt.isImmoscout(id) && !scrapingAnt.isScrapingAntApiKeySet()) {
const error = 'Immoscout can only be used with if you have set an apikey for scrapingAnt.';
if (scrapingAnt.needScrapingAnt(id) && !scrapingAnt.isScrapingAntApiKeySet()) {
const error = 'Immoscout or Immonet can only be used with if you have set an apikey for scrapingAnt.';
/* eslint-disable no-console */
console.log(error);
/* eslint-enable no-console */
reject(error);
return;
}
const u = scrapingAnt.isImmoscout(id) ? scrapingAnt.transformUrlForScrapingAnt(url, id) : url;
const u = scrapingAnt.needScrapingAnt(id) ? scrapingAnt.transformUrlForScrapingAnt(url, id) : url;
try {
if (this._providerConfig.paginate != null) {
xray(u, this._providerConfig.crawlContainer, [this._providerConfig.crawlFields])

View File

@@ -28,7 +28,7 @@ jobRouter.get('/processingTimes', async (req, res) => {
let scrapingAntData = null;
if (config.scrapingAnt.apiKey != null && config.scrapingAnt.apiKey.length > 0) {
try {
const response = await fetch(`https://api.scrapingant.com/v1/usage?x-api-key=${config.scrapingAnt.apiKey}`);
const response = await fetch(`https://api.scrapingant.com/v2/usage?x-api-key=${config.scrapingAnt.apiKey}`);
scrapingAntData = await response.json();
} catch (Exception) {
console.error('Could not query plan data from scraping ant.', Exception);

View File

@@ -1,14 +1,12 @@
import utils from '../utils.js';
let appliedBlackList = [];
function normalize(o) {
const id = parseInt(o.id.substring(o.id.indexOf('_') + 1, o.id.length));
const id = o.id.substring(o.id.lastIndexOf('/') + 1, o.id.length);
const size = o.size != null ? o.size.replace('Wohnfläche ', '') : 'N/A m²';
const price = o.price.replace('Kaufpreis ', '');
const address = o.address.split(' • ')[o.address.split(' • ').length - 1];
const title = o.title || 'No title available';
//normally we would just read the link from the source, but immonet decided to trick user by adding a click listener instead of
//a href to do some weird reporting. (Very user friendly for handicaped ppl... not)
const link = `https://www.immonet.de/angebot/${id}`;
const link = o.id;
return Object.assign(o, { id, address, price, size, title, link });
}
function applyBlacklist(o) {
@@ -18,14 +16,14 @@ function applyBlacklist(o) {
}
const config = {
url: null,
crawlContainer: '#result-list-stage .item',
crawlContainer: '.content-wrapper-tiles .ng-star-inserted',
sortByDateParam: 'sortby=19',
crawlFields: {
id: '@id',
price: 'div[id*="selPrice_"] | trim',
size: 'div[id*="selArea_"] | trim',
title: '.item a img@title',
address: '.item .box-25 .ellipsis .text-100 | removeNewline | trim',
id: '.card a@href',
title: '.card h3 |trim',
price: '.card .has-font-300 .is-bold | trim',
size: '.card .has-font-300 .ml-100 | trim',
address: '.card span:nth-child(2) | trim',
},
paginate: '#idResultList .margin-bottom-6.margin-bottom-sm-12 .panel a.pull-right@href',
normalize: normalize,

View File

@@ -46,9 +46,10 @@ function makeDriver(headers = {}) {
/* eslint-enable no-console */
}
}
/**
* The regular request driver is taking care of everyting, that doesn't need to be scraped by ScrapingAnt (which is
* everything != Immoscout as of writing this)
* everything != Immoscout & Immonet as of writing this)
*/
return async function driver(context, callback) {
if (context.url.toLowerCase().indexOf('scrapingant') !== -1) {

View File

@@ -1,12 +1,22 @@
import { metaInformation } from '../provider/immoscout.js';
import { metaInformation as immoScoutInfo } from '../provider/immoscout.js';
import { metaInformation as immoNetInfo } from '../provider/immonet.js';
import { config } from '../utils.js';
const isImmoscout = (id) => {
return id.toLowerCase() === metaInformation.id;
const additionalImmonetUrlParams = `&wait_for_selector=.content-wrapper-tiles&js_snippet=${new Buffer(
'window.scrollTo(0,document.body.scrollHeight);'
).toString('base64')}`;
const needScrapingAnt = (id) => {
return id.toLowerCase() === immoScoutInfo.id || id.toLowerCase() === immoNetInfo.id;
};
export const transformUrlForScrapingAnt = (url, id) => {
if (isImmoscout(id)) {
//only do calls to scrapingAnt when dealing with Immoscout
url = `https://api.scrapingant.com/v1/general?url=${encodeURIComponent(url)}&proxy_type=datacenter`;
let urlParams = '';
if (needScrapingAnt(id)) {
if (id.toLowerCase() === immoNetInfo.id) {
urlParams = additionalImmonetUrlParams;
}
//only do calls to scrapingAnt when dealing with Immoscout/Immonet
url = `https://api.scrapingant.com/v2/general?url=${encodeURIComponent(url)}&proxy_type=datacenter${urlParams}`;
}
return url;
};
@@ -16,4 +26,4 @@ export const isScrapingAntApiKeySet = () => {
export const makeUrlResidential = (url) => {
return url.replace('datacenter', 'residential');
};
export { isImmoscout };
export { needScrapingAnt };