Compare commits

..

2 Commits

Author SHA1 Message Date
Christian Kellner
111ef8be43 fixing kleinanzeigen test 2024-09-05 13:36:02 +02:00
Christian Kellner
35feb772d7 upgrading dependencies, fixing immowelt, using hash of price and id as unique identifier for listings 2024-09-05 13:34:14 +02:00
16 changed files with 681 additions and 447 deletions

View File

@@ -1,18 +1,22 @@
import utils from '../utils.js';
import utils, {buildHash} from '../utils.js';
let appliedBlackList = [];
function normalize(o) {
let size = `${o.size.replace(' Wohnfläche ', '').trim()}`;
if (o.rooms != null) {
size += ` / / ${o.rooms.trim()}`;
}
const link = `https://www.1a-immobilienmarkt.de/expose/${o.id}.html`;
return Object.assign(o, { size, link });
const id = buildHash(o.id, o.price);
return Object.assign(o, { id, size, link });
}
function applyBlacklist(o) {
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
return titleNotBlacklisted && descNotBlacklisted;
}
const config = {
url: null,
crawlContainer: '.tabelle',

View File

@@ -1,4 +1,4 @@
import utils from '../utils.js';
import utils, {buildHash} from '../utils.js';
let appliedBlackList = [];
function shortenLink(link) {
return link.substring(0, link.indexOf('?'));
@@ -7,12 +7,12 @@ function parseId(shortenedLink) {
return shortenedLink.substring(shortenedLink.lastIndexOf('/') + 1);
}
function normalize(o) {
const id = parseId(shortenLink(o.link));
const size = o.size || 'N/A m²';
const price = o.price || 'N/A €';
const title = o.title || 'No title available';
const address = o.address || 'No address available';
const link = shortenLink(o.link);
const id = buildHash(parseId(shortenLink(o.link)), o.price);
return Object.assign(o, { id, price, size, title, address, link });
}
function applyBlacklist(o) {

View File

@@ -1,12 +1,12 @@
import utils from '../utils.js';
import utils, {buildHash} from '../utils.js';
let appliedBlackList = [];
function normalize(o) {
const id = o.id.substring(o.id.lastIndexOf('/') + 1, o.id.length);
const size = o.size != null ? o.size.replace('Wohnfläche ', '') : 'N/A m²';
const price = o.price.replace('Kaufpreis ', '');
const address = o.address.split(' • ')[o.address.split(' • ').length - 1];
const title = o.title || 'No title available';
const link = o.id;
const id = buildHash(o.id.substring(o.id.lastIndexOf('/') + 1, o.id.length), price);
return Object.assign(o, { id, address, price, size, title, link });
}
function applyBlacklist(o) {

View File

@@ -1,4 +1,4 @@
import utils from '../utils.js';
import utils, {buildHash} from '../utils.js';
let appliedBlackList = [];
function nullOrEmpty(val) {
return val == null || val.length === 0;
@@ -7,7 +7,8 @@ function normalize(o) {
const title = nullOrEmpty(o.title) ? 'NO TITLE FOUND' : o.title.replace('NEU', '');
const address = nullOrEmpty(o.address) ? 'NO ADDRESS FOUND' : (o.address || '').replace(/\(.*\),.*$/, '').trim();
const link = nullOrEmpty(o.link) ? 'NO LINK' : `https://www.immobilienscout24.de${o.link.substring(o.link.indexOf('/expose'))}`;
return Object.assign(o, { title, address, link });
const id = buildHash(o.id, o.price);
return Object.assign(o, { id, title, address, link });
}
function applyBlacklist(o) {
return !utils.isOneOf(o.title, appliedBlackList);

View File

@@ -1,14 +1,15 @@
import utils from '../utils.js';
import utils, {buildHash} from '../utils.js';
let appliedBlackList = [];
function normalize(o) {
const id = o.id.substring(o.id.indexOf('-') + 1, o.id.length);
const size = o.size || 'N/A m²';
const price = (o.price || '--- €').replace('Preis auf Anfrage', '--- €');
const title = o.title || 'No title available';
const link = `https://immo.swp.de/immobilien/${id}`;
const immoId = o.id.substring(o.id.indexOf('-') + 1, o.id.length);
const link = `https://immo.swp.de/immobilien/${immoId}`;
const description = o.description;
const id = buildHash(immoId, price);
return Object.assign(o, {id, price, size, title, link, description});
}

View File

@@ -1,37 +1,42 @@
import utils from '../utils.js';
import utils, {buildHash} from '../utils.js';
let appliedBlackList = [];
function normalize(o) {
return o;
const id = buildHash(o.id, o.price);
return Object.assign(o, {id});
}
function applyBlacklist(o) {
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
return titleNotBlacklisted && descNotBlacklisted;
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
return titleNotBlacklisted && descNotBlacklisted;
}
const config = {
url: null,
crawlContainer: "div[class^='EstateItem-']",
sortByDateParam: 'sd=DESC&sf=TIMESTAMP',
crawlFields: {
id: 'a@id',
price: "div[class^='KeyFacts-'] [data-test='price'] | removeNewline | trim",
size: "div[class^='KeyFacts-'] [data-test='area'] | removeNewline | trim",
title: "div[class^='FactsMain-'] h2",
link: 'a@href',
address: "div[class^='estateFacts-'] span | removeNewline | trim",
},
paginate: '#pnlPaging #nlbPlus@href',
normalize: normalize,
filter: applyBlacklist,
url: null,
crawlContainer: 'div[data-testid="serp-card-testid"]',
sortByDateParam: 'sd=DESC&sf=TIMESTAMP',
crawlFields: {
id: 'a@id',
price: 'div[data-testid="cardmfe-price-testid"] | removeNewline | trim',
size: 'div[data-testid="cardmfe-keyfacts-testid"] | removeNewline | trim',
title: '.css-1cbj9xw',
link: 'a@href',
address: 'div[data-testid="cardmfe-description-box-address"] | removeNewline | trim',
},
paginate: '#pnlPaging #nlbPlus@href',
normalize: normalize,
filter: applyBlacklist,
};
export const init = (sourceConfig, blacklist) => {
config.enabled = sourceConfig.enabled;
config.url = sourceConfig.url;
appliedBlackList = blacklist || [];
config.enabled = sourceConfig.enabled;
config.url = sourceConfig.url;
appliedBlackList = blacklist || [];
};
export const metaInformation = {
name: 'Immowelt',
baseUrl: 'https://www.immowelt.de/',
id: 'immowelt',
name: 'Immowelt',
baseUrl: 'https://www.immowelt.de/',
id: 'immowelt',
};
export { config };
export {config};

View File

@@ -1,44 +1,49 @@
import utils from '../utils.js';
import utils, {buildHash} from '../utils.js';
let appliedBlackList = [];
let appliedBlacklistedDistricts = [];
function normalize(o) {
const size = o.size || '--- m²';
return Object.assign(o, { size });
const size = o.size || '--- m²';
const id = buildHash(o.id, o.price);
return Object.assign(o, {id, size});
}
function applyBlacklist(o) {
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
const isBlacklistedDistrict =
appliedBlacklistedDistricts.length === 0 ? false : utils.isOneOf(o.description, appliedBlacklistedDistricts);
return !isBlacklistedDistrict && titleNotBlacklisted && descNotBlacklisted;
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
const isBlacklistedDistrict =
appliedBlacklistedDistricts.length === 0 ? false : utils.isOneOf(o.description, appliedBlacklistedDistricts);
return !isBlacklistedDistrict && titleNotBlacklisted && descNotBlacklisted;
}
const config = {
url: null,
crawlContainer: '#srchrslt-adtable .ad-listitem ',
//sort by date is standard oO
sortByDateParam: null,
crawlFields: {
id: '.aditem@data-adid | int',
price: '.aditem-main--middle--price-shipping--price | removeNewline | trim',
size: '.aditem-main .text-module-end span:nth-child(2) | removeNewline | trim',
title: '.aditem-main .text-module-begin a | removeNewline | trim',
link: '.aditem-main .text-module-begin a@href | removeNewline | trim',
description: '.aditem-main p:not(.text-module-end) | removeNewline | trim',
address: '.aditem-main--top--left | trim | removeNewline',
},
paginate: '#srchrslt-pagination .pagination-next@href',
normalize: normalize,
filter: applyBlacklist,
url: null,
crawlContainer: '#srchrslt-adtable .ad-listitem ',
//sort by date is standard oO
sortByDateParam: null,
crawlFields: {
id: '.aditem@data-adid | int',
price: '.aditem-main--middle--price-shipping--price | removeNewline | trim',
size: '.aditem-main .text-module-end span:nth-child(2) | removeNewline | trim',
title: '.aditem-main .text-module-begin a | removeNewline | trim',
link: '.aditem-main .text-module-begin a@href | removeNewline | trim',
description: '.aditem-main p:not(.text-module-end) | removeNewline | trim',
address: '.aditem-main--top--left | trim | removeNewline',
},
paginate: '#srchrslt-pagination .pagination-next@href',
normalize: normalize,
filter: applyBlacklist,
};
export const metaInformation = {
name: 'Ebay Kleinanzeigen',
baseUrl: 'https://www.kleinanzeigen.de/',
id: 'kleinanzeigen',
name: 'Ebay Kleinanzeigen',
baseUrl: 'https://www.kleinanzeigen.de/',
id: 'kleinanzeigen',
};
export const init = (sourceConfig, blacklist, blacklistedDistricts) => {
config.enabled = sourceConfig.enabled;
config.url = sourceConfig.url;
appliedBlacklistedDistricts = blacklistedDistricts || [];
appliedBlackList = blacklist || [];
config.enabled = sourceConfig.enabled;
config.url = sourceConfig.url;
appliedBlacklistedDistricts = blacklistedDistricts || [];
appliedBlackList = blacklist || [];
};
export { config };
export {config};

View File

@@ -1,38 +1,44 @@
import utils from '../utils.js';
import utils, {buildHash} from '../utils.js';
let appliedBlackList = [];
function nullOrEmpty(val) {
return val == null || val.length === 0;
}
function normalize(o) {
const link = nullOrEmpty(o.link) ? 'NO LINK' : `https://www.neubaukompass.de${o.link.substring(o.link.indexOf('/neubau'))}`;
return {...o, link};
const id = buildHash(o.id, o.price);
return Object.assign(o, {id, link});
}
function applyBlacklist(o) {
return !utils.isOneOf(o.title, appliedBlackList);
return !utils.isOneOf(o.title, appliedBlackList);
}
const config = {
url: null,
crawlContainer: '.nbk-container >div article',
sortByDateParam: 'Sortierung=Id&Richtung=DESC',
crawlFields: {
id: '@id',
title: 'a.nbk-truncate@title | removeNewline | trim',
link: 'a.nbk-truncate@href',
address: 'p.nbk-truncate | removeNewline | trim',
price: 'p.nbk-mb-0 | removeNewline | trim',
},
paginate: '.numbered-pager__bottom .numbered-pager--info li:nth-child(2) a@href',
normalize: normalize,
filter: applyBlacklist,
url: null,
crawlContainer: '.nbk-container >div article',
sortByDateParam: 'Sortierung=Id&Richtung=DESC',
crawlFields: {
id: '@id',
title: 'a.nbk-truncate@title | removeNewline | trim',
link: 'a.nbk-truncate@href',
address: 'p.nbk-truncate | removeNewline | trim',
price: 'p.nbk-mb-0 | removeNewline | trim',
},
paginate: '.numbered-pager__bottom .numbered-pager--info li:nth-child(2) a@href',
normalize: normalize,
filter: applyBlacklist,
};
export const init = (sourceConfig, blacklist) => {
config.enabled = sourceConfig.enabled;
config.url = sourceConfig.url;
appliedBlackList = blacklist || [];
config.enabled = sourceConfig.enabled;
config.url = sourceConfig.url;
appliedBlackList = blacklist || [];
};
export const metaInformation = {
name: 'Neubau Kompass',
baseUrl: 'https://www.neubaukompass.de/',
id: 'neubauKompass',
name: 'Neubau Kompass',
baseUrl: 'https://www.neubaukompass.de/',
id: 'neubauKompass',
};
export { config };
export {config};

View File

@@ -1,36 +1,41 @@
import utils from '../utils.js';
import utils, {buildHash} from '../utils.js';
let appliedBlackList = [];
function normalize(o) {
return o;
const id = buildHash(o.id, o.price);
return Object.assign(o, {id});
}
function applyBlacklist(o) {
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
return o.id != null && titleNotBlacklisted && descNotBlacklisted;
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
return o.id != null && titleNotBlacklisted && descNotBlacklisted;
}
const config = {
url: null,
crawlContainer: '#main_column .wgg_card',
sortByDateParam: 'sort_column=0&sort_order=0',
crawlFields: {
id: '@data-id',
details: '.row .noprint .col-xs-11 |removeNewline |trim',
price: '.middle .col-xs-3 |removeNewline |trim',
size: '.middle .text-right |removeNewline |trim',
title: '.truncate_title a |removeNewline |trim',
link: '.truncate_title a@href',
},
normalize: normalize,
filter: applyBlacklist,
url: null,
crawlContainer: '#main_column .wgg_card',
sortByDateParam: 'sort_column=0&sort_order=0',
crawlFields: {
id: '@data-id',
details: '.row .noprint .col-xs-11 |removeNewline |trim',
price: '.middle .col-xs-3 |removeNewline |trim',
size: '.middle .text-right |removeNewline |trim',
title: '.truncate_title a |removeNewline |trim',
link: '.truncate_title a@href',
},
normalize: normalize,
filter: applyBlacklist,
};
export const init = (sourceConfig, blacklist) => {
config.enabled = sourceConfig.enabled;
config.url = sourceConfig.url;
appliedBlackList = blacklist || [];
config.enabled = sourceConfig.enabled;
config.url = sourceConfig.url;
appliedBlackList = blacklist || [];
};
export const metaInformation = {
name: 'Wg gesucht',
baseUrl: 'https://www.wg-gesucht.de/',
id: 'wgGesucht',
name: 'Wg gesucht',
baseUrl: 'https://www.wg-gesucht.de/',
id: 'wgGesucht',
};
export { config };
export {config};

View File

@@ -1,51 +1,69 @@
import { dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { readFile } from 'fs/promises';
import {dirname} from 'node:path';
import {fileURLToPath} from 'node:url';
import {readFile} from 'fs/promises';
import {createHash} from 'crypto';
function isOneOf(word, arr) {
if (arr == null || arr.length === 0) {
return false;
}
const expression = String.raw`\b(${arr.join('|')})\b`;
const blacklist = new RegExp(expression, 'ig');
return blacklist.test(word);
if (arr == null || arr.length === 0) {
return false;
}
const expression = String.raw`\b(${arr.join('|')})\b`;
const blacklist = new RegExp(expression, 'ig');
return blacklist.test(word);
}
function nullOrEmpty(val) {
return val == null || val.length === 0;
return val == null || val.length === 0;
}
function timeStringToMs(timeString, now) {
const d = new Date(now);
const parts = timeString.split(':');
d.setHours(parts[0]);
d.setMinutes(parts[1]);
d.setSeconds(0);
return d.getTime();
const d = new Date(now);
const parts = timeString.split(':');
d.setHours(parts[0]);
d.setMinutes(parts[1]);
d.setSeconds(0);
return d.getTime();
}
function duringWorkingHoursOrNotSet(config, now) {
const { workingHours } = config;
if (workingHours == null || nullOrEmpty(workingHours.from) || nullOrEmpty(workingHours.to)) {
return true;
}
const toDate = timeStringToMs(workingHours.to, now);
const fromDate = timeStringToMs(workingHours.from, now);
return fromDate <= now && toDate >= now;
const {workingHours} = config;
if (workingHours == null || nullOrEmpty(workingHours.from) || nullOrEmpty(workingHours.to)) {
return true;
}
const toDate = timeStringToMs(workingHours.to, now);
const fromDate = timeStringToMs(workingHours.from, now);
return fromDate <= now && toDate >= now;
}
function getDirName() {
return dirname(fileURLToPath(import.meta.url));
return dirname(fileURLToPath(import.meta.url));
}
function buildHash(...inputs) {
if (inputs == null) {
return null;
}
const cleaned = inputs.filter(i => i != null && i.length > 0);
if (cleaned.length === 0) {
return null;
}
return createHash('sha256')
.update(cleaned.join(','))
.digest('hex');
}
const config = JSON.parse(await readFile(new URL('../conf/config.json', import.meta.url)));
export { isOneOf };
export { nullOrEmpty };
export { duringWorkingHoursOrNotSet };
export { getDirName };
export { config };
export {isOneOf};
export {nullOrEmpty};
export {duringWorkingHoursOrNotSet};
export {getDirName};
export {config};
export {buildHash};
export default {
isOneOf,
nullOrEmpty,
duringWorkingHoursOrNotSet,
getDirName,
config,
isOneOf,
nullOrEmpty,
duringWorkingHoursOrNotSet,
getDirName,
config,
};

View File

@@ -1,6 +1,6 @@
{
"name": "fredy",
"version": "9.0.0",
"version": "10.0.0",
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
"scripts": {
"start": "node index.js",
@@ -55,7 +55,7 @@
"Firefox ESR"
],
"dependencies": {
"@douyinfe/semi-ui": "2.62.1",
"@douyinfe/semi-ui": "2.65.0",
"@rematch/core": "2.2.0",
"@rematch/loading": "2.1.2",
"@sendgrid/mail": "8.1.3",
@@ -64,14 +64,14 @@
"body-parser": "1.20.2",
"cookie-session": "2.1.0",
"handlebars": "4.7.8",
"highcharts": "11.4.6",
"highcharts": "11.4.8",
"highcharts-react-official": "3.2.1",
"lodash": "4.17.21",
"lowdb": "6.0.1",
"markdown": "^0.5.0",
"nanoid": "5.0.7",
"node-fetch": "3.3.2",
"node-mailjet": "6.0.5",
"node-mailjet": "6.0.6",
"query-string": "8.2.0",
"react": "18.3.1",
"react-dom": "18.3.1",
@@ -84,24 +84,24 @@
"serve-static": "1.15.0",
"slack": "11.0.2",
"string-similarity": "^4.0.4",
"vite": "5.3.4",
"vite": "5.4.3",
"x-ray": "2.3.4"
},
"devDependencies": {
"@babel/core": "7.24.9",
"@babel/eslint-parser": "7.24.8",
"@babel/preset-env": "7.24.8",
"@babel/core": "7.25.2",
"@babel/eslint-parser": "7.25.1",
"@babel/preset-env": "7.25.4",
"@babel/preset-react": "7.24.7",
"chai": "5.1.1",
"eslint": "8.56.0",
"eslint-config-prettier": "8.8.0",
"eslint-plugin-react": "7.35.0",
"eslint-plugin-react": "7.35.2",
"esmock": "2.6.7",
"history": "5.3.0",
"husky": "4.3.8",
"less": "4.2.0",
"lint-staged": "13.2.2",
"mocha": "10.7.0",
"mocha": "10.7.3",
"prettier": "3.3.3",
"redux-logger": "3.0.6"
}

View File

@@ -20,7 +20,7 @@ describe('#einsAImmobilien testsuite()', () => {
expect(notificationObj.serviceName).to.equal('einsAImmobilien');
notificationObj.payload.forEach((notify) => {
/** check the actual structure **/
expect(notify.id).to.be.a('number');
expect(notify.id).to.be.a('string');
expect(notify.price).to.be.a('string');
expect(notify.size).to.be.a('string');
expect(notify.title).to.be.a('string');

View File

@@ -20,7 +20,7 @@ describe('#kleinanzeigen testsuite()', () => {
expect(notificationObj.serviceName).to.equal('kleinanzeigen');
notificationObj.payload.forEach((notify) => {
/** check the actual structure **/
expect(notify.id).to.be.a('number');
expect(notify.id).to.be.a('string');
expect(notify.title).to.be.a('string');
expect(notify.link).to.be.a('string');
expect(notify.address).to.be.a('string');

View File

@@ -10,7 +10,7 @@ describe('#neubauKompass testsuite()', () => {
similarityCache.stopCacheCleanup();
});
provider.init(providerConfig.neubauKompass, [], []);
it.only('should test neubauKompass provider', async () => {
it('should test neubauKompass provider', async () => {
const Fredy = await mockFredy();
return await new Promise((resolve) => {
if (!scrapingAnt.isScrapingAntApiKeySet()) {

16
test/utils/utils.test.js Normal file
View File

@@ -0,0 +1,16 @@
import { expect } from 'chai';
import {buildHash} from '../../lib/utils.js';
describe('utilsCheck', () => {
describe('#utilsCheck()', () => {
it('should be null when null input', () => {
expect(buildHash(null)).to.be.null;
});
it('should be null when null empty', () => {
expect(buildHash('')).to.be.null;
});
it('should return a value', () => {
expect(buildHash('bla', '', null)).to.be.a.string;
});
});
});

739
yarn.lock

File diff suppressed because it is too large Load Diff