Compare commits

..

4 Commits

Author SHA1 Message Date
weakmap@gmail.com
5cceae11cc upgrading dependencies | adding sqlite for later analysis 2024-11-01 17:03:43 +01:00
weakmap@gmail.com
a4c5bfcbf7 fixing tests 2024-10-03 16:09:19 +02:00
weakmap@gmail.com
6d2ab5f958 making sure immowelt does not include suggested ranges 2024-10-03 16:03:47 +02:00
weakmap@gmail.com
d3cb3a5881 regex for einsAImmobilien price normalization | filter listings that does not have all required keys 2024-09-29 16:58:01 +02:00
10 changed files with 941 additions and 1314 deletions

View File

@@ -87,7 +87,10 @@ class FredyRuntime {
return listings.map(this._providerConfig.normalize);
}
_filter(listings) {
return listings.filter(this._providerConfig.filter);
//only return those where all the fields have been found
const keys = Object.keys(this._providerConfig.crawlFields);
const filteredListings = listings.filter((item) => keys.every((key) => key in item));
return filteredListings.filter(this._providerConfig.filter);
}
_findNew(listings) {
const newListings = listings.filter((o) => getKnownListings(this._jobKey, this._providerId)[o.id] == null);

View File

@@ -8,7 +8,7 @@ export const send = ({ serviceName, newListings, notificationConfig, jobKey }) =
const jobName = job == null ? jobKey : job.name;
const promises = newListings.map((newListing) => {
const title = `${jobName} at ${serviceName}: ${newListing.title}`;
const message = `Address: ${newListing.address}\nSize: ${newListing.size}\nPrice: ${newListing.price}\Link: ${newListing.link}`;
const message = `Address: ${newListing.address}\nSize: ${newListing.size}\nPrice: ${newListing.price}\nink: ${newListing.link}`;
return fetch(server, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },

View File

@@ -0,0 +1,25 @@
import { markdown2Html } from '../../services/markdown.js';
import Database from 'better-sqlite3';
export const send = ({ serviceName, newListings, jobKey }) => {
const db = new Database('db/listings.db');
const fields = ['serviceName', 'jobKey', 'id', 'size', 'rooms', 'price', 'address', 'title', 'link', 'description'];
db.prepare(`CREATE TABLE IF NOT EXISTS listing (${fields.join(' TEXT, ')} TEXT);`).run();
const insert = db.prepare(`INSERT INTO listing (${fields.join(', ')}) VALUES (@${fields.join(', @')})`);
newListings.map((listing) => {
let insertListing = {};
fields.map((field) => {
insertListing[field] = listing[field];
});
insertListing.serviceName = serviceName;
insertListing.jobKey = jobKey;
insert.run(insertListing);
});
return Promise.resolve();
};
export const config = {
id: 'sqlite',
name: 'Sqlite',
description: 'This adapter stores listings in a local sqlite3 database.',
config: {},
readme: markdown2Html('lib/notification/adapter/sqlite.md'),
};

View File

@@ -0,0 +1,7 @@
### Sqlite Adapter
This adapter stores search results in a sqlite database located in db/listings.db. This file can be used for further analysis later on.
Fields are:
```
['serviceName', 'jobKey', 'id', 'size', 'rooms', 'price', 'address', 'title', 'link', 'description']
```

View File

@@ -1,4 +1,4 @@
import utils, {buildHash} from '../utils.js';
import utils, { buildHash } from '../utils.js';
let appliedBlackList = [];
function normalize(o) {
@@ -7,10 +7,28 @@ function normalize(o) {
size += ` / / ${o.rooms.trim()}`;
}
const link = `https://www.1a-immobilienmarkt.de/expose/${o.id}.html`;
const id = buildHash(o.id, o.price);
return Object.assign(o, { id, size, link });
const price = normalizePrice(o.price);
const id = buildHash(o.id, price);
return Object.assign(o, { id, price, size, link });
}
/**
* einsAImmobilien sometimes use a weird pricing label such as `775.700,00 EUR Kaufpreis ab 2.475 € mtl`.
* Make sure to extract only the actual price out of the string.
* @param price
* @returns {*}
*/
function normalizePrice(price) {
if (price == null) {
return null;
}
const regex = /(\d{1,3}(?:\.\d{3})*,\d{2})\s?(EUR|€)/g;
const result = price.match(regex);
if (result == null || result.length === 0) {
return price;
}
return result[0];
}
function applyBlacklist(o) {
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
@@ -27,7 +45,6 @@ const config = {
size: '.tabelle .inner_object_data .data_boxes div:nth-child(1)',
rooms: '.tabelle .inner_object_data .data_boxes div:nth-child(2)',
title: '.tabelle .inner_object_data .tabelle_inhalt_titel_black | removeNewline | trim',
description: '.tabelle .inner_object_data .objekt_beschreibung | removeNewline | trim',
},
normalize: normalize,
filter: applyBlacklist,

View File

@@ -1,42 +1,43 @@
import utils, {buildHash} from '../utils.js';
import utils, { buildHash } from '../utils.js';
let appliedBlackList = [];
function normalize(o) {
const id = buildHash(o.id, o.price);
return Object.assign(o, {id});
const id = buildHash(o.id, o.price);
return Object.assign(o, { id });
}
function applyBlacklist(o) {
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
return titleNotBlacklisted && descNotBlacklisted;
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
return titleNotBlacklisted && descNotBlacklisted;
}
const config = {
url: null,
crawlContainer: 'div[data-testid="serp-card-testid"]',
sortByDateParam: 'sd=DESC&sf=TIMESTAMP',
crawlFields: {
id: 'a@id',
price: 'div[data-testid="cardmfe-price-testid"] | removeNewline | trim',
size: 'div[data-testid="cardmfe-keyfacts-testid"] | removeNewline | trim',
title: '.css-1cbj9xw',
link: 'a@href',
address: 'div[data-testid="cardmfe-description-box-address"] | removeNewline | trim',
},
paginate: '#pnlPaging #nlbPlus@href',
normalize: normalize,
filter: applyBlacklist,
url: null,
crawlContainer:
'div[data-testid="serp-card-testid"]:not(div[data-testid="serp-enlargementlist-testid"] div[data-testid="serp-card-testid"])',
sortByDateParam: 'order=DateDesc',
crawlFields: {
id: 'a@id',
price: 'div[data-testid="cardmfe-price-testid"] | removeNewline | trim',
size: 'div[data-testid="cardmfe-keyfacts-testid"] | removeNewline | trim',
title: '.css-1cbj9xw',
link: 'a@href',
address: 'div[data-testid="cardmfe-description-box-address"] | removeNewline | trim',
},
paginate: '#pnlPaging #nlbPlus@href',
normalize: normalize,
filter: applyBlacklist,
};
export const init = (sourceConfig, blacklist) => {
config.enabled = sourceConfig.enabled;
config.url = sourceConfig.url;
appliedBlackList = blacklist || [];
config.enabled = sourceConfig.enabled;
config.url = sourceConfig.url;
appliedBlackList = blacklist || [];
};
export const metaInformation = {
name: 'Immowelt',
baseUrl: 'https://www.immowelt.de/',
id: 'immowelt',
name: 'Immowelt',
baseUrl: 'https://www.immowelt.de/',
id: 'immowelt',
};
export {config};
export { config };

View File

@@ -1,6 +1,6 @@
{
"name": "fredy",
"version": "10.0.0",
"version": "10.2.0",
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
"scripts": {
"start": "node index.js",
@@ -55,13 +55,13 @@
"Firefox ESR"
],
"dependencies": {
"@douyinfe/semi-ui": "2.65.0",
"@douyinfe/semi-ui": "2.68.3",
"@rematch/core": "2.2.0",
"@rematch/loading": "2.1.2",
"@sendgrid/mail": "8.1.3",
"@vitejs/plugin-react": "4.3.1",
"better-sqlite3": "8.6.0",
"body-parser": "1.20.2",
"@sendgrid/mail": "8.1.4",
"@vitejs/plugin-react": "4.3.3",
"better-sqlite3": "^11.5.0",
"body-parser": "1.20.3",
"cookie-session": "2.1.0",
"handlebars": "4.7.8",
"highcharts": "11.4.8",
@@ -69,10 +69,10 @@
"lodash": "4.17.21",
"lowdb": "6.0.1",
"markdown": "^0.5.0",
"nanoid": "5.0.7",
"nanoid": "5.0.8",
"node-fetch": "3.3.2",
"node-mailjet": "6.0.6",
"query-string": "8.2.0",
"query-string": "9.1.1",
"react": "18.3.1",
"react-dom": "18.3.1",
"react-redux": "9.1.2",
@@ -81,27 +81,27 @@
"redux": "5.0.1",
"redux-thunk": "3.1.0",
"restana": "4.9.9",
"serve-static": "1.15.0",
"serve-static": "1.16.2",
"slack": "11.0.2",
"string-similarity": "^4.0.4",
"vite": "5.4.3",
"vite": "5.4.10",
"x-ray": "2.3.4"
},
"devDependencies": {
"@babel/core": "7.25.2",
"@babel/eslint-parser": "7.25.1",
"@babel/preset-env": "7.25.4",
"@babel/preset-react": "7.24.7",
"chai": "5.1.1",
"@babel/core": "7.26.0",
"@babel/eslint-parser": "7.25.9",
"@babel/preset-env": "7.26.0",
"@babel/preset-react": "7.25.9",
"chai": "5.1.2",
"eslint": "8.56.0",
"eslint-config-prettier": "8.8.0",
"eslint-plugin-react": "7.35.2",
"esmock": "2.6.7",
"eslint-plugin-react": "7.37.2",
"esmock": "2.6.9",
"history": "5.3.0",
"husky": "4.3.8",
"less": "4.2.0",
"lint-staged": "13.2.2",
"mocha": "10.7.3",
"mocha": "10.8.2",
"prettier": "3.3.3",
"redux-logger": "3.0.6"
}

View File

@@ -13,7 +13,7 @@
"enabled": true
},
"immowelt": {
"url": "https://www.immowelt.de/liste/duesseldorf/wohnungen/kaufen?d=true&rmi=3&sd=DESC&sf=TIMESTAMP&sp=1",
"url": "https://www.immowelt.de/classified-search?distributionTypes=Buy,Buy_Auction,Compulsory_Auction&estateTypes=House,Apartment&locations=AD08DE2350",
"enabled": true
},
"immoscout": {

View File

@@ -1,7 +1,7 @@
[
{
"url": "https://www.immowelt.de/liste/40589/wohnungen/mieten?d=true&sd=DESC&sf=PRIMARY_PRICE_AMOUNT&sp=1",
"shouldBecome": "https://www.immowelt.de/liste/40589/wohnungen/mieten?d=true&sd=DESC&sf=TIMESTAMP&sp=1",
"url": "https://www.immowelt.de/classified-search?distributionTypes=Buy,Buy_Auction,Compulsory_Auction&estateTypes=House,Apartment&locations=AD08DE2350",
"shouldBecome": "https://www.immowelt.de/classified-search?distributionTypes=Buy,Buy_Auction,Compulsory_Auction&estateTypes=House,Apartment&locations=AD08DE2350&order=DateDesc",
"id": "immowelt"
},
{

2094
yarn.lock

File diff suppressed because it is too large Load Diff