Compare commits

..

8 Commits
5.3.1 ... 5.4.2

Author SHA1 Message Date
orangecoding
5a13e6a0a0 next build version 2021-11-28 21:09:40 +01:00
orangecoding
7bd36e554c fixing prod build 2021-11-28 21:09:10 +01:00
orangecoding
d7e3dfc05e fixing version tag.. wups.. 2021-11-26 21:04:16 +01:00
orangecoding
c1c4d55ede adding or replacing sort params for provider urls when necessary 2021-11-26 21:02:09 +01:00
orangecoding
aad0884976 ignoring idea files 2021-11-25 19:16:06 +01:00
Christian Kellner
c0ae72424b Upgrade UI dependencies (#37)
upgrading ui dependencies
2021-11-25 19:13:25 +01:00
Christian Kellner
a3aa512db3 next release 2021-11-12 09:15:34 +01:00
Christian Kellner
8361d9c8ff splitting telegram messages into chunks to avoid errors when message exceeds limit of 4096 chars 2021-11-12 09:14:55 +01:00
16 changed files with 1258 additions and 1382 deletions

1
.gitignore vendored
View File

@@ -3,3 +3,4 @@ ui/public/
db/
npm-debug.log
.DS_Store
.idea

View File

@@ -1,3 +1,13 @@
###### [V5.4.0]
- Upgrading dependencies
- Provider urls are now automagically changed to include the correct sort order for search results
```
Note: It has been a point of confusion since the very beginning of Fredy that people simply copied the url, but
did not take care of sorting the search results by date. If this is not done, Fredy will most likely not see the latest
results and thus cannot report them. This release fixes it by adding the necessary params (or replacing them).
```
###### [V5.3.0]
- Upgrading dependencies
- It's now possible to send mails to multiple receivers using comma separation for MailJet & Sendgrid

View File

@@ -4,6 +4,7 @@ const { setKnownListings, getKnownListings } = require('./services/storage/listi
const notify = require('./notification/notify');
const xray = require('./services/scraper');
const scrapingAnt = require('./services/scrapingAnt');
const urlModifier = require('./services/queryStringMutator');
class FredyRuntime {
/**
@@ -24,7 +25,8 @@ class FredyRuntime {
execute() {
return (
Promise.resolve(this._providerConfig.url)
//modify the url to make sure search order is correctly set
Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam))
//scraping the site and try finding new listings
.then(this._getListings.bind(this))
//bring them in a proper form (dictated by the provider)

View File

@@ -1,6 +1,19 @@
const { markdown2Html } = require('../../services/markdown');
const axios = require('axios');
/**
 * Splits an array into consecutive chunks of at most `perChunk` elements.
 *
 * Telegram only allows for messages up to 4096 chars, thus listings are spread over several
 * messages. Note that the split is done by element count, not by the length of the resulting text.
 *
 * @param {Array} inputArray the array to split (it is not modified)
 * @param {number} perChunk maximum number of elements per chunk, expected to be a positive integer
 * @returns {Array<Array>} the chunks in their original order, an empty array for empty input
 */
const arrayChunks = (inputArray, perChunk) =>
  inputArray.reduce((chunks, element, index) => {
    const chunkIndex = Math.floor(index / perChunk);
    if (chunks[chunkIndex] === undefined) {
      chunks[chunkIndex] = [];
    }
    //push instead of concat: concat would flatten elements that are arrays themselves
    chunks[chunkIndex].push(element);
    return chunks;
  }, []);
/**
* sends new listings to telegram
* @param serviceName e.g immowelt
@@ -12,22 +25,28 @@ const axios = require('axios');
exports.send = ({ serviceName, newListings, notificationConfig, jobKey }) => {
const { token, chatId } = notificationConfig.find((adapter) => adapter.id === 'telegram').fields;
let message = `Job: ${jobKey} | Service <b>${serviceName}</b> found <b>${newListings.length}</b> new listings:\n\n`;
//we have to split messages into chunk, because otherwise messages are going to become too big and will fail
const chunks = arrayChunks(newListings, 3);
message += newListings.map(
(o) =>
`<b>${shorten(o.title.replace(/\*/g, ''), 45)}</b>\n` +
[o.address, o.price, o.size].join(' | ') +
'\n' +
`<a href="${o.link}">${o.link}</a>\n\n`
);
const promises = chunks.map((chunk) => {
let message = `Job: ${jobKey} | Service <b>${serviceName}</b> found <b>${newListings.length}</b> new listings:\n\n`;
message += chunk.map(
(o) =>
`<b>${shorten(o.title.replace(/\*/g, ''), 45)}</b>\n` +
[o.address, o.price, o.size].join(' | ') +
'\n' +
`<a href="${o.link}">${o.link}</a>\n\n`
);
return axios.post(`https://api.telegram.org/bot${token}/sendMessage`, {
chat_id: chatId,
text: message,
parse_mode: 'HTML',
disable_web_page_preview: true,
return axios.post(`https://api.telegram.org/bot${token}/sendMessage`, {
chat_id: chatId,
text: message,
parse_mode: 'HTML',
disable_web_page_preview: true,
});
});
return Promise.all(promises);
};
function shorten(str, len = 30) {

View File

@@ -22,6 +22,7 @@ function applyBlacklist(o) {
const config = {
url: null,
crawlContainer: '.tabelle',
sortByDateParam: 'sort_type=newest',
crawlFields: {
id: '.inner_object_data input[name="marker_objekt_id"]@value | int',
price: '.tabelle .inner_object_data .single_data_price | removeNewline | trim',

View File

@@ -24,6 +24,7 @@ function applyBlacklist(o) {
const config = {
url: null,
crawlContainer: '#result-list-stage .item',
sortByDateParam: 'sortby=19',
crawlFields: {
id: '@id',
price: 'div[id*="selPrice_"] | trim',

View File

@@ -20,6 +20,7 @@ function applyBlacklist(o) {
const config = {
url: null,
crawlContainer: '#resultListItems li.result-list__listing',
sortByDateParam: 'sorting=2',
crawlFields: {
id: '.result-list-entry@data-obid | int',
price: '.result-list-entry .result-list-entry__criteria .grid-item:first-child dd | removeNewline | trim',

View File

@@ -16,6 +16,7 @@ function applyBlacklist(o) {
const config = {
url: null,
crawlContainer: "div[class^='EstateItem-']",
sortByDateParam: 'sd=DESC&sf=TIMESTAMP',
crawlFields: {
id: 'a@id',
price: "div[class^='KeyFacts-'] [data-test='price'] | removeNewline | trim",

View File

@@ -21,6 +21,8 @@ function applyBlacklist(o) {
const config = {
url: null,
crawlContainer: '#srchrslt-adtable .ad-listitem ',
//no sort param needed, sorting by date is the default for this provider
sortByDateParam: null,
crawlFields: {
id: '.aditem@data-adid | int',
price: '.aditem-main--middle--price | removeNewline | trim',

View File

@@ -13,6 +13,7 @@ function applyBlacklist(o) {
const config = {
url: null,
crawlContainer: '.nbk-container >div article',
sortByDateParam: 'Sortierung=Id&Richtung=DESC',
crawlFields: {
id: '@id',
title: 'a.nbk-truncate@title | removeNewline | trim',

View File

@@ -16,6 +16,7 @@ function applyBlacklist(o) {
const config = {
url: null,
crawlContainer: '#main_column .wgg_card',
sortByDateParam: 'sort_column=0&sort_order=0',
crawlFields: {
id: '@data-id',
details: '.row .noprint .col-xs-11 |removeNewline |trim',

View File

@@ -0,0 +1,22 @@
const queryString = require('query-string');
/**
* for Fredy, it is important to sort search results by date, starting with the latest listing. if it is not sorted, we
* might never actually find the newest results, no matter how many pages we crawl.
* It has been written in the documentation, but obviously nobody reads docu theses days which is why it's been done
* automagically now.
*
* @param _url actual provider url containing the searchParams
* @param sortByDateParam param(s) indicating the correct sort order
* @returns {`${string}?${string}`} correctly formatted url
*/
module.exports = (_url, sortByDateParam) => {
//if no mutation is necessary, just return the original url
if (sortByDateParam == null) {
return _url;
}
const original = queryString.parseUrl(_url);
const mutate = queryString.parse(sortByDateParam);
return `${original.url}?${queryString.stringify({ ...original.query, ...mutate })}`;
};

View File

@@ -1,13 +1,14 @@
{
"name": "fredy",
"version": "5.3.1",
"version": "5.4.2",
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
"scripts": {
"start": "node index.js",
"dev": "yarn && export BUILD_DEV='true' && export NODE_ENV='development' && webpack-dev-server --progress --colors --watch --config ./webpack.dev.js",
"prod": "export BUILD_DEV='false' && webpack --node-env=production --config ./webpack.prod.js",
"format": "prettier --write lib/**/*.js ui/src/**/*.js test/**/*.js *.js --single-quote --print-width 120",
"test": "mocha --timeout 20000 test/**/*.test.js"
"test": "mocha --timeout 20000 test/**/*.test.js",
"lint": "eslint ./index.js ./lib/**/*.js ./test/**/*.js"
},
"husky": {
"hooks": {
@@ -52,29 +53,30 @@
"Firefox ESR"
],
"dependencies": {
"@rematch/core": "2.1.0",
"@rematch/loading": "2.1.0",
"@sendgrid/mail": "7.4.7",
"@rematch/core": "2.2.0",
"@rematch/loading": "2.1.2",
"@sendgrid/mail": "7.6.0",
"axios": "0.24.0",
"axios-retry": "^3.2.4",
"body-parser": "1.19.0",
"cookie-session": "1.4.0",
"handlebars": "4.7.7",
"highcharts": "9.2.2",
"highcharts-react-official": "3.0.0",
"highcharts": "9.3.1",
"highcharts-react-official": "3.1.0",
"lowdb": "1.0.0",
"markdown": "^0.5.0",
"nanoid": "3.1.28",
"nanoid": "3.1.30",
"node-mailjet": "3.3.4",
"query-string": "^7.0.1",
"react": "17.0.2",
"react-dom": "17.0.2",
"react-redux": "7.2.5",
"react-redux": "7.2.6",
"react-router": "5.2.1",
"react-router-dom": "5.3.0",
"react-switch": "^6.0.0",
"redux": "4.1.1",
"redux-thunk": "2.3.0",
"restana": "4.9.1",
"redux": "4.1.2",
"redux-thunk": "2.4.0",
"restana": "4.9.2",
"semantic-ui-react": "2.0.4",
"serve-static": "^1.14.1",
"slack": "11.0.2",
@@ -82,32 +84,32 @@
"x-ray": "2.3.4"
},
"devDependencies": {
"@babel/core": "7.15.5",
"@babel/preset-env": "7.15.6",
"@babel/preset-react": "7.14.5",
"@babel/core": "7.16.0",
"@babel/preset-env": "7.16.4",
"@babel/preset-react": "7.16.0",
"babel-eslint": "10.1.0",
"babel-loader": "8.2.2",
"babel-loader": "8.2.3",
"chai": "4.3.4",
"clean-webpack-plugin": "4.0.0",
"copy-webpack-plugin": "9.0.1",
"css-loader": "6.3.0",
"copy-webpack-plugin": "10.0.0",
"css-loader": "6.5.1",
"eslint": "7.32.0",
"eslint-config-prettier": "8.3.0",
"eslint-plugin-react": "7.26.1",
"eslint-plugin-react": "7.27.1",
"file-loader": "6.2.0",
"history": "5.0.1",
"history": "5.1.0",
"husky": "4.3.8",
"less": "4.1.1",
"less-loader": "10.0.1",
"lint-staged": "11.1.2",
"mocha": "9.1.2",
"prettier": "2.4.1",
"less": "4.1.2",
"less-loader": "10.2.0",
"lint-staged": "12.1.2",
"mocha": "9.1.3",
"prettier": "2.5.0",
"proxyquire": "2.1.3",
"redux-logger": "3.0.6",
"style-loader": "3.3.0",
"style-loader": "3.3.1",
"url-loader": "4.1.1",
"webpack": "5.56.0",
"webpack-cli": "3.3.12",
"webpack": "5.64.4",
"webpack-cli": "4.9.1",
"webpack-dev-server": "3.11.2",
"webpack-merge": "5.8.0"
}

View File

@@ -0,0 +1,32 @@
const testData = require('./testData.json');
const expect = require('chai').expect;
const fs = require('fs');
const mutator = require('../../lib/services/queryStringMutator.js');
const queryString = require('query-string');
/**
 * Runs the mutator against every entry of testData.json, using the sortByDateParam of the matching provider.
 *
 * The resulting url is not compared to the expected url as a string, as this would require all url params to be in
 * the expected order. This is however not guaranteed, as params (and their order) are totally variable. Instead both
 * urls are parsed and their base url and their query params are compared.
 */
describe('queryStringMutator', () => {
  it('should fix all urls', () => {
    const providers = fs.readdirSync('./lib/provider/').map((integPath) => require(`../../lib/provider/${integPath}`));
    for (const test of testData) {
      const provider = providers.find((p) => p.metaInformation.id === test.id);
      if (provider == null) {
        throw new Error(`Cannot find provider for given id: ${test.id}`);
      }
      const fixedUrl = mutator(test.url, provider.config.sortByDateParam);
      const expected = queryString.parseUrl(test.shouldBecome);
      const actual = queryString.parseUrl(fixedUrl);
      //everything in front of the query string must not be touched
      expect(actual.url).to.equal(expected.url);
      //params must match by key AND value. A subset check would also pass if the sort params were missing
      expect(actual.query).to.deep.equal(expected.query);
    }
  });
});

View File

@@ -0,0 +1,33 @@
[
{
"url": "https://www.immowelt.de/liste/40589/wohnungen/mieten?d=true&sd=DESC&sf=PRIMARY_PRICE_AMOUNT&sp=1",
"shouldBecome": "https://www.immowelt.de/liste/40589/wohnungen/mieten?d=true&sd=DESC&sf=TIMESTAMP&sp=1",
"id": "immowelt"
},
{
"url": "https://www.1a-immobilienmarkt.de/suchen/duesseldorf/wohnung-mieten.html?search=yes",
"shouldBecome": "https://www.1a-immobilienmarkt.de/suchen/duesseldorf/wohnung-mieten.html?search=yes&sort_type=newest",
"id": "einsAImmobilien"
},
{
"url": "https://www.wg-gesucht.de/1-zimmer-wohnungen-in-Dusseldorf.30.1.1.0.html?sort_column=1&sort_order=0",
"shouldBecome": "https://www.wg-gesucht.de/1-zimmer-wohnungen-in-Dusseldorf.30.1.1.0.html?sort_column=0&sort_order=0",
"id": "wgGesucht"
},
{
"url": "https://www.immonet.de/immobiliensuche/sel.do?sortby=0&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
"shouldBecome": "https://www.immonet.de/immobiliensuche/sel.do?sortby=19&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
"id": "immonet"
},
{
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten",
"shouldBecome": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?sorting=2",
"id": "immoscout"
},
{
"url": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/",
"shouldBecome": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/?Sortierung=Id&Richtung=DESC",
"id": "neubauKompass"
}
]

2427
yarn.lock

File diff suppressed because it is too large Load Diff