mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5a13e6a0a0 | ||
|
|
7bd36e554c | ||
|
|
d7e3dfc05e | ||
|
|
c1c4d55ede | ||
|
|
aad0884976 | ||
|
|
c0ae72424b | ||
|
|
a3aa512db3 | ||
|
|
8361d9c8ff |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -3,3 +3,4 @@ ui/public/
|
||||
db/
|
||||
npm-debug.log
|
||||
.DS_Store
|
||||
.idea
|
||||
|
||||
10
CHANGELOG.md
10
CHANGELOG.md
@@ -1,3 +1,13 @@
|
||||
###### [V5.4.0]
|
||||
- Upgrading dependencies
|
||||
- Provider urls are now automagically changed to include the correct sort order for search results
|
||||
|
||||
```
|
||||
Note: It has been a point of confusion since the very beginning of Fredy that people simply copied the url, but
|
||||
did not take care of sorting the search results by date. If this is not done, Fredy will most likely not see the latest
|
||||
results, and thus cannot report them. This release fixes it by adding the necessary params (or replacing them).
|
||||
```
|
||||
|
||||
###### [V5.3.0]
|
||||
- Upgrading dependencies
|
||||
- It's now possible to send mails to multiple receivers using comma separation for MailJet & Sendgrid
|
||||
|
||||
@@ -4,6 +4,7 @@ const { setKnownListings, getKnownListings } = require('./services/storage/listi
|
||||
const notify = require('./notification/notify');
|
||||
const xray = require('./services/scraper');
|
||||
const scrapingAnt = require('./services/scrapingAnt');
|
||||
const urlModifier = require('./services/queryStringMutator');
|
||||
|
||||
class FredyRuntime {
|
||||
/**
|
||||
@@ -24,7 +25,8 @@ class FredyRuntime {
|
||||
|
||||
execute() {
|
||||
return (
|
||||
Promise.resolve(this._providerConfig.url)
|
||||
//modify the url to make sure search order is correctly set
|
||||
Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam))
|
||||
//scraping the site and try finding new listings
|
||||
.then(this._getListings.bind(this))
|
||||
//bring them in a proper form (dictated by the provider)
|
||||
|
||||
@@ -1,6 +1,19 @@
|
||||
const { markdown2Html } = require('../../services/markdown');
|
||||
const axios = require('axios');
|
||||
|
||||
/**
 * Splits an array into consecutive chunks of at most `perChunk` elements.
 * Telegram only allows for messages up to 4096 chars, thus the listings have
 * to be sent as several smaller messages instead of a single big one.
 *
 * Elements are copied as-is: an element that is itself an array stays a single
 * element of its chunk and is not flattened into it.
 *
 * @param {Array} inputArray array to split; it is not modified
 * @param {number} perChunk maximum number of elements per chunk (positive integer)
 * @returns {Array<Array>} the chunks in original order; empty if inputArray is empty
 * @throws {RangeError} if perChunk is not a positive integer
 */
const arrayChunks = (inputArray, perChunk) => {
  // A chunk size of 0 (or a fraction) has no sensible meaning and would otherwise loop forever below.
  if (!Number.isInteger(perChunk) || perChunk < 1) {
    throw new RangeError(`perChunk must be a positive integer, got ${perChunk}`);
  }

  const chunks = [];
  for (let start = 0; start < inputArray.length; start += perChunk) {
    // slice() copies the elements untouched, so array-valued elements are never flattened
    chunks.push(inputArray.slice(start, start + perChunk));
  }
  return chunks;
};
|
||||
|
||||
/**
|
||||
* sends new listings to telegram
|
||||
* @param serviceName e.g immowelt
|
||||
@@ -12,22 +25,28 @@ const axios = require('axios');
|
||||
exports.send = ({ serviceName, newListings, notificationConfig, jobKey }) => {
|
||||
const { token, chatId } = notificationConfig.find((adapter) => adapter.id === 'telegram').fields;
|
||||
|
||||
let message = `Job: ${jobKey} | Service <b>${serviceName}</b> found <b>${newListings.length}</b> new listings:\n\n`;
|
||||
//we have to split messages into chunks, because otherwise messages are going to become too big and will fail
|
||||
const chunks = arrayChunks(newListings, 3);
|
||||
|
||||
message += newListings.map(
|
||||
(o) =>
|
||||
`<b>${shorten(o.title.replace(/\*/g, ''), 45)}</b>\n` +
|
||||
[o.address, o.price, o.size].join(' | ') +
|
||||
'\n' +
|
||||
`<a href="${o.link}">${o.link}</a>\n\n`
|
||||
);
|
||||
const promises = chunks.map((chunk) => {
|
||||
let message = `Job: ${jobKey} | Service <b>${serviceName}</b> found <b>${newListings.length}</b> new listings:\n\n`;
|
||||
message += chunk.map(
|
||||
(o) =>
|
||||
`<b>${shorten(o.title.replace(/\*/g, ''), 45)}</b>\n` +
|
||||
[o.address, o.price, o.size].join(' | ') +
|
||||
'\n' +
|
||||
`<a href="${o.link}">${o.link}</a>\n\n`
|
||||
);
|
||||
|
||||
return axios.post(`https://api.telegram.org/bot${token}/sendMessage`, {
|
||||
chat_id: chatId,
|
||||
text: message,
|
||||
parse_mode: 'HTML',
|
||||
disable_web_page_preview: true,
|
||||
return axios.post(`https://api.telegram.org/bot${token}/sendMessage`, {
|
||||
chat_id: chatId,
|
||||
text: message,
|
||||
parse_mode: 'HTML',
|
||||
disable_web_page_preview: true,
|
||||
});
|
||||
});
|
||||
|
||||
return Promise.all(promises);
|
||||
};
|
||||
|
||||
function shorten(str, len = 30) {
|
||||
|
||||
@@ -22,6 +22,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '.tabelle',
|
||||
sortByDateParam: 'sort_type=newest',
|
||||
crawlFields: {
|
||||
id: '.inner_object_data input[name="marker_objekt_id"]@value | int',
|
||||
price: '.tabelle .inner_object_data .single_data_price | removeNewline | trim',
|
||||
|
||||
@@ -24,6 +24,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#result-list-stage .item',
|
||||
sortByDateParam: 'sortby=19',
|
||||
crawlFields: {
|
||||
id: '@id',
|
||||
price: 'div[id*="selPrice_"] | trim',
|
||||
|
||||
@@ -20,6 +20,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#resultListItems li.result-list__listing',
|
||||
sortByDateParam: 'sorting=2',
|
||||
crawlFields: {
|
||||
id: '.result-list-entry@data-obid | int',
|
||||
price: '.result-list-entry .result-list-entry__criteria .grid-item:first-child dd | removeNewline | trim',
|
||||
|
||||
@@ -16,6 +16,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: "div[class^='EstateItem-']",
|
||||
sortByDateParam: 'sd=DESC&sf=TIMESTAMP',
|
||||
crawlFields: {
|
||||
id: 'a@id',
|
||||
price: "div[class^='KeyFacts-'] [data-test='price'] | removeNewline | trim",
|
||||
|
||||
@@ -21,6 +21,8 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#srchrslt-adtable .ad-listitem ',
|
||||
//sort by date is standard oO
|
||||
sortByDateParam: null,
|
||||
crawlFields: {
|
||||
id: '.aditem@data-adid | int',
|
||||
price: '.aditem-main--middle--price | removeNewline | trim',
|
||||
|
||||
@@ -13,6 +13,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '.nbk-container >div article',
|
||||
sortByDateParam: 'Sortierung=Id&Richtung=DESC',
|
||||
crawlFields: {
|
||||
id: '@id',
|
||||
title: 'a.nbk-truncate@title | removeNewline | trim',
|
||||
|
||||
@@ -16,6 +16,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#main_column .wgg_card',
|
||||
sortByDateParam: 'sort_column=0&sort_order=0',
|
||||
crawlFields: {
|
||||
id: '@data-id',
|
||||
details: '.row .noprint .col-xs-11 |removeNewline |trim',
|
||||
|
||||
22
lib/services/queryStringMutator.js
Normal file
22
lib/services/queryStringMutator.js
Normal file
@@ -0,0 +1,22 @@
|
||||
const queryString = require('query-string');
|
||||
|
||||
/**
|
||||
* for Fredy, it is important to sort search results by date, starting with the latest listing. if it is not sorted, we
|
||||
* might never actually find the newest results, no matter how many pages we crawl.
|
||||
* It has been written in the documentation, but obviously nobody reads docs these days, which is why it's been done
|
||||
* automagically now.
|
||||
*
|
||||
* @param _url actual provider url containing the searchParams
|
||||
* @param sortByDateParam param(s) indicating the correct sort order
|
||||
* @returns {`${string}?${string}`} correctly formatted url
|
||||
*/
|
||||
module.exports = (_url, sortByDateParam) => {
|
||||
//if no mutation is necessary, just return the original url
|
||||
if (sortByDateParam == null) {
|
||||
return _url;
|
||||
}
|
||||
|
||||
const original = queryString.parseUrl(_url);
|
||||
const mutate = queryString.parse(sortByDateParam);
|
||||
return `${original.url}?${queryString.stringify({ ...original.query, ...mutate })}`;
|
||||
};
|
||||
58
package.json
58
package.json
@@ -1,13 +1,14 @@
|
||||
{
|
||||
"name": "fredy",
|
||||
"version": "5.3.1",
|
||||
"version": "5.4.2",
|
||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||
"scripts": {
|
||||
"start": "node index.js",
|
||||
"dev": "yarn && export BUILD_DEV='true' && export NODE_ENV='development' && webpack-dev-server --progress --colors --watch --config ./webpack.dev.js",
|
||||
"prod": "export BUILD_DEV='false' && webpack --node-env=production --config ./webpack.prod.js",
|
||||
"format": "prettier --write lib/**/*.js ui/src/**/*.js test/**/*.js *.js --single-quote --print-width 120",
|
||||
"test": "mocha --timeout 20000 test/**/*.test.js"
|
||||
"test": "mocha --timeout 20000 test/**/*.test.js",
|
||||
"lint": "eslint ./index.js ./lib/**/*.js ./test/**/*.js"
|
||||
},
|
||||
"husky": {
|
||||
"hooks": {
|
||||
@@ -52,29 +53,30 @@
|
||||
"Firefox ESR"
|
||||
],
|
||||
"dependencies": {
|
||||
"@rematch/core": "2.1.0",
|
||||
"@rematch/loading": "2.1.0",
|
||||
"@sendgrid/mail": "7.4.7",
|
||||
"@rematch/core": "2.2.0",
|
||||
"@rematch/loading": "2.1.2",
|
||||
"@sendgrid/mail": "7.6.0",
|
||||
"axios": "0.24.0",
|
||||
"axios-retry": "^3.2.4",
|
||||
"body-parser": "1.19.0",
|
||||
"cookie-session": "1.4.0",
|
||||
"handlebars": "4.7.7",
|
||||
"highcharts": "9.2.2",
|
||||
"highcharts-react-official": "3.0.0",
|
||||
"highcharts": "9.3.1",
|
||||
"highcharts-react-official": "3.1.0",
|
||||
"lowdb": "1.0.0",
|
||||
"markdown": "^0.5.0",
|
||||
"nanoid": "3.1.28",
|
||||
"nanoid": "3.1.30",
|
||||
"node-mailjet": "3.3.4",
|
||||
"query-string": "^7.0.1",
|
||||
"react": "17.0.2",
|
||||
"react-dom": "17.0.2",
|
||||
"react-redux": "7.2.5",
|
||||
"react-redux": "7.2.6",
|
||||
"react-router": "5.2.1",
|
||||
"react-router-dom": "5.3.0",
|
||||
"react-switch": "^6.0.0",
|
||||
"redux": "4.1.1",
|
||||
"redux-thunk": "2.3.0",
|
||||
"restana": "4.9.1",
|
||||
"redux": "4.1.2",
|
||||
"redux-thunk": "2.4.0",
|
||||
"restana": "4.9.2",
|
||||
"semantic-ui-react": "2.0.4",
|
||||
"serve-static": "^1.14.1",
|
||||
"slack": "11.0.2",
|
||||
@@ -82,32 +84,32 @@
|
||||
"x-ray": "2.3.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/core": "7.15.5",
|
||||
"@babel/preset-env": "7.15.6",
|
||||
"@babel/preset-react": "7.14.5",
|
||||
"@babel/core": "7.16.0",
|
||||
"@babel/preset-env": "7.16.4",
|
||||
"@babel/preset-react": "7.16.0",
|
||||
"babel-eslint": "10.1.0",
|
||||
"babel-loader": "8.2.2",
|
||||
"babel-loader": "8.2.3",
|
||||
"chai": "4.3.4",
|
||||
"clean-webpack-plugin": "4.0.0",
|
||||
"copy-webpack-plugin": "9.0.1",
|
||||
"css-loader": "6.3.0",
|
||||
"copy-webpack-plugin": "10.0.0",
|
||||
"css-loader": "6.5.1",
|
||||
"eslint": "7.32.0",
|
||||
"eslint-config-prettier": "8.3.0",
|
||||
"eslint-plugin-react": "7.26.1",
|
||||
"eslint-plugin-react": "7.27.1",
|
||||
"file-loader": "6.2.0",
|
||||
"history": "5.0.1",
|
||||
"history": "5.1.0",
|
||||
"husky": "4.3.8",
|
||||
"less": "4.1.1",
|
||||
"less-loader": "10.0.1",
|
||||
"lint-staged": "11.1.2",
|
||||
"mocha": "9.1.2",
|
||||
"prettier": "2.4.1",
|
||||
"less": "4.1.2",
|
||||
"less-loader": "10.2.0",
|
||||
"lint-staged": "12.1.2",
|
||||
"mocha": "9.1.3",
|
||||
"prettier": "2.5.0",
|
||||
"proxyquire": "2.1.3",
|
||||
"redux-logger": "3.0.6",
|
||||
"style-loader": "3.3.0",
|
||||
"style-loader": "3.3.1",
|
||||
"url-loader": "4.1.1",
|
||||
"webpack": "5.56.0",
|
||||
"webpack-cli": "3.3.12",
|
||||
"webpack": "5.64.4",
|
||||
"webpack-cli": "4.9.1",
|
||||
"webpack-dev-server": "3.11.2",
|
||||
"webpack-merge": "5.8.0"
|
||||
}
|
||||
|
||||
32
test/queryStringMutator/queryStringMutator.test.js
Normal file
32
test/queryStringMutator/queryStringMutator.test.js
Normal file
@@ -0,0 +1,32 @@
|
||||
const testData = require('./testData.json');
|
||||
const expect = require('chai').expect;
|
||||
const fs = require('fs');
|
||||
|
||||
const mutator = require('../../lib/services/queryStringMutator.js');
|
||||
const queryString = require('query-string');
|
||||
|
||||
/**
|
||||
* This test might look a bit weird at first, but listen, stranger...
|
||||
* It's not wise to compare 2 urls, as this means all url params must be in the expected order. This is however not
|
||||
* guaranteed, as params (and their order) are totally variable.
|
||||
*/
|
||||
describe('queryStringMutator', () => {
|
||||
it('should fix all urls', () => {
|
||||
let _provider = fs.readdirSync('./lib/provider/').map((integPath) => require(`../../lib/provider/${integPath}`));
|
||||
|
||||
for (let test of testData) {
|
||||
const provider = _provider.find((p) => p.metaInformation.id === test.id);
|
||||
if (provider == null) {
|
||||
throw new Error(`Cannot find provider for given id: ${test.id}`);
|
||||
}
|
||||
|
||||
const fixedUrl = mutator(test.url, provider.config.sortByDateParam);
|
||||
const expectedParams = queryString.parseUrl(test.shouldBecome);
|
||||
const actualParams = queryString.parseUrl(fixedUrl);
|
||||
|
||||
//check if all new params are existing
|
||||
expect(Object.keys(expectedParams.query)).to.include.members(Object.keys(actualParams.query));
|
||||
expect(Object.values(expectedParams.query)).to.include.members(Object.values(actualParams.query));
|
||||
}
|
||||
});
|
||||
});
|
||||
33
test/queryStringMutator/testData.json
Normal file
33
test/queryStringMutator/testData.json
Normal file
@@ -0,0 +1,33 @@
|
||||
[
|
||||
{
|
||||
"url": "https://www.immowelt.de/liste/40589/wohnungen/mieten?d=true&sd=DESC&sf=PRIMARY_PRICE_AMOUNT&sp=1",
|
||||
"shouldBecome": "https://www.immowelt.de/liste/40589/wohnungen/mieten?d=true&sd=DESC&sf=TIMESTAMP&sp=1",
|
||||
"id": "immowelt"
|
||||
},
|
||||
{
|
||||
"url": "https://www.1a-immobilienmarkt.de/suchen/duesseldorf/wohnung-mieten.html?search=yes",
|
||||
"shouldBecome": "https://www.1a-immobilienmarkt.de/suchen/duesseldorf/wohnung-mieten.html?search=yes&sort_type=newest",
|
||||
"id": "einsAImmobilien"
|
||||
},
|
||||
{
|
||||
"url": "https://www.wg-gesucht.de/1-zimmer-wohnungen-in-Dusseldorf.30.1.1.0.html?sort_column=1&sort_order=0",
|
||||
"shouldBecome": "https://www.wg-gesucht.de/1-zimmer-wohnungen-in-Dusseldorf.30.1.1.0.html?sort_column=0&sort_order=0",
|
||||
"id": "wgGesucht"
|
||||
},
|
||||
|
||||
{
|
||||
"url": "https://www.immonet.de/immobiliensuche/sel.do?sortby=0&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
|
||||
"shouldBecome": "https://www.immonet.de/immobiliensuche/sel.do?sortby=19&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
|
||||
"id": "immonet"
|
||||
},
|
||||
{
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten",
|
||||
"shouldBecome": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?sorting=2",
|
||||
"id": "immoscout"
|
||||
},
|
||||
{
|
||||
"url": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/",
|
||||
"shouldBecome": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/?Sortierung=Id&Richtung=DESC",
|
||||
"id": "neubauKompass"
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user