mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5a13e6a0a0 | ||
|
|
7bd36e554c | ||
|
|
d7e3dfc05e | ||
|
|
c1c4d55ede | ||
|
|
aad0884976 | ||
|
|
c0ae72424b |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -3,3 +3,4 @@ ui/public/
|
|||||||
db/
|
db/
|
||||||
npm-debug.log
|
npm-debug.log
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
.idea
|
||||||
|
|||||||
10
CHANGELOG.md
10
CHANGELOG.md
@@ -1,3 +1,13 @@
|
|||||||
|
###### [V5.4.0]
|
||||||
|
- Upgrading dependencies
|
||||||
|
- Provider urls are now automagically changed to include the correct sort order for search results
|
||||||
|
|
||||||
|
```
|
||||||
|
Note: It has been a point of confusion since the very beginning of Fredy that people simply copied the url, but
|
||||||
|
did not take care of sorting the search results by date. If this is not done, Fredy will most likely not see the latest
|
||||||
|
results and thus cannot report them. This release fixes it by adding the necessary params (or replacing them).
|
||||||
|
```
|
||||||
|
|
||||||
###### [V5.3.0]
|
###### [V5.3.0]
|
||||||
- Upgrading dependencies
|
- Upgrading dependencies
|
||||||
- It's now possible to send mails to multiple receivers using comma separation for MailJet & Sendgrid
|
- It's now possible to send mails to multiple receivers using comma separation for MailJet & Sendgrid
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ const { setKnownListings, getKnownListings } = require('./services/storage/listi
|
|||||||
const notify = require('./notification/notify');
|
const notify = require('./notification/notify');
|
||||||
const xray = require('./services/scraper');
|
const xray = require('./services/scraper');
|
||||||
const scrapingAnt = require('./services/scrapingAnt');
|
const scrapingAnt = require('./services/scrapingAnt');
|
||||||
|
const urlModifier = require('./services/queryStringMutator');
|
||||||
|
|
||||||
class FredyRuntime {
|
class FredyRuntime {
|
||||||
/**
|
/**
|
||||||
@@ -24,7 +25,8 @@ class FredyRuntime {
|
|||||||
|
|
||||||
execute() {
|
execute() {
|
||||||
return (
|
return (
|
||||||
Promise.resolve(this._providerConfig.url)
|
//modify the url to make sure search order is correctly set
|
||||||
|
Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam))
|
||||||
//scraping the site and try finding new listings
|
//scraping the site and try finding new listings
|
||||||
.then(this._getListings.bind(this))
|
.then(this._getListings.bind(this))
|
||||||
//bring them in a proper form (dictated by the provider)
|
//bring them in a proper form (dictated by the provider)
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ function applyBlacklist(o) {
|
|||||||
const config = {
|
const config = {
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: '.tabelle',
|
crawlContainer: '.tabelle',
|
||||||
|
sortByDateParam: 'sort_type=newest',
|
||||||
crawlFields: {
|
crawlFields: {
|
||||||
id: '.inner_object_data input[name="marker_objekt_id"]@value | int',
|
id: '.inner_object_data input[name="marker_objekt_id"]@value | int',
|
||||||
price: '.tabelle .inner_object_data .single_data_price | removeNewline | trim',
|
price: '.tabelle .inner_object_data .single_data_price | removeNewline | trim',
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ function applyBlacklist(o) {
|
|||||||
const config = {
|
const config = {
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: '#result-list-stage .item',
|
crawlContainer: '#result-list-stage .item',
|
||||||
|
sortByDateParam: 'sortby=19',
|
||||||
crawlFields: {
|
crawlFields: {
|
||||||
id: '@id',
|
id: '@id',
|
||||||
price: 'div[id*="selPrice_"] | trim',
|
price: 'div[id*="selPrice_"] | trim',
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ function applyBlacklist(o) {
|
|||||||
const config = {
|
const config = {
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: '#resultListItems li.result-list__listing',
|
crawlContainer: '#resultListItems li.result-list__listing',
|
||||||
|
sortByDateParam: 'sorting=2',
|
||||||
crawlFields: {
|
crawlFields: {
|
||||||
id: '.result-list-entry@data-obid | int',
|
id: '.result-list-entry@data-obid | int',
|
||||||
price: '.result-list-entry .result-list-entry__criteria .grid-item:first-child dd | removeNewline | trim',
|
price: '.result-list-entry .result-list-entry__criteria .grid-item:first-child dd | removeNewline | trim',
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ function applyBlacklist(o) {
|
|||||||
const config = {
|
const config = {
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: "div[class^='EstateItem-']",
|
crawlContainer: "div[class^='EstateItem-']",
|
||||||
|
sortByDateParam: 'sd=DESC&sf=TIMESTAMP',
|
||||||
crawlFields: {
|
crawlFields: {
|
||||||
id: 'a@id',
|
id: 'a@id',
|
||||||
price: "div[class^='KeyFacts-'] [data-test='price'] | removeNewline | trim",
|
price: "div[class^='KeyFacts-'] [data-test='price'] | removeNewline | trim",
|
||||||
|
|||||||
@@ -21,6 +21,8 @@ function applyBlacklist(o) {
|
|||||||
const config = {
|
const config = {
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: '#srchrslt-adtable .ad-listitem ',
|
crawlContainer: '#srchrslt-adtable .ad-listitem ',
|
||||||
|
//sort by date is the default for this provider, so no param is needed
|
||||||
|
sortByDateParam: null,
|
||||||
crawlFields: {
|
crawlFields: {
|
||||||
id: '.aditem@data-adid | int',
|
id: '.aditem@data-adid | int',
|
||||||
price: '.aditem-main--middle--price | removeNewline | trim',
|
price: '.aditem-main--middle--price | removeNewline | trim',
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ function applyBlacklist(o) {
|
|||||||
const config = {
|
const config = {
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: '.nbk-container >div article',
|
crawlContainer: '.nbk-container >div article',
|
||||||
|
sortByDateParam: 'Sortierung=Id&Richtung=DESC',
|
||||||
crawlFields: {
|
crawlFields: {
|
||||||
id: '@id',
|
id: '@id',
|
||||||
title: 'a.nbk-truncate@title | removeNewline | trim',
|
title: 'a.nbk-truncate@title | removeNewline | trim',
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ function applyBlacklist(o) {
|
|||||||
const config = {
|
const config = {
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: '#main_column .wgg_card',
|
crawlContainer: '#main_column .wgg_card',
|
||||||
|
sortByDateParam: 'sort_column=0&sort_order=0',
|
||||||
crawlFields: {
|
crawlFields: {
|
||||||
id: '@data-id',
|
id: '@data-id',
|
||||||
details: '.row .noprint .col-xs-11 |removeNewline |trim',
|
details: '.row .noprint .col-xs-11 |removeNewline |trim',
|
||||||
|
|||||||
22
lib/services/queryStringMutator.js
Normal file
22
lib/services/queryStringMutator.js
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
const queryString = require('query-string');
|
||||||
|
|
||||||
|
/**
|
||||||
|
* for Fredy, it is important to sort search results by date, starting with the latest listing. if it is not sorted, we
|
||||||
|
* might never actually find the newest results, no matter how many pages we crawl.
|
||||||
|
* It has been written in the documentation, but obviously nobody reads docs these days, which is why it's been done
|
||||||
|
* automagically now.
|
||||||
|
*
|
||||||
|
* @param _url actual provider url containing the searchParams
|
||||||
|
* @param sortByDateParam param(s) indicating the correct sort order
|
||||||
|
* @returns {`${string}?${string}`} correctly formatted url
|
||||||
|
*/
|
||||||
|
module.exports = (_url, sortByDateParam) => {
|
||||||
|
//if no mutation is necessary, just return the original url
|
||||||
|
if (sortByDateParam == null) {
|
||||||
|
return _url;
|
||||||
|
}
|
||||||
|
|
||||||
|
const original = queryString.parseUrl(_url);
|
||||||
|
const mutate = queryString.parse(sortByDateParam);
|
||||||
|
return `${original.url}?${queryString.stringify({ ...original.query, ...mutate })}`;
|
||||||
|
};
|
||||||
58
package.json
58
package.json
@@ -1,13 +1,14 @@
|
|||||||
{
|
{
|
||||||
"name": "fredy",
|
"name": "fredy",
|
||||||
"version": "5.3.2",
|
"version": "5.4.2",
|
||||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"start": "node index.js",
|
"start": "node index.js",
|
||||||
"dev": "yarn && export BUILD_DEV='true' && export NODE_ENV='development' && webpack-dev-server --progress --colors --watch --config ./webpack.dev.js",
|
"dev": "yarn && export BUILD_DEV='true' && export NODE_ENV='development' && webpack-dev-server --progress --colors --watch --config ./webpack.dev.js",
|
||||||
"prod": "export BUILD_DEV='false' && webpack --node-env=production --config ./webpack.prod.js",
|
"prod": "export BUILD_DEV='false' && webpack --node-env=production --config ./webpack.prod.js",
|
||||||
"format": "prettier --write lib/**/*.js ui/src/**/*.js test/**/*.js *.js --single-quote --print-width 120",
|
"format": "prettier --write lib/**/*.js ui/src/**/*.js test/**/*.js *.js --single-quote --print-width 120",
|
||||||
"test": "mocha --timeout 20000 test/**/*.test.js"
|
"test": "mocha --timeout 20000 test/**/*.test.js",
|
||||||
|
"lint": "eslint ./index.js ./lib/**/*.js ./test/**/*.js"
|
||||||
},
|
},
|
||||||
"husky": {
|
"husky": {
|
||||||
"hooks": {
|
"hooks": {
|
||||||
@@ -52,29 +53,30 @@
|
|||||||
"Firefox ESR"
|
"Firefox ESR"
|
||||||
],
|
],
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@rematch/core": "2.1.0",
|
"@rematch/core": "2.2.0",
|
||||||
"@rematch/loading": "2.1.0",
|
"@rematch/loading": "2.1.2",
|
||||||
"@sendgrid/mail": "7.4.7",
|
"@sendgrid/mail": "7.6.0",
|
||||||
"axios": "0.24.0",
|
"axios": "0.24.0",
|
||||||
"axios-retry": "^3.2.4",
|
"axios-retry": "^3.2.4",
|
||||||
"body-parser": "1.19.0",
|
"body-parser": "1.19.0",
|
||||||
"cookie-session": "1.4.0",
|
"cookie-session": "1.4.0",
|
||||||
"handlebars": "4.7.7",
|
"handlebars": "4.7.7",
|
||||||
"highcharts": "9.2.2",
|
"highcharts": "9.3.1",
|
||||||
"highcharts-react-official": "3.0.0",
|
"highcharts-react-official": "3.1.0",
|
||||||
"lowdb": "1.0.0",
|
"lowdb": "1.0.0",
|
||||||
"markdown": "^0.5.0",
|
"markdown": "^0.5.0",
|
||||||
"nanoid": "3.1.28",
|
"nanoid": "3.1.30",
|
||||||
"node-mailjet": "3.3.4",
|
"node-mailjet": "3.3.4",
|
||||||
|
"query-string": "^7.0.1",
|
||||||
"react": "17.0.2",
|
"react": "17.0.2",
|
||||||
"react-dom": "17.0.2",
|
"react-dom": "17.0.2",
|
||||||
"react-redux": "7.2.5",
|
"react-redux": "7.2.6",
|
||||||
"react-router": "5.2.1",
|
"react-router": "5.2.1",
|
||||||
"react-router-dom": "5.3.0",
|
"react-router-dom": "5.3.0",
|
||||||
"react-switch": "^6.0.0",
|
"react-switch": "^6.0.0",
|
||||||
"redux": "4.1.1",
|
"redux": "4.1.2",
|
||||||
"redux-thunk": "2.3.0",
|
"redux-thunk": "2.4.0",
|
||||||
"restana": "4.9.1",
|
"restana": "4.9.2",
|
||||||
"semantic-ui-react": "2.0.4",
|
"semantic-ui-react": "2.0.4",
|
||||||
"serve-static": "^1.14.1",
|
"serve-static": "^1.14.1",
|
||||||
"slack": "11.0.2",
|
"slack": "11.0.2",
|
||||||
@@ -82,32 +84,32 @@
|
|||||||
"x-ray": "2.3.4"
|
"x-ray": "2.3.4"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@babel/core": "7.15.5",
|
"@babel/core": "7.16.0",
|
||||||
"@babel/preset-env": "7.15.6",
|
"@babel/preset-env": "7.16.4",
|
||||||
"@babel/preset-react": "7.14.5",
|
"@babel/preset-react": "7.16.0",
|
||||||
"babel-eslint": "10.1.0",
|
"babel-eslint": "10.1.0",
|
||||||
"babel-loader": "8.2.2",
|
"babel-loader": "8.2.3",
|
||||||
"chai": "4.3.4",
|
"chai": "4.3.4",
|
||||||
"clean-webpack-plugin": "4.0.0",
|
"clean-webpack-plugin": "4.0.0",
|
||||||
"copy-webpack-plugin": "9.0.1",
|
"copy-webpack-plugin": "10.0.0",
|
||||||
"css-loader": "6.3.0",
|
"css-loader": "6.5.1",
|
||||||
"eslint": "7.32.0",
|
"eslint": "7.32.0",
|
||||||
"eslint-config-prettier": "8.3.0",
|
"eslint-config-prettier": "8.3.0",
|
||||||
"eslint-plugin-react": "7.26.1",
|
"eslint-plugin-react": "7.27.1",
|
||||||
"file-loader": "6.2.0",
|
"file-loader": "6.2.0",
|
||||||
"history": "5.0.1",
|
"history": "5.1.0",
|
||||||
"husky": "4.3.8",
|
"husky": "4.3.8",
|
||||||
"less": "4.1.1",
|
"less": "4.1.2",
|
||||||
"less-loader": "10.0.1",
|
"less-loader": "10.2.0",
|
||||||
"lint-staged": "11.1.2",
|
"lint-staged": "12.1.2",
|
||||||
"mocha": "9.1.2",
|
"mocha": "9.1.3",
|
||||||
"prettier": "2.4.1",
|
"prettier": "2.5.0",
|
||||||
"proxyquire": "2.1.3",
|
"proxyquire": "2.1.3",
|
||||||
"redux-logger": "3.0.6",
|
"redux-logger": "3.0.6",
|
||||||
"style-loader": "3.3.0",
|
"style-loader": "3.3.1",
|
||||||
"url-loader": "4.1.1",
|
"url-loader": "4.1.1",
|
||||||
"webpack": "5.56.0",
|
"webpack": "5.64.4",
|
||||||
"webpack-cli": "3.3.12",
|
"webpack-cli": "4.9.1",
|
||||||
"webpack-dev-server": "3.11.2",
|
"webpack-dev-server": "3.11.2",
|
||||||
"webpack-merge": "5.8.0"
|
"webpack-merge": "5.8.0"
|
||||||
}
|
}
|
||||||
|
|||||||
32
test/queryStringMutator/queryStringMutator.test.js
Normal file
32
test/queryStringMutator/queryStringMutator.test.js
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
const testData = require('./testData.json');
|
||||||
|
const expect = require('chai').expect;
|
||||||
|
const fs = require('fs');
|
||||||
|
|
||||||
|
const mutator = require('../../lib/services/queryStringMutator.js');
|
||||||
|
const queryString = require('query-string');
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This test might look a bit weird at first, but listen, stranger...
|
||||||
|
* It's not wise to compare 2 urls, as this means all url params must be in the expected order. This is however not
|
||||||
|
* guaranteed, as params (and their order) are totally variable.
|
||||||
|
*/
|
||||||
|
describe('queryStringMutator', () => {
|
||||||
|
it('should fix all urls', () => {
|
||||||
|
let _provider = fs.readdirSync('./lib/provider/').map((integPath) => require(`../../lib/provider/${integPath}`));
|
||||||
|
|
||||||
|
for (let test of testData) {
|
||||||
|
const provider = _provider.find((p) => p.metaInformation.id === test.id);
|
||||||
|
if (provider == null) {
|
||||||
|
throw new Error(`Cannot find provider for given id: ${test.id}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const fixedUrl = mutator(test.url, provider.config.sortByDateParam);
|
||||||
|
const expectedParams = queryString.parseUrl(test.shouldBecome);
|
||||||
|
const actualParams = queryString.parseUrl(fixedUrl);
|
||||||
|
|
||||||
|
//check that every param (key and value) of the fixed url is also present in the expected url
|
||||||
|
expect(Object.keys(expectedParams.query)).to.include.members(Object.keys(actualParams.query));
|
||||||
|
expect(Object.values(expectedParams.query)).to.include.members(Object.values(actualParams.query));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
33
test/queryStringMutator/testData.json
Normal file
33
test/queryStringMutator/testData.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"url": "https://www.immowelt.de/liste/40589/wohnungen/mieten?d=true&sd=DESC&sf=PRIMARY_PRICE_AMOUNT&sp=1",
|
||||||
|
"shouldBecome": "https://www.immowelt.de/liste/40589/wohnungen/mieten?d=true&sd=DESC&sf=TIMESTAMP&sp=1",
|
||||||
|
"id": "immowelt"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://www.1a-immobilienmarkt.de/suchen/duesseldorf/wohnung-mieten.html?search=yes",
|
||||||
|
"shouldBecome": "https://www.1a-immobilienmarkt.de/suchen/duesseldorf/wohnung-mieten.html?search=yes&sort_type=newest",
|
||||||
|
"id": "einsAImmobilien"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://www.wg-gesucht.de/1-zimmer-wohnungen-in-Dusseldorf.30.1.1.0.html?sort_column=1&sort_order=0",
|
||||||
|
"shouldBecome": "https://www.wg-gesucht.de/1-zimmer-wohnungen-in-Dusseldorf.30.1.1.0.html?sort_column=0&sort_order=0",
|
||||||
|
"id": "wgGesucht"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"url": "https://www.immonet.de/immobiliensuche/sel.do?sortby=0&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
|
||||||
|
"shouldBecome": "https://www.immonet.de/immobiliensuche/sel.do?sortby=19&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
|
||||||
|
"id": "immonet"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten",
|
||||||
|
"shouldBecome": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?sorting=2",
|
||||||
|
"id": "immoscout"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/",
|
||||||
|
"shouldBecome": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/?Sortierung=Id&Richtung=DESC",
|
||||||
|
"id": "neubauKompass"
|
||||||
|
}
|
||||||
|
]
|
||||||
Reference in New Issue
Block a user