diff --git a/CHANGELOG.md b/CHANGELOG.md index 097b3c6..8a8a080 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +###### [V5.4.0] +- Upgrading dependencies +- Provider urls are now automagically changed to include the correct sort order for search results + +``` +Note: It has been a point of confusion since the very beginning of Fredy that people simply copied the url, but +did not take care of sorting the search results by date. If this is not done, Fredy will most likely not see the latest +results and thus cannot report them. This release fixes it by adding (or replacing) the necessary params. +``` + ###### [V5.3.0] - Upgrading dependencies - It's now possible to send mails to multiple receiver using comma separation for MailJet & Sendgrid diff --git a/lib/FredyRuntime.js b/lib/FredyRuntime.js index 412ac64..3d3f6a4 100755 --- a/lib/FredyRuntime.js +++ b/lib/FredyRuntime.js @@ -4,6 +4,7 @@ const { setKnownListings, getKnownListings } = require('./services/storage/listi const notify = require('./notification/notify'); const xray = require('./services/scraper'); const scrapingAnt = require('./services/scrapingAnt'); +const urlModifier = require('./services/queryStringMutator'); class FredyRuntime { /** @@ -24,7 +25,8 @@ class FredyRuntime { execute() { return ( - Promise.resolve(this._providerConfig.url) + //modify the url to make sure search order is correctly set + Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam)) //scraping the site and try finding new listings .then(this._getListings.bind(this)) //bring them in a proper form (dictated by the provider) diff --git a/lib/provider/einsAImmobilien.js b/lib/provider/einsAImmobilien.js index 1b4e752..cb7427c 100755 --- a/lib/provider/einsAImmobilien.js +++ b/lib/provider/einsAImmobilien.js @@ -22,6 +22,7 @@ function applyBlacklist(o) { const config = { url: null, crawlContainer: '.tabelle', + sortByDateParam: 'sort_type=newest', crawlFields: { id: 
'.inner_object_data input[name="marker_objekt_id"]@value | int', price: '.tabelle .inner_object_data .single_data_price | removeNewline | trim', diff --git a/lib/provider/immonet.js b/lib/provider/immonet.js index 3c8bb60..90243f7 100755 --- a/lib/provider/immonet.js +++ b/lib/provider/immonet.js @@ -24,6 +24,7 @@ function applyBlacklist(o) { const config = { url: null, crawlContainer: '#result-list-stage .item', + sortByDateParam: 'sortby=19', crawlFields: { id: '@id', price: 'div[id*="selPrice_"] | trim', diff --git a/lib/provider/immoscout.js b/lib/provider/immoscout.js index f7a52a4..4d81c2e 100644 --- a/lib/provider/immoscout.js +++ b/lib/provider/immoscout.js @@ -20,6 +20,7 @@ function applyBlacklist(o) { const config = { url: null, crawlContainer: '#resultListItems li.result-list__listing', + sortByDateParam: 'sorting=2', crawlFields: { id: '.result-list-entry@data-obid | int', price: '.result-list-entry .result-list-entry__criteria .grid-item:first-child dd | removeNewline | trim', diff --git a/lib/provider/immowelt.js b/lib/provider/immowelt.js index 5fa9e42..40781bd 100755 --- a/lib/provider/immowelt.js +++ b/lib/provider/immowelt.js @@ -16,6 +16,7 @@ function applyBlacklist(o) { const config = { url: null, crawlContainer: "div[class^='EstateItem-']", + sortByDateParam: 'sd=DESC&sf=TIMESTAMP', crawlFields: { id: 'a@id', price: "div[class^='KeyFacts-'] [data-test='price'] | removeNewline | trim", diff --git a/lib/provider/kleinanzeigen.js b/lib/provider/kleinanzeigen.js index 61755ba..f972e29 100755 --- a/lib/provider/kleinanzeigen.js +++ b/lib/provider/kleinanzeigen.js @@ -21,6 +21,8 @@ function applyBlacklist(o) { const config = { url: null, crawlContainer: '#srchrslt-adtable .ad-listitem ', + //sort by date is standard oO + sortByDateParam: null, crawlFields: { id: '.aditem@data-adid | int', price: '.aditem-main--middle--price | removeNewline | trim', diff --git a/lib/provider/neubauKompass.js b/lib/provider/neubauKompass.js index cd9a091..90721bd 
100755 --- a/lib/provider/neubauKompass.js +++ b/lib/provider/neubauKompass.js @@ -13,6 +13,7 @@ function applyBlacklist(o) { const config = { url: null, crawlContainer: '.nbk-container >div article', + sortByDateParam: 'Sortierung=Id&Richtung=DESC', crawlFields: { id: '@id', title: 'a.nbk-truncate@title | removeNewline | trim', diff --git a/lib/provider/wgGesucht.js b/lib/provider/wgGesucht.js index 73e76ba..19d67cf 100755 --- a/lib/provider/wgGesucht.js +++ b/lib/provider/wgGesucht.js @@ -16,6 +16,7 @@ function applyBlacklist(o) { const config = { url: null, crawlContainer: '#main_column .wgg_card', + sortByDateParam: 'sort_column=0&sort_order=0', crawlFields: { id: '@data-id', details: '.row .noprint .col-xs-11 |removeNewline |trim', diff --git a/lib/services/queryStringMutator.js b/lib/services/queryStringMutator.js new file mode 100644 index 0000000..1354425 --- /dev/null +++ b/lib/services/queryStringMutator.js @@ -0,0 +1,22 @@ +const queryString = require('query-string'); + +/** + * For Fredy, it is important to sort search results by date, starting with the latest listing. If they are not sorted, we + * might never actually find the newest results, no matter how many pages we crawl. + * It has been written in the documentation, but obviously nobody reads the docs these days, which is why it's been done + * automagically now. 
+ * + * @param {string} _url actual provider url containing the searchParams + * @param {string|null} sortByDateParam query-string style param(s) (e.g. 'sd=DESC&sf=TIMESTAMP') that overwrite same-named params of the url + * @returns {string} url with the sort params applied, or the untouched original url if sortByDateParam is null/undefined + */ +module.exports = (_url, sortByDateParam) => { + //if no mutation is necessary, just return the original url + if (sortByDateParam == null) { + return _url; + } + + const original = queryString.parseUrl(_url); + const mutate = queryString.parse(sortByDateParam); + return `${original.url}?${queryString.stringify({ ...original.query, ...mutate })}`; +}; diff --git a/package.json b/package.json index 2e4aab2..45ddb50 100755 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "fredy", - "version": "5.3.2", + "version": "5.4.0", "description": "[F]ind [R]eal [E]states [d]amn eas[y].", "scripts": { "start": "node index.js", @@ -67,6 +67,7 @@ "markdown": "^0.5.0", "nanoid": "3.1.30", "node-mailjet": "3.3.4", + "query-string": "^7.0.1", "react": "17.0.2", "react-dom": "17.0.2", "react-redux": "7.2.6", diff --git a/test/queryStringMutator/queryStringMutator.test.js b/test/queryStringMutator/queryStringMutator.test.js new file mode 100644 index 0000000..2ad419a --- /dev/null +++ b/test/queryStringMutator/queryStringMutator.test.js @@ -0,0 +1,32 @@ +const testData = require('./testData.json'); +const expect = require('chai').expect; +const fs = require('fs'); + +const mutator = require('../../lib/services/queryStringMutator.js'); +const queryString = require('query-string'); + +/** + * This test might look a bit weird at first, but listen stranger... + * It's not wise to compare 2 urls, as this means all url params must be in the expected order. This is however not + * guaranteed, as params (and their order) are totally variable. 
+ */ +describe('queryStringMutator', () => { + it('should fix all urls', () => { + const _provider = fs.readdirSync('./lib/provider/').map((integPath) => require(`../../lib/provider/${integPath}`)); + + for (const test of testData) { + const provider = _provider.find((p) => p.metaInformation.id === test.id); + if (provider == null) { + throw new Error(`Cannot find provider for given id: ${test.id}`); + } + + const fixedUrl = mutator(test.url, provider.config.sortByDateParam); + const expectedParams = queryString.parseUrl(test.shouldBecome); + const actualParams = queryString.parseUrl(fixedUrl); + + //path and the complete set of params (key AND value) must match; a subset check would pass even if nothing was added + expect(actualParams.url).to.equal(expectedParams.url); + expect(actualParams.query).to.deep.equal(expectedParams.query); + } + }); +}); diff --git a/test/queryStringMutator/testData.json b/test/queryStringMutator/testData.json new file mode 100644 index 0000000..17c01ed --- /dev/null +++ b/test/queryStringMutator/testData.json @@ -0,0 +1,33 @@ +[ + { + "url": "https://www.immowelt.de/liste/40589/wohnungen/mieten?d=true&sd=DESC&sf=PRIMARY_PRICE_AMOUNT&sp=1", + "shouldBecome": "https://www.immowelt.de/liste/40589/wohnungen/mieten?d=true&sd=DESC&sf=TIMESTAMP&sp=1", + "id": "immowelt" + }, + { + "url": "https://www.1a-immobilienmarkt.de/suchen/duesseldorf/wohnung-mieten.html?search=yes", + "shouldBecome": "https://www.1a-immobilienmarkt.de/suchen/duesseldorf/wohnung-mieten.html?search=yes&sort_type=newest", + "id": "einsAImmobilien" + }, + { + "url": "https://www.wg-gesucht.de/1-zimmer-wohnungen-in-Dusseldorf.30.1.1.0.html?sort_column=1&sort_order=0", + "shouldBecome": "https://www.wg-gesucht.de/1-zimmer-wohnungen-in-Dusseldorf.30.1.1.0.html?sort_column=0&sort_order=0", + "id": "wgGesucht" + }, + + { + "url": "https://www.immonet.de/immobiliensuche/sel.do?sortby=0&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf", + "shouldBecome": 
"https://www.immonet.de/immobiliensuche/sel.do?sortby=19&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf", + "id": "immonet" + }, + { + "url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten", + "shouldBecome": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?sorting=2", + "id": "immoscout" + }, + { + "url": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/", + "shouldBecome": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/?Sortierung=Id&Richtung=DESC", + "id": "neubauKompass" + } +] \ No newline at end of file diff --git a/yarn.lock b/yarn.lock index 7472172..54dc9b9 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3631,6 +3631,11 @@ fill-range@^7.0.1: dependencies: to-regex-range "^5.0.1" +filter-obj@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/filter-obj/-/filter-obj-1.1.0.tgz#9b311112bc6c6127a16e016c6c5d7f19e0805c5b" + integrity sha1-mzERErxsYSehbgFsbF1/GeCAXFs= + finalhandler@~1.1.2: version "1.1.2" resolved "https://registry.yarnpkg.com/finalhandler/-/finalhandler-1.1.2.tgz#b7e7d000ffd11938d0fdb053506f6ebabe9f587d" @@ -6335,6 +6340,16 @@ qs@^6.9.4: dependencies: side-channel "^1.0.4" +query-string@^7.0.1: + version "7.0.1" + resolved "https://registry.yarnpkg.com/query-string/-/query-string-7.0.1.tgz#45bd149cf586aaa582dffc7ec7a8ad97dd02f75d" + integrity sha512-uIw3iRvHnk9to1blJCG3BTc+Ro56CBowJXKmNNAm3RulvPBzWLRqKSiiDk+IplJhsydwtuNMHi8UGQFcCLVfkA== + dependencies: + decode-uri-component "^0.2.0" + filter-obj "^1.1.0" + split-on-first "^1.0.0" + strict-uri-encode "^2.0.0" + querystring@0.2.0, querystring@^0.2.0: version "0.2.0" resolved "https://registry.yarnpkg.com/querystring/-/querystring-0.2.0.tgz#b209849203bb25df820da756e747005878521620" @@ -7276,6 +7291,11 @@ spdy@^4.0.2: select-hose "^2.0.0" spdy-transport "^3.0.0" +split-on-first@^1.0.0: + version "1.1.0" + resolved 
"https://registry.yarnpkg.com/split-on-first/-/split-on-first-1.1.0.tgz#f610afeee3b12bce1d0c30425e76398b78249a5f" + integrity sha512-43ZssAJaMusuKWL8sKUBQXHWOpq8d6CfN/u1p4gUzfJkM05C8rxTmYrkIPTXapZpORA6LkkzcUulJ8FqA7Uudw== + split-string@^3.0.1, split-string@^3.0.2: version "3.1.0" resolved "https://registry.yarnpkg.com/split-string/-/split-string-3.1.0.tgz#7cb09dda3a86585705c64b39a6466038682e8fe2" @@ -7315,6 +7335,11 @@ stream-to-string@^1.1.0: dependencies: promise-polyfill "^1.1.6" +strict-uri-encode@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/strict-uri-encode/-/strict-uri-encode-2.0.0.tgz#b9c7330c7042862f6b142dc274bbcc5866ce3546" + integrity sha1-ucczDHBChi9rFC3CdLvMWGbONUY= + string-argv@^0.3.1: version "0.3.1" resolved "https://registry.yarnpkg.com/string-argv/-/string-argv-0.3.1.tgz#95e2fbec0427ae19184935f816d74aaa4c5c19da"