mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
adding or replacing sort params for provider urls when necessary
This commit is contained in:
10
CHANGELOG.md
10
CHANGELOG.md
@@ -1,3 +1,13 @@
|
||||
###### [V5.4.0]
|
||||
- Upgrading dependencies
|
||||
- Provider urls are now automagically changed to include the correct sort order for search results
|
||||
|
||||
```
|
||||
Note: It has been a point of confusion since the very beginning of Fredy, that people simply copied the url, but
|
||||
did not take care of sorting the search results by date. If this is not done, Fredy will most likely not see the latest
|
||||
results, thus cannot report them. This release fixes it by adding the necessary params (or replaces them).
|
||||
```
|
||||
|
||||
###### [V5.3.0]
|
||||
- Upgrading dependencies
|
||||
- It's now possible to send mails to multiple receivers using comma separation for MailJet & Sendgrid
|
||||
|
||||
@@ -4,6 +4,7 @@ const { setKnownListings, getKnownListings } = require('./services/storage/listi
|
||||
const notify = require('./notification/notify');
|
||||
const xray = require('./services/scraper');
|
||||
const scrapingAnt = require('./services/scrapingAnt');
|
||||
const urlModifier = require('./services/queryStringMutator');
|
||||
|
||||
class FredyRuntime {
|
||||
/**
|
||||
@@ -24,7 +25,8 @@ class FredyRuntime {
|
||||
|
||||
execute() {
|
||||
return (
|
||||
Promise.resolve(this._providerConfig.url)
|
||||
//modify the url to make sure search order is correctly set
|
||||
Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam))
|
||||
//scraping the site and try finding new listings
|
||||
.then(this._getListings.bind(this))
|
||||
//bring them in a proper form (dictated by the provider)
|
||||
|
||||
@@ -22,6 +22,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '.tabelle',
|
||||
sortByDateParam: 'sort_type=newest',
|
||||
crawlFields: {
|
||||
id: '.inner_object_data input[name="marker_objekt_id"]@value | int',
|
||||
price: '.tabelle .inner_object_data .single_data_price | removeNewline | trim',
|
||||
|
||||
@@ -24,6 +24,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#result-list-stage .item',
|
||||
sortByDateParam: 'sortby=19',
|
||||
crawlFields: {
|
||||
id: '@id',
|
||||
price: 'div[id*="selPrice_"] | trim',
|
||||
|
||||
@@ -20,6 +20,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#resultListItems li.result-list__listing',
|
||||
sortByDateParam: 'sorting=2',
|
||||
crawlFields: {
|
||||
id: '.result-list-entry@data-obid | int',
|
||||
price: '.result-list-entry .result-list-entry__criteria .grid-item:first-child dd | removeNewline | trim',
|
||||
|
||||
@@ -16,6 +16,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: "div[class^='EstateItem-']",
|
||||
sortByDateParam: 'sd=DESC&sf=TIMESTAMP',
|
||||
crawlFields: {
|
||||
id: 'a@id',
|
||||
price: "div[class^='KeyFacts-'] [data-test='price'] | removeNewline | trim",
|
||||
|
||||
@@ -21,6 +21,8 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#srchrslt-adtable .ad-listitem ',
|
||||
//sort by date is standard oO
|
||||
sortByDateParam: null,
|
||||
crawlFields: {
|
||||
id: '.aditem@data-adid | int',
|
||||
price: '.aditem-main--middle--price | removeNewline | trim',
|
||||
|
||||
@@ -13,6 +13,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '.nbk-container >div article',
|
||||
sortByDateParam: 'Sortierung=Id&Richtung=DESC',
|
||||
crawlFields: {
|
||||
id: '@id',
|
||||
title: 'a.nbk-truncate@title | removeNewline | trim',
|
||||
|
||||
@@ -16,6 +16,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#main_column .wgg_card',
|
||||
sortByDateParam: 'sort_column=0&sort_order=0',
|
||||
crawlFields: {
|
||||
id: '@data-id',
|
||||
details: '.row .noprint .col-xs-11 |removeNewline |trim',
|
||||
|
||||
22
lib/services/queryStringMutator.js
Normal file
22
lib/services/queryStringMutator.js
Normal file
@@ -0,0 +1,22 @@
|
||||
const queryString = require('query-string');
|
||||
|
||||
/**
|
||||
* for Fredy, it is important to sort search results by date, starting with the latest listing. if it is not sorted, we
|
||||
* might never actually find the newest results, no matter how many pages we crawl.
|
||||
* It has been written in the documentation, but obviously nobody reads docs these days, which is why it's been done
|
||||
* automagically now.
|
||||
*
|
||||
* @param _url actual provider url containing the searchParams
|
||||
* @param sortByDateParam param(s) indicating the correct sort order
|
||||
* @returns {`${string}?${string}`} correctly formatted url
|
||||
*/
|
||||
module.exports = (_url, sortByDateParam) => {
|
||||
//if no mutation is necessary, just return the original url
|
||||
if (sortByDateParam == null) {
|
||||
return _url;
|
||||
}
|
||||
|
||||
const original = queryString.parseUrl(_url);
|
||||
const mutate = queryString.parse(sortByDateParam);
|
||||
return `${original.url}?${queryString.stringify({ ...original.query, ...mutate })}`;
|
||||
};
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "fredy",
|
||||
"version": "5.3.2",
|
||||
"version": "5.4.0",
|
||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||
"scripts": {
|
||||
"start": "node index.js",
|
||||
@@ -67,6 +67,7 @@
|
||||
"markdown": "^0.5.0",
|
||||
"nanoid": "3.1.30",
|
||||
"node-mailjet": "3.3.4",
|
||||
"query-string": "^7.0.1",
|
||||
"react": "17.0.2",
|
||||
"react-dom": "17.0.2",
|
||||
"react-redux": "7.2.6",
|
||||
|
||||
32
test/queryStringMutator/queryStringMutator.test.js
Normal file
32
test/queryStringMutator/queryStringMutator.test.js
Normal file
@@ -0,0 +1,32 @@
|
||||
const testData = require('./testData.json');
|
||||
const expect = require('chai').expect;
|
||||
const fs = require('fs');
|
||||
|
||||
const mutator = require('../../lib/services/queryStringMutator.js');
|
||||
const queryString = require('query-string');
|
||||
|
||||
/**
|
||||
* This test might look a bit weird at first, but listen, stranger...
|
||||
* It's not wise to compare 2 urls, as this means all url params must be in the expected order. This is however not
|
||||
* guaranteed, as params (and their order) are totally variable.
|
||||
*/
|
||||
describe('queryStringMutator', () => {
|
||||
it('should fix all urls', () => {
|
||||
let _provider = fs.readdirSync('./lib/provider/').map((integPath) => require(`../../lib/provider/${integPath}`));
|
||||
|
||||
for (let test of testData) {
|
||||
const provider = _provider.find((p) => p.metaInformation.id === test.id);
|
||||
if (provider == null) {
|
||||
throw new Error(`Cannot find provider for given id: ${test.id}`);
|
||||
}
|
||||
|
||||
const fixedUrl = mutator(test.url, provider.config.sortByDateParam);
|
||||
const expectedParams = queryString.parseUrl(test.shouldBecome);
|
||||
const actualParams = queryString.parseUrl(fixedUrl);
|
||||
|
||||
//check if all new params are existing
|
||||
expect(Object.keys(expectedParams.query)).to.include.members(Object.keys(actualParams.query));
|
||||
expect(Object.values(expectedParams.query)).to.include.members(Object.values(actualParams.query));
|
||||
}
|
||||
});
|
||||
});
|
||||
33
test/queryStringMutator/testData.json
Normal file
33
test/queryStringMutator/testData.json
Normal file
@@ -0,0 +1,33 @@
|
||||
[
|
||||
{
|
||||
"url": "https://www.immowelt.de/liste/40589/wohnungen/mieten?d=true&sd=DESC&sf=PRIMARY_PRICE_AMOUNT&sp=1",
|
||||
"shouldBecome": "https://www.immowelt.de/liste/40589/wohnungen/mieten?d=true&sd=DESC&sf=TIMESTAMP&sp=1",
|
||||
"id": "immowelt"
|
||||
},
|
||||
{
|
||||
"url": "https://www.1a-immobilienmarkt.de/suchen/duesseldorf/wohnung-mieten.html?search=yes",
|
||||
"shouldBecome": "https://www.1a-immobilienmarkt.de/suchen/duesseldorf/wohnung-mieten.html?search=yes&sort_type=newest",
|
||||
"id": "einsAImmobilien"
|
||||
},
|
||||
{
|
||||
"url": "https://www.wg-gesucht.de/1-zimmer-wohnungen-in-Dusseldorf.30.1.1.0.html?sort_column=1&sort_order=0",
|
||||
"shouldBecome": "https://www.wg-gesucht.de/1-zimmer-wohnungen-in-Dusseldorf.30.1.1.0.html?sort_column=0&sort_order=0",
|
||||
"id": "wgGesucht"
|
||||
},
|
||||
|
||||
{
|
||||
"url": "https://www.immonet.de/immobiliensuche/sel.do?sortby=0&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
|
||||
"shouldBecome": "https://www.immonet.de/immobiliensuche/sel.do?sortby=19&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
|
||||
"id": "immonet"
|
||||
},
|
||||
{
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten",
|
||||
"shouldBecome": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?sorting=2",
|
||||
"id": "immoscout"
|
||||
},
|
||||
{
|
||||
"url": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/",
|
||||
"shouldBecome": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/?Sortierung=Id&Richtung=DESC",
|
||||
"id": "neubauKompass"
|
||||
}
|
||||
]
|
||||
25
yarn.lock
25
yarn.lock
@@ -3631,6 +3631,11 @@ fill-range@^7.0.1:
|
||||
dependencies:
|
||||
to-regex-range "^5.0.1"
|
||||
|
||||
filter-obj@^1.1.0:
|
||||
version "1.1.0"
|
||||
resolved "https://registry.yarnpkg.com/filter-obj/-/filter-obj-1.1.0.tgz#9b311112bc6c6127a16e016c6c5d7f19e0805c5b"
|
||||
integrity sha1-mzERErxsYSehbgFsbF1/GeCAXFs=
|
||||
|
||||
finalhandler@~1.1.2:
|
||||
version "1.1.2"
|
||||
resolved "https://registry.yarnpkg.com/finalhandler/-/finalhandler-1.1.2.tgz#b7e7d000ffd11938d0fdb053506f6ebabe9f587d"
|
||||
@@ -6335,6 +6340,16 @@ qs@^6.9.4:
|
||||
dependencies:
|
||||
side-channel "^1.0.4"
|
||||
|
||||
query-string@^7.0.1:
|
||||
version "7.0.1"
|
||||
resolved "https://registry.yarnpkg.com/query-string/-/query-string-7.0.1.tgz#45bd149cf586aaa582dffc7ec7a8ad97dd02f75d"
|
||||
integrity sha512-uIw3iRvHnk9to1blJCG3BTc+Ro56CBowJXKmNNAm3RulvPBzWLRqKSiiDk+IplJhsydwtuNMHi8UGQFcCLVfkA==
|
||||
dependencies:
|
||||
decode-uri-component "^0.2.0"
|
||||
filter-obj "^1.1.0"
|
||||
split-on-first "^1.0.0"
|
||||
strict-uri-encode "^2.0.0"
|
||||
|
||||
querystring@0.2.0, querystring@^0.2.0:
|
||||
version "0.2.0"
|
||||
resolved "https://registry.yarnpkg.com/querystring/-/querystring-0.2.0.tgz#b209849203bb25df820da756e747005878521620"
|
||||
@@ -7276,6 +7291,11 @@ spdy@^4.0.2:
|
||||
select-hose "^2.0.0"
|
||||
spdy-transport "^3.0.0"
|
||||
|
||||
split-on-first@^1.0.0:
|
||||
version "1.1.0"
|
||||
resolved "https://registry.yarnpkg.com/split-on-first/-/split-on-first-1.1.0.tgz#f610afeee3b12bce1d0c30425e76398b78249a5f"
|
||||
integrity sha512-43ZssAJaMusuKWL8sKUBQXHWOpq8d6CfN/u1p4gUzfJkM05C8rxTmYrkIPTXapZpORA6LkkzcUulJ8FqA7Uudw==
|
||||
|
||||
split-string@^3.0.1, split-string@^3.0.2:
|
||||
version "3.1.0"
|
||||
resolved "https://registry.yarnpkg.com/split-string/-/split-string-3.1.0.tgz#7cb09dda3a86585705c64b39a6466038682e8fe2"
|
||||
@@ -7315,6 +7335,11 @@ stream-to-string@^1.1.0:
|
||||
dependencies:
|
||||
promise-polyfill "^1.1.6"
|
||||
|
||||
strict-uri-encode@^2.0.0:
|
||||
version "2.0.0"
|
||||
resolved "https://registry.yarnpkg.com/strict-uri-encode/-/strict-uri-encode-2.0.0.tgz#b9c7330c7042862f6b142dc274bbcc5866ce3546"
|
||||
integrity sha1-ucczDHBChi9rFC3CdLvMWGbONUY=
|
||||
|
||||
string-argv@^0.3.1:
|
||||
version "0.3.1"
|
||||
resolved "https://registry.yarnpkg.com/string-argv/-/string-argv-0.3.1.tgz#95e2fbec0427ae19184935f816d74aaa4c5c19da"
|
||||
|
||||
Reference in New Issue
Block a user