mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
adding or replacing sort params for provider urls when necessary
This commit is contained in:
@@ -4,6 +4,7 @@ const { setKnownListings, getKnownListings } = require('./services/storage/listi
|
||||
const notify = require('./notification/notify');
|
||||
const xray = require('./services/scraper');
|
||||
const scrapingAnt = require('./services/scrapingAnt');
|
||||
const urlModifier = require('./services/queryStringMutator');
|
||||
|
||||
class FredyRuntime {
|
||||
/**
|
||||
@@ -24,7 +25,8 @@ class FredyRuntime {
|
||||
|
||||
execute() {
|
||||
return (
|
||||
Promise.resolve(this._providerConfig.url)
|
||||
//modify the url to make sure search order is correctly set
|
||||
Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam))
|
||||
//scraping the site and try finding new listings
|
||||
.then(this._getListings.bind(this))
|
||||
//bring them in a proper form (dictated by the provider)
|
||||
|
||||
@@ -22,6 +22,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '.tabelle',
|
||||
sortByDateParam: 'sort_type=newest',
|
||||
crawlFields: {
|
||||
id: '.inner_object_data input[name="marker_objekt_id"]@value | int',
|
||||
price: '.tabelle .inner_object_data .single_data_price | removeNewline | trim',
|
||||
|
||||
@@ -24,6 +24,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#result-list-stage .item',
|
||||
sortByDateParam: 'sortby=19',
|
||||
crawlFields: {
|
||||
id: '@id',
|
||||
price: 'div[id*="selPrice_"] | trim',
|
||||
|
||||
@@ -20,6 +20,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#resultListItems li.result-list__listing',
|
||||
sortByDateParam: 'sorting=2',
|
||||
crawlFields: {
|
||||
id: '.result-list-entry@data-obid | int',
|
||||
price: '.result-list-entry .result-list-entry__criteria .grid-item:first-child dd | removeNewline | trim',
|
||||
|
||||
@@ -16,6 +16,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: "div[class^='EstateItem-']",
|
||||
sortByDateParam: 'sd=DESC&sf=TIMESTAMP',
|
||||
crawlFields: {
|
||||
id: 'a@id',
|
||||
price: "div[class^='KeyFacts-'] [data-test='price'] | removeNewline | trim",
|
||||
|
||||
@@ -21,6 +21,8 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#srchrslt-adtable .ad-listitem ',
|
||||
//this provider already sorts by date by default, so no sort param needs to be added
|
||||
sortByDateParam: null,
|
||||
crawlFields: {
|
||||
id: '.aditem@data-adid | int',
|
||||
price: '.aditem-main--middle--price | removeNewline | trim',
|
||||
|
||||
@@ -13,6 +13,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '.nbk-container >div article',
|
||||
sortByDateParam: 'Sortierung=Id&Richtung=DESC',
|
||||
crawlFields: {
|
||||
id: '@id',
|
||||
title: 'a.nbk-truncate@title | removeNewline | trim',
|
||||
|
||||
@@ -16,6 +16,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#main_column .wgg_card',
|
||||
sortByDateParam: 'sort_column=0&sort_order=0',
|
||||
crawlFields: {
|
||||
id: '@data-id',
|
||||
details: '.row .noprint .col-xs-11 |removeNewline |trim',
|
||||
|
||||
22
lib/services/queryStringMutator.js
Normal file
22
lib/services/queryStringMutator.js
Normal file
@@ -0,0 +1,22 @@
|
||||
const queryString = require('query-string');
|
||||
|
||||
/**
|
||||
* for Fredy, it is important to sort search results by date, starting with the latest listing. if it is not sorted, we
|
||||
* might never actually find the newest results, no matter how many pages we crawl.
|
||||
* It has been written in the documentation, but obviously nobody reads docu theses days which is why it's been done
|
||||
* automagically now.
|
||||
*
|
||||
* @param _url actual provider url containing the searchParams
|
||||
* @param sortByDateParam param(s) indicating the correct sort order
|
||||
* @returns {`${string}?${string}`} correctly formatted url
|
||||
*/
|
||||
module.exports = (_url, sortByDateParam) => {
|
||||
//if no mutation is necessary, just return the original url
|
||||
if (sortByDateParam == null) {
|
||||
return _url;
|
||||
}
|
||||
|
||||
const original = queryString.parseUrl(_url);
|
||||
const mutate = queryString.parse(sortByDateParam);
|
||||
return `${original.url}?${queryString.stringify({ ...original.query, ...mutate })}`;
|
||||
};
|
||||
Reference in New Issue
Block a user