Files
fredy/lib/FredyRuntime.js
Thomas Brockmöller dd5c5b29d9 Fix address value in similarity filtering (#191)
* Fix address field in similarity filter
2025-09-25 15:02:00 +02:00

120 lines
4.4 KiB
JavaScript
Executable File

import { NoNewListingsWarning } from './errors.js';
import { storeListings, getKnownListingHashesForJobAndProvider } from './services/storage/listingsStorage.js';
import * as notify from './notification/notify.js';
import Extractor from './services/extractor/extractor.js';
import urlModifier from './services/queryStringMutator.js';
import logger from './services/logger.js';
class FredyRuntime {
/**
*
* @param providerConfig the config for the specific provider, we're going to query at the moment
* @param notificationConfig the config for all notifications
* @param providerId the id of the provider currently in use
* @param jobKey key of the job that is currently running (from within the config)
* @param similarityCache cache instance holding values to check for similarity of entries
*/
constructor(providerConfig, notificationConfig, providerId, jobKey, similarityCache) {
this._providerConfig = providerConfig;
this._notificationConfig = notificationConfig;
this._providerId = providerId;
this._jobKey = jobKey;
this._similarityCache = similarityCache;
}
execute() {
return (
//modify the url to make sure search order is correctly set
Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam))
//scraping the site and try finding new listings
.then(this._providerConfig.getListings?.bind(this) ?? this._getListings.bind(this))
//bring them in a proper form (dictated by the provider)
.then(this._normalize.bind(this))
//filter listings with stuff tagged by the blacklist of the provider
.then(this._filter.bind(this))
//check if new listings available. if so proceed
.then(this._findNew.bind(this))
//store everything in db
.then(this._save.bind(this))
//check for similar listings. if found, remove them before notifying
.then(this._filterBySimilarListings.bind(this))
//notify the user using the configured notification adapter
.then(this._notify.bind(this))
//if an error occurred on the way, handle it here.
.catch(this._handleError.bind(this))
);
}
_getListings(url) {
const extractor = new Extractor();
return new Promise((resolve, reject) => {
extractor
.execute(url, this._providerConfig.waitForSelector)
.then(() => {
const listings = extractor.parseResponseText(
this._providerConfig.crawlContainer,
this._providerConfig.crawlFields,
url,
);
resolve(listings == null ? [] : listings);
})
.catch((err) => {
reject(err);
logger.error(err);
});
});
}
_normalize(listings) {
return listings.map(this._providerConfig.normalize);
}
_filter(listings) {
//only return those where all the fields have been found
const keys = Object.keys(this._providerConfig.crawlFields);
const filteredListings = listings.filter((item) => keys.every((key) => key in item));
return filteredListings.filter(this._providerConfig.filter);
}
_findNew(listings) {
const hashes = getKnownListingHashesForJobAndProvider(this._jobKey, this._providerId) || [];
const newListings = listings.filter((o) => !hashes.includes(o.id));
if (newListings.length === 0) {
throw new NoNewListingsWarning();
}
return newListings;
}
_notify(newListings) {
if (newListings.length === 0) {
throw new NoNewListingsWarning();
}
const sendNotifications = notify.send(this._providerId, newListings, this._notificationConfig, this._jobKey);
return Promise.all(sendNotifications).then(() => newListings);
}
_save(newListings) {
storeListings(this._jobKey, this._providerId, newListings);
return newListings;
}
_filterBySimilarListings(listings) {
const filteredList = listings.filter((listing) => {
const similar = this._similarityCache.hasSimilarEntries(listing.title, listing.address);
if (similar) {
logger.debug(`Filtering similar entry for title: ${listing.title} and address ${listing.address}`);
}
return !similar;
});
filteredList.forEach((filter) => this._similarityCache.addCacheEntry(filter.title, filter.address));
return filteredList;
}
_handleError(err) {
if (err.name !== 'NoNewListingsWarning') logger.error(err);
}
}
export default FredyRuntime;