diff --git a/lib/FredyRuntime.js b/lib/FredyRuntime.js index 29f54f2..0d9b50c 100755 --- a/lib/FredyRuntime.js +++ b/lib/FredyRuntime.js @@ -5,14 +5,54 @@ import Extractor from './services/extractor/extractor.js'; import urlModifier from './services/queryStringMutator.js'; import logger from './services/logger.js'; +/** + * @typedef {Object} Listing + * @property {string} id Stable unique identifier (hash) of the listing. + * @property {string} title Title or headline of the listing. + * @property {string} [address] Optional address/location text. + * @property {string} [price] Optional price text/value. + * @property {string} [url] Link to the listing detail page. + * @property {any} [meta] Provider-specific additional metadata. + */ + +/** + * @typedef {Object} SimilarityCache + * @property {(title:string, address?:string)=>boolean} hasSimilarEntries Returns true if a similar entry is known. + * @property {(title:string, address?:string)=>void} addCacheEntry Adds a new entry to the similarity cache. + */ + +/** + * Runtime orchestrator for fetching, normalizing, filtering, deduplicating, storing, + * and notifying about new listings from a configured provider. + * + * The execution flow is: + * 1) Prepare provider URL (sorting, etc.) + * 2) Extract raw listings from the provider + * 3) Normalize listings to the provider schema + * 4) Filter out incomplete/blacklisted listings + * 5) Identify new listings (vs. previously stored hashes) + * 6) Persist new listings + * 7) Filter out entries similar to already seen ones + * 8) Dispatch notifications + */ class FredyRuntime { /** + * Create a new runtime instance for a single provider/job execution. 
 * - * @param providerConfig the config for the specific provider, we're going to query at the moment - * @param notificationConfig the config for all notifications - * @param providerId the id of the provider currently in use - * @param jobKey key of the job that is currently running (from within the config) - * @param similarityCache cache instance holding values to check for similarity of entries + * @param {Object} providerConfig Provider configuration. + * @param {string} providerConfig.url Base URL to crawl. + * @param {string} [providerConfig.sortByDateParam] Query parameter used to enforce sorting by date (provider-specific). + * @param {string} [providerConfig.waitForSelector] CSS selector to wait for before parsing content. + * @param {Object.<string, string>} providerConfig.crawlFields Mapping of field names to selectors/paths to extract. + * @param {string} providerConfig.crawlContainer CSS selector for the container holding listing items. + * @param {(raw:any)=>Listing} providerConfig.normalize Function to convert raw scraped data into a Listing shape. + * @param {(listing:Listing)=>boolean} providerConfig.filter Function to filter out unwanted listings. + * @param {(url:string, waitForSelector?:string)=>Promise<any[]>|Promise<Listing[]>} [providerConfig.getListings] Optional override to fetch listings. + * + * @param {Object} notificationConfig Notification configuration passed to notification adapters. + * @param {string} providerId The ID of the provider currently in use. + * @param {string} jobKey Key of the job that is currently running (from within the config). + * @param {SimilarityCache} similarityCache Cache instance for checking similar entries. */ constructor(providerConfig, notificationConfig, providerId, jobKey, similarityCache) { this._providerConfig = providerConfig; @@ -22,29 +62,31 @@ class FredyRuntime { this._similarityCache = similarityCache; } + /** + * Execute the end-to-end pipeline for a single provider run. 
+ * + * @returns {Promise<Listing[]|void>} Resolves to the list of new (and similarity-filtered) listings + * after notifications have been sent; resolves to void when there are no new listings. + */ execute() { - return ( - //modify the url to make sure search order is correctly set - Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam)) - //scraping the site and try finding new listings - .then(this._providerConfig.getListings?.bind(this) ?? this._getListings.bind(this)) - //bring them in a proper form (dictated by the provider) - .then(this._normalize.bind(this)) - //filter listings with stuff tagged by the blacklist of the provider - .then(this._filter.bind(this)) - //check if new listings available. if so proceed - .then(this._findNew.bind(this)) - //store everything in db - .then(this._save.bind(this)) - //check for similar listings. if found, remove them before notifying - .then(this._filterBySimilarListings.bind(this)) - //notify the user using the configured notification adapter - .then(this._notify.bind(this)) - //if an error occurred on the way, handle it here. - .catch(this._handleError.bind(this)) - ); + return Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam)) + .then(this._providerConfig.getListings?.bind(this) ?? this._getListings.bind(this)) + .then(this._normalize.bind(this)) + .then(this._filter.bind(this)) + .then(this._findNew.bind(this)) + .then(this._save.bind(this)) + .then(this._filterBySimilarListings.bind(this)) + .then(this._notify.bind(this)) + .catch(this._handleError.bind(this)); } + /** + * Fetch listings from the provider, using the default Extractor flow unless + * a provider-specific getListings override is supplied. + * + * @param {string} url The provider URL to fetch from. + * @returns {Promise<any[]>} Resolves with an array of listings (empty when none found). 
+ */ _getListings(url) { const extractor = new Extractor(); return new Promise((resolve, reject) => { @@ -65,17 +107,36 @@ class FredyRuntime { }); } + /** + * Normalize raw listings into the provider-specific Listing shape. + * + * @param {any[]} listings Raw listing entries from the extractor or override. + * @returns {Listing[]} Normalized listings. + */ _normalize(listings) { return listings.map(this._providerConfig.normalize); } + /** + * Filter out listings that are missing required fields and those rejected by the + * provider's blacklist/filter function. + * + * @param {Listing[]} listings Listings to filter. + * @returns {Listing[]} Filtered listings that pass validation and provider filter. + */ _filter(listings) { - //only return those where all the fields have been found const keys = Object.keys(this._providerConfig.crawlFields); const filteredListings = listings.filter((item) => keys.every((key) => key in item)); return filteredListings.filter(this._providerConfig.filter); } + /** + * Determine which listings are new by comparing their IDs against stored hashes. + * + * @param {Listing[]} listings Listings to evaluate for novelty. + * @returns {Listing[]} New listings not seen before. + * @throws {NoNewListingsWarning} When no new listings are found. + */ _findNew(listings) { logger.debug(`Checking ${listings.length} listings for new entries (Provider: '${this._providerId}')`); const hashes = getKnownListingHashesForJobAndProvider(this._jobKey, this._providerId) || []; @@ -87,6 +148,13 @@ class FredyRuntime { return newListings; } + /** + * Send notifications for new listings using the configured notification adapter(s). + * + * @param {Listing[]} newListings New listings to notify about. + * @returns {Promise<Listing[]>} Resolves to the provided listings after notifications complete. + * @throws {NoNewListingsWarning} When there are no listings to notify about. 
+ */ _notify(newListings) { if (newListings.length === 0) { throw new NoNewListingsWarning(); @@ -95,12 +163,25 @@ class FredyRuntime { return Promise.all(sendNotifications).then(() => newListings); } + /** + * Persist new listings and pass them through. + * + * @param {Listing[]} newListings Listings to store. + * @returns {Listing[]} The same listings, unchanged. + */ _save(newListings) { logger.debug(`Storing ${newListings.length} new listings (Provider: '${this._providerId}')`); storeListings(this._jobKey, this._providerId, newListings); return newListings; } + /** + * Remove listings that are similar to already known entries according to the similarity cache. + * Adds the remaining listings to the cache. + * + * @param {Listing[]} listings Listings to filter by similarity. + * @returns {Listing[]} Listings considered unique enough to keep. + */ _filterBySimilarListings(listings) { const filteredList = listings.filter((listing) => { const similar = this._similarityCache.hasSimilarEntries(listing.title, listing.address); @@ -115,6 +196,12 @@ class FredyRuntime { return filteredList; } + /** + * Handle errors occurring in the pipeline, logging levels depending on type. + * + * @param {Error} err Error instance thrown by previous steps. + * @returns {void} + */ _handleError(err) { if (err.name === 'NoNewListingsWarning') { logger.debug(`No new listings found (Provider: '${this._providerId}').`); diff --git a/lib/services/storage/migrations/sql/5.job-sharing.js b/lib/services/storage/migrations/sql/5.job-sharing.js index 2a581ff..7b6e16d 100644 --- a/lib/services/storage/migrations/sql/5.job-sharing.js +++ b/lib/services/storage/migrations/sql/5.job-sharing.js @@ -1,4 +1,4 @@ -// Migration: Adding a new table to store if somebody "watches" (a.k.a favorite) a listing +// Migration: Adding a new table to store if somebody shared a job with someone export function up(db) { db.exec(`