New user setting to also apply the blacklist (filter) to the description

This commit is contained in:
orangecoding
2026-06-10 17:10:39 +02:00
parent c17a815263
commit a2a765f43d
11 changed files with 362 additions and 9 deletions

View File

@@ -38,11 +38,15 @@ import { formatListing } from './utils/formatListing.js';
* 3) Normalize listings to the provider schema
* 4) Filter out incomplete/blacklisted listings
* 5) Identify new listings (vs. previously stored hashes)
* 6) Optionally enrich new listings via provider.fetchDetails
* 7) Optionally re-apply the provider blacklist using the (now enriched)
* description — only when the user opted in via
* `blacklist_filter_on_provider_details`
* 8) Persist new listings
* 9) Filter out entries similar to already seen ones
* 10) Filter out entries that do not match the job's specFilter
* 11) Filter out entries that do not match the job's spatialFilter
* 12) Dispatch notifications
*/
class FredyPipelineExecutioner {
/**
@@ -86,6 +90,7 @@ class FredyPipelineExecutioner {
.then(this._filter.bind(this))
.then(this._findNew.bind(this))
.then(this._fetchDetails.bind(this))
.then(this._filterAfterDetails.bind(this))
.then(this._geocode.bind(this))
.then(this._save.bind(this))
.then(this._calculateDistance.bind(this))
@@ -266,6 +271,48 @@ class FredyPipelineExecutioner {
);
}
/**
* Re-apply the provider's blacklist filter after `_fetchDetails` has had a
* chance to enrich the listings (e.g., load the full description from the
* detail page). The initial `_filter` step only sees the truncated snippet
* exposed on the search results page, so a blacklisted term that lives
* deeper in the listing's full description would otherwise slip through.
*
* Opt-in: gated by the user setting `blacklist_filter_on_provider_details`.
* The full detail description tends to contain a lot of boilerplate (legal,
* exposé contact info, generic marketing copy) which can accidentally match
* a blacklist term and remove otherwise relevant listings. Users who want
* the stricter behavior must enable the setting explicitly.
*
* Throws {@link NoNewListingsWarning} when all listings are filtered out
* so the rest of the pipeline (save + notify) is short-circuited.
*
* @param {ParsedListing[]} listings Enriched listings to re-filter.
* @returns {ParsedListing[]} Listings that still pass the provider's filter.
* @throws {NoNewListingsWarning} When every listing is filtered out.
*/
_filterAfterDetails(listings) {
if (typeof this._providerConfig.filter !== 'function') {
return listings;
}
const userId = getJob(this._jobKey)?.userId;
const enabled = getUserSettings(userId)?.blacklist_filter_on_provider_details === true;
if (!enabled) {
return listings;
}
const kept = listings.filter(this._providerConfig.filter);
const removed = listings.length - kept.length;
if (removed > 0) {
logger.debug(
`Re-filter after detail enrichment removed ${removed} listing(s) by blacklist (Provider: '${this._providerId}')`,
);
}
if (kept.length === 0) {
throw new NoNewListingsWarning();
}
return kept;
}
/**
* Determine which listings are new by comparing their IDs against stored hashes.
*

View File

@@ -103,6 +103,28 @@ export default async function userSettingsPlugin(fastify) {
}
});
// POST /blacklist-filter-on-details: stores the calling user's
// `blacklist_filter_on_provider_details` flag.
// Replies 403 in demo mode for non-admins, 400 when the flag is not a boolean,
// 500 when persisting fails, and `{ success: true }` otherwise.
fastify.post('/blacklist-filter-on-details', async (request, reply) => {
  const userId = request.session.currentUser;
  // A request sent without a payload may carry no parsed body; fall back to an
  // empty object so the boolean check below answers with 400 instead of the
  // destructuring throwing a TypeError.
  const { blacklist_filter_on_provider_details } = request.body ?? {};

  const globalSettings = await getSettings();
  if (globalSettings.demoMode && !isAdmin(request)) {
    return reply.code(403).send({ error: 'In demo mode, it is not allowed to change settings.' });
  }

  // Only real booleans are accepted; strings such as "true" are rejected.
  if (typeof blacklist_filter_on_provider_details !== 'boolean') {
    return reply.code(400).send({ error: 'blacklist_filter_on_provider_details must be a boolean.' });
  }

  try {
    upsertSettings({ blacklist_filter_on_provider_details }, userId);
    return { success: true };
  } catch (error) {
    logger.error('Error updating blacklist-filter-on-details setting', error);
    return reply.code(500).send({ error: error.message });
  }
});
fastify.post('/listings-view-mode', async (request, reply) => {
const userId = request.session.currentUser;
const { listings_view_mode } = request.body;

View File

@@ -198,7 +198,9 @@ function normalize(o) {
* @returns {boolean}
*/
function applyBlacklist(o) {
  // A listing is kept only when isOneOf reports no blacklist hit for its
  // title and none for its description. The earlier title-only `return` is
  // dropped because it made the description check unreachable.
  // NOTE(review): assumes isOneOf tolerates a missing description — confirm.
  const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
  const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
  return titleNotBlacklisted && descNotBlacklisted;
}
/** @type {ProviderConfig} */
const config = {

View File

@@ -42,7 +42,9 @@ function normalize(o) {
* @returns {boolean}
*/
function applyBlacklist(o) {
  // A listing is kept only when isOneOf reports no blacklist hit for its
  // title and none for its description. The earlier title-only `return` is
  // dropped because it made the description check unreachable.
  // NOTE(review): assumes isOneOf tolerates a missing description — confirm.
  const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
  const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
  return titleNotBlacklisted && descNotBlacklisted;
}
/** @type {ProviderConfig} */