mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
79a8420dfb |
2
index.js
2
index.js
@@ -37,6 +37,8 @@ await runMigrations();
|
|||||||
// Load provider modules once at startup
|
// Load provider modules once at startup
|
||||||
const providers = await getProviders();
|
const providers = await getProviders();
|
||||||
|
|
||||||
|
similarityCache.initSimilarityCache();
|
||||||
|
|
||||||
//assuming interval is always in minutes
|
//assuming interval is always in minutes
|
||||||
const INTERVAL = config.interval * 60 * 1000;
|
const INTERVAL = config.interval * 60 * 1000;
|
||||||
|
|
||||||
|
|||||||
@@ -183,8 +183,12 @@ class FredyPipeline {
|
|||||||
* @returns {Listing[]} Listings considered unique enough to keep.
|
* @returns {Listing[]} Listings considered unique enough to keep.
|
||||||
*/
|
*/
|
||||||
_filterBySimilarListings(listings) {
|
_filterBySimilarListings(listings) {
|
||||||
const filteredList = listings.filter((listing) => {
|
return listings.filter((listing) => {
|
||||||
const similar = this._similarityCache.hasSimilarEntries(listing.title, listing.address);
|
const similar = this._similarityCache.checkAndAddEntry({
|
||||||
|
title: listing.title,
|
||||||
|
address: listing.address,
|
||||||
|
price: listing.price,
|
||||||
|
});
|
||||||
if (similar) {
|
if (similar) {
|
||||||
logger.debug(
|
logger.debug(
|
||||||
`Filtering similar entry for title '${listing.title}' and address '${listing.address}' (Provider: '${this._providerId}')`,
|
`Filtering similar entry for title '${listing.title}' and address '${listing.address}' (Provider: '${this._providerId}')`,
|
||||||
@@ -192,8 +196,6 @@ class FredyPipeline {
|
|||||||
}
|
}
|
||||||
return !similar;
|
return !similar;
|
||||||
});
|
});
|
||||||
filteredList.forEach((filter) => this._similarityCache.addCacheEntry(filter.title, filter.address));
|
|
||||||
return filteredList;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -1,116 +1,92 @@
|
|||||||
import crypto from 'crypto';
|
|
||||||
|
|
||||||
const retention = 60 * 60 * 1000;
|
|
||||||
/**
|
/**
|
||||||
* Internal cache storage.
|
* Similarity cache
|
||||||
* Maps a SHA-256 hash (string) to its expiry timestamp (number in ms).
|
|
||||||
* @type {Map<string, number>}
|
|
||||||
*/
|
|
||||||
const entries = new Map();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reference to the currently scheduled cleanup timer.
|
|
||||||
* @type {NodeJS.Timeout | null}
|
|
||||||
*/
|
|
||||||
let timer = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Generate a SHA-256 hash from a list of input strings.
|
|
||||||
* Null or undefined values are ignored.
|
|
||||||
*
|
*
|
||||||
* @param {...(string|null|undefined)} strings - Input values to hash
|
* Maintains an in-memory Set of content hashes to detect whether a listing
|
||||||
|
* (identified by a tuple of title, price and address) has been seen before.
|
||||||
|
*
|
||||||
|
* Design notes:
|
||||||
|
* - The cache is refreshed periodically from persistent storage. To avoid
|
||||||
|
* modification-during-iteration issues, the refresh builds a new Set and
|
||||||
|
* atomically swaps the reference instead of mutating in place.
|
||||||
|
* - Hashing ignores null/undefined values but preserves falsy-yet-valid values
|
||||||
|
* like 0. Non-string values are coerced to strings before hashing.
|
||||||
|
*
|
||||||
|
* This module has no persistence of its own; it relies on
|
||||||
|
* getAllEntriesFromListings() for data hydration.
|
||||||
|
* @module similarityCache
|
||||||
|
*/
|
||||||
|
import crypto from 'crypto';
|
||||||
|
import { getAllEntriesFromListings } from '../storage/listingsStorage.js';
|
||||||
|
|
||||||
|
/** @type {number} Refresh interval in milliseconds (defaults to one hour). */
|
||||||
|
const reloadCycle = 60 * 60 * 1000; // every hour, refresh
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Internal cache of content hashes for known listings.
|
||||||
|
*
|
||||||
|
* Each entry is an SHA-256 hex digest produced by toHash(title, price, address).
|
||||||
|
* @type {Set<string>}
|
||||||
|
*/
|
||||||
|
let cache = new Set();
|
||||||
|
|
||||||
|
// Periodically refresh the cache from storage
|
||||||
|
setInterval(() => {
|
||||||
|
initSimilarityCache();
|
||||||
|
}, reloadCycle);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize or refresh the similarity cache from persistent storage.
|
||||||
|
*
|
||||||
|
* Reads all stored listings via getAllEntriesFromListings(), computes a hash for
|
||||||
|
* each, and swaps the in-memory Set atomically to avoid in-place mutations that
|
||||||
|
* could interfere with concurrent iteration.
|
||||||
|
*
|
||||||
|
* This function is idempotent and safe to call at any time.
|
||||||
|
* @returns {void}
|
||||||
|
*/
|
||||||
|
export const initSimilarityCache = () => {
|
||||||
|
const allEntries = getAllEntriesFromListings();
|
||||||
|
const newCache = new Set();
|
||||||
|
for (const entry of allEntries) {
|
||||||
|
newCache.add(toHash(entry?.title, entry?.price, entry?.address));
|
||||||
|
}
|
||||||
|
// Atomic swap to avoid mutating the cache while it may be iterated elsewhere
|
||||||
|
cache = newCache;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if a listing is already known and add it to the cache if not.
|
||||||
|
*
|
||||||
|
* The listing is identified by the combination of its title, price and
|
||||||
|
* address. Null/undefined fields are ignored during hashing. Falsy-but-valid
|
||||||
|
* values (e.g., price 0) are preserved.
|
||||||
|
*
|
||||||
|
* @param {Object} params - Listing fields
|
||||||
|
* @param {string|undefined|null} params.title - The listing title
|
||||||
|
* @param {string|undefined|null} params.address - The listing address
|
||||||
|
* @param {number|string|undefined|null} params.price - The listing price
|
||||||
|
* @returns {boolean} true if the entry already existed in the cache (duplicate), otherwise false
|
||||||
|
*/
|
||||||
|
export const checkAndAddEntry = ({ title, address, price }) => {
|
||||||
|
const hash = toHash(title, price, address);
|
||||||
|
if (cache.has(hash)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
cache.add(hash);
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate an SHA-256 hash from a list of input values.
|
||||||
|
* Null or undefined values are ignored. Falsy but valid values like 0 are preserved.
|
||||||
|
* Non-string values are coerced to strings prior to hashing.
|
||||||
|
*
|
||||||
|
* @param {...(string|number|null|undefined)} strings - Input values to hash
|
||||||
* @returns {string} Hexadecimal hash
|
* @returns {string} Hexadecimal hash
|
||||||
*/
|
*/
|
||||||
function toHash(...strings) {
|
function toHash(...strings) {
|
||||||
return crypto.createHash('sha256').update(strings.filter(Boolean).join('|')).digest('hex');
|
const normalized = strings
|
||||||
}
|
.filter((v) => v !== null && v !== undefined)
|
||||||
|
.map((v) => (typeof v === 'string' ? v : String(v)));
|
||||||
/**
|
return crypto.createHash('sha256').update(normalized.join('|')).digest('hex');
|
||||||
* Cleanup expired cache entries and schedule the next cleanup run.
|
|
||||||
* This function is invoked automatically by scheduled timers.
|
|
||||||
*
|
|
||||||
* @private
|
|
||||||
*/
|
|
||||||
function runCleanup() {
|
|
||||||
const now = Date.now();
|
|
||||||
for (const [hash, expiry] of entries) {
|
|
||||||
if (expiry <= now) entries.delete(hash);
|
|
||||||
}
|
|
||||||
scheduleNext();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Find the soonest expiry timestamp among all cache entries
|
|
||||||
* and schedule a one-shot timer that will trigger at that time.
|
|
||||||
* Cancels any existing timer before scheduling a new one.
|
|
||||||
*
|
|
||||||
* @private
|
|
||||||
*/
|
|
||||||
function scheduleNext() {
|
|
||||||
if (timer) {
|
|
||||||
clearTimeout(timer);
|
|
||||||
timer = null;
|
|
||||||
}
|
|
||||||
let next = Infinity;
|
|
||||||
const now = Date.now();
|
|
||||||
for (const expiry of entries.values()) {
|
|
||||||
if (expiry > now && expiry < next) next = expiry;
|
|
||||||
}
|
|
||||||
if (next !== Infinity) {
|
|
||||||
timer = setTimeout(runCleanup, Math.max(0, next - now));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Add or refresh a cache entry for the given title and address.
|
|
||||||
* The entry will automatically expire after the configured retention window.
|
|
||||||
*
|
|
||||||
* @param {string} title - The title used to build the cache key
|
|
||||||
* @param {string} address - The address used to build the cache key
|
|
||||||
*/
|
|
||||||
export function addCacheEntry(title, address) {
|
|
||||||
const hash = toHash(title, address);
|
|
||||||
const expiry = Date.now() + retention;
|
|
||||||
entries.set(hash, expiry);
|
|
||||||
scheduleNext();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if a cache entry with the same title and address exists
|
|
||||||
* and is still valid (not expired).
|
|
||||||
*
|
|
||||||
* @param {string} title - The title used to build the cache key
|
|
||||||
* @param {string} address - The address used to build the cache key
|
|
||||||
* @returns {boolean} True if a valid cache entry exists, false otherwise
|
|
||||||
*/
|
|
||||||
export function hasSimilarEntries(title, address) {
|
|
||||||
const hash = toHash(title, address);
|
|
||||||
const expiry = entries.get(hash);
|
|
||||||
if (expiry == null) return false;
|
|
||||||
if (expiry <= Date.now()) {
|
|
||||||
entries.delete(hash);
|
|
||||||
scheduleNext();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Stop any scheduled cleanup timers and prevent further automatic cleanup.
|
|
||||||
* Entries that are already in the cache will remain until removed manually
|
|
||||||
* or until cleanup is started again by adding new entries.
|
|
||||||
*/
|
|
||||||
export function stopCacheCleanup() {
|
|
||||||
if (timer) clearTimeout(timer);
|
|
||||||
timer = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* this is only for test purposes
|
|
||||||
*/
|
|
||||||
export function invalidateAllForTest() {
|
|
||||||
for (const key of entries.keys()) {
|
|
||||||
entries.set(key, 0);
|
|
||||||
}
|
|
||||||
runCleanup();
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -310,8 +310,8 @@ export const deleteListingsByJobId = (jobId) => {
|
|||||||
if (!jobId) return;
|
if (!jobId) return;
|
||||||
return SqliteConnection.execute(
|
return SqliteConnection.execute(
|
||||||
`DELETE
|
`DELETE
|
||||||
FROM listings
|
FROM listings
|
||||||
WHERE job_id = @jobId`,
|
WHERE job_id = @jobId`,
|
||||||
{ jobId },
|
{ jobId },
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
@@ -332,3 +332,13 @@ export const deleteListingsById = (ids) => {
|
|||||||
ids,
|
ids,
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return all listings with only the fields: title, address, and price.
|
||||||
|
* This is the single helper requested for simple consumers.
|
||||||
|
*
|
||||||
|
* @returns {{title: string|null, address: string|null, price: number|null}[]}
|
||||||
|
*/
|
||||||
|
export const getAllEntriesFromListings = () => {
|
||||||
|
return SqliteConnection.query(`SELECT title, address, price FROM listings`);
|
||||||
|
};
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "fredy",
|
"name": "fredy",
|
||||||
"version": "14.2.2",
|
"version": "14.3.0",
|
||||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"prepare": "husky",
|
"prepare": "husky",
|
||||||
|
|||||||
@@ -1,53 +0,0 @@
|
|||||||
import { expect } from 'chai';
|
|
||||||
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
|
||||||
import { mockFredy } from '../utils.js';
|
|
||||||
|
|
||||||
describe('FredyPipeline', () => {
|
|
||||||
afterEach(() => {
|
|
||||||
similarityCache.invalidateAllForTest();
|
|
||||||
});
|
|
||||||
|
|
||||||
after(() => {
|
|
||||||
similarityCache.stopCacheCleanup();
|
|
||||||
});
|
|
||||||
|
|
||||||
describe('_filterBySimilarListings', () => {
|
|
||||||
let fredyRuntime;
|
|
||||||
|
|
||||||
beforeEach(async () => {
|
|
||||||
const FredyRuntime = await mockFredy();
|
|
||||||
fredyRuntime = new FredyRuntime({}, null, 'dummy-provider', 'dummy-job', similarityCache);
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should filter out listings with similar title and address already in cache', () => {
|
|
||||||
similarityCache.addCacheEntry('Penthouse', 'Mustermann Straße 1');
|
|
||||||
|
|
||||||
const listings = [
|
|
||||||
{ id: '1', title: 'Penthouse', address: 'Mustermann Straße 1' },
|
|
||||||
{ id: '2', title: 'Nice apartment', address: 'Mustermann Straße 15' },
|
|
||||||
];
|
|
||||||
|
|
||||||
const result = fredyRuntime._filterBySimilarListings(listings);
|
|
||||||
|
|
||||||
expect(result).to.have.length(1);
|
|
||||||
expect(result[0].id).to.equal('2');
|
|
||||||
expect(result[0].title).to.equal('Nice apartment');
|
|
||||||
|
|
||||||
expect(similarityCache.hasSimilarEntries('Nice apartment', 'Mustermann Straße 15')).to.be.true;
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should handle listings with null or undefined address', () => {
|
|
||||||
const listings = [
|
|
||||||
{ id: '1', title: 'Penthouse', address: null },
|
|
||||||
{ id: '2', title: 'Nice apartment', address: undefined },
|
|
||||||
];
|
|
||||||
|
|
||||||
const result = fredyRuntime._filterBySimilarListings(listings);
|
|
||||||
|
|
||||||
expect(result).to.have.length(2);
|
|
||||||
|
|
||||||
expect(similarityCache.hasSimilarEntries('Penthouse', null)).to.be.true;
|
|
||||||
expect(similarityCache.hasSimilarEntries('Nice apartment', undefined)).to.be.true;
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
@@ -5,9 +5,6 @@ import { expect } from 'chai';
|
|||||||
import * as provider from '../../lib/provider/einsAImmobilien.js';
|
import * as provider from '../../lib/provider/einsAImmobilien.js';
|
||||||
|
|
||||||
describe('#einsAImmobilien testsuite()', () => {
|
describe('#einsAImmobilien testsuite()', () => {
|
||||||
after(() => {
|
|
||||||
similarityCache.stopCacheCleanup();
|
|
||||||
});
|
|
||||||
provider.init(providerConfig.einsAImmobilien, [], []);
|
provider.init(providerConfig.einsAImmobilien, [], []);
|
||||||
it('should test einsAImmobilien provider', async () => {
|
it('should test einsAImmobilien provider', async () => {
|
||||||
const Fredy = await mockFredy();
|
const Fredy = await mockFredy();
|
||||||
|
|||||||
@@ -5,9 +5,6 @@ import { expect } from 'chai';
|
|||||||
import * as provider from '../../lib/provider/immobilienDe.js';
|
import * as provider from '../../lib/provider/immobilienDe.js';
|
||||||
|
|
||||||
describe('#immobilien.de testsuite()', () => {
|
describe('#immobilien.de testsuite()', () => {
|
||||||
after(() => {
|
|
||||||
similarityCache.stopCacheCleanup();
|
|
||||||
});
|
|
||||||
provider.init(providerConfig.immobilienDe, [], []);
|
provider.init(providerConfig.immobilienDe, [], []);
|
||||||
it('should test immobilien.de provider', async () => {
|
it('should test immobilien.de provider', async () => {
|
||||||
const Fredy = await mockFredy();
|
const Fredy = await mockFredy();
|
||||||
|
|||||||
@@ -5,10 +5,6 @@ import { expect } from 'chai';
|
|||||||
import * as provider from '../../lib/provider/immonet.js';
|
import * as provider from '../../lib/provider/immonet.js';
|
||||||
|
|
||||||
describe('#immonet testsuite()', () => {
|
describe('#immonet testsuite()', () => {
|
||||||
after(() => {
|
|
||||||
similarityCache.stopCacheCleanup();
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should test immonet provider', async () => {
|
it('should test immonet provider', async () => {
|
||||||
const Fredy = await mockFredy();
|
const Fredy = await mockFredy();
|
||||||
provider.init(providerConfig.immonet, [], []);
|
provider.init(providerConfig.immonet, [], []);
|
||||||
|
|||||||
@@ -5,10 +5,6 @@ import { get } from '../mocks/mockNotification.js';
|
|||||||
import * as provider from '../../lib/provider/immoscout.js';
|
import * as provider from '../../lib/provider/immoscout.js';
|
||||||
|
|
||||||
describe('#immoscout provider testsuite()', () => {
|
describe('#immoscout provider testsuite()', () => {
|
||||||
after(() => {
|
|
||||||
similarityCache.stopCacheCleanup();
|
|
||||||
});
|
|
||||||
|
|
||||||
provider.init(providerConfig.immoscout, [], []);
|
provider.init(providerConfig.immoscout, [], []);
|
||||||
it('should test immoscout provider', async () => {
|
it('should test immoscout provider', async () => {
|
||||||
const Fredy = await mockFredy();
|
const Fredy = await mockFredy();
|
||||||
|
|||||||
@@ -5,9 +5,6 @@ import { expect } from 'chai';
|
|||||||
import * as provider from '../../lib/provider/immoswp.js';
|
import * as provider from '../../lib/provider/immoswp.js';
|
||||||
|
|
||||||
describe('#immoswp testsuite()', () => {
|
describe('#immoswp testsuite()', () => {
|
||||||
after(() => {
|
|
||||||
similarityCache.stopCacheCleanup();
|
|
||||||
});
|
|
||||||
provider.init(providerConfig.immoswp, [], []);
|
provider.init(providerConfig.immoswp, [], []);
|
||||||
it('should test immoswp provider', async () => {
|
it('should test immoswp provider', async () => {
|
||||||
const Fredy = await mockFredy();
|
const Fredy = await mockFredy();
|
||||||
|
|||||||
@@ -5,10 +5,6 @@ import { expect } from 'chai';
|
|||||||
import * as provider from '../../lib/provider/immowelt.js';
|
import * as provider from '../../lib/provider/immowelt.js';
|
||||||
|
|
||||||
describe('#immowelt testsuite()', () => {
|
describe('#immowelt testsuite()', () => {
|
||||||
after(() => {
|
|
||||||
similarityCache.stopCacheCleanup();
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should test immowelt provider', async () => {
|
it('should test immowelt provider', async () => {
|
||||||
const Fredy = await mockFredy();
|
const Fredy = await mockFredy();
|
||||||
provider.init(providerConfig.immowelt, [], []);
|
provider.init(providerConfig.immowelt, [], []);
|
||||||
|
|||||||
@@ -5,9 +5,6 @@ import { expect } from 'chai';
|
|||||||
import * as provider from '../../lib/provider/kleinanzeigen.js';
|
import * as provider from '../../lib/provider/kleinanzeigen.js';
|
||||||
|
|
||||||
describe('#kleinanzeigen testsuite()', () => {
|
describe('#kleinanzeigen testsuite()', () => {
|
||||||
after(() => {
|
|
||||||
similarityCache.stopCacheCleanup();
|
|
||||||
});
|
|
||||||
it('should test kleinanzeigen provider', async () => {
|
it('should test kleinanzeigen provider', async () => {
|
||||||
const Fredy = await mockFredy();
|
const Fredy = await mockFredy();
|
||||||
provider.init(providerConfig.kleinanzeigen, [], []);
|
provider.init(providerConfig.kleinanzeigen, [], []);
|
||||||
|
|||||||
@@ -5,10 +5,6 @@ import { expect } from 'chai';
|
|||||||
import * as provider from '../../lib/provider/mcMakler.js';
|
import * as provider from '../../lib/provider/mcMakler.js';
|
||||||
|
|
||||||
describe('#mcMakler testsuite()', () => {
|
describe('#mcMakler testsuite()', () => {
|
||||||
after(() => {
|
|
||||||
similarityCache.stopCacheCleanup();
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should test mcMakler provider', async () => {
|
it('should test mcMakler provider', async () => {
|
||||||
const Fredy = await mockFredy();
|
const Fredy = await mockFredy();
|
||||||
provider.init(providerConfig.mcMakler, []);
|
provider.init(providerConfig.mcMakler, []);
|
||||||
|
|||||||
@@ -5,9 +5,6 @@ import { expect } from 'chai';
|
|||||||
import * as provider from '../../lib/provider/neubauKompass.js';
|
import * as provider from '../../lib/provider/neubauKompass.js';
|
||||||
|
|
||||||
describe('#neubauKompass testsuite()', () => {
|
describe('#neubauKompass testsuite()', () => {
|
||||||
after(() => {
|
|
||||||
similarityCache.stopCacheCleanup();
|
|
||||||
});
|
|
||||||
provider.init(providerConfig.neubauKompass, [], []);
|
provider.init(providerConfig.neubauKompass, [], []);
|
||||||
it('should test neubauKompass provider', async () => {
|
it('should test neubauKompass provider', async () => {
|
||||||
const Fredy = await mockFredy();
|
const Fredy = await mockFredy();
|
||||||
|
|||||||
@@ -5,10 +5,6 @@ import { expect } from 'chai';
|
|||||||
import * as provider from '../../lib/provider/regionalimmobilien24.js';
|
import * as provider from '../../lib/provider/regionalimmobilien24.js';
|
||||||
|
|
||||||
describe('#regionalimmobilien24 testsuite()', () => {
|
describe('#regionalimmobilien24 testsuite()', () => {
|
||||||
after(() => {
|
|
||||||
similarityCache.stopCacheCleanup();
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should test regionalimmobilien24 provider', async () => {
|
it('should test regionalimmobilien24 provider', async () => {
|
||||||
const Fredy = await mockFredy();
|
const Fredy = await mockFredy();
|
||||||
provider.init(providerConfig.regionalimmobilien24, []);
|
provider.init(providerConfig.regionalimmobilien24, []);
|
||||||
|
|||||||
@@ -5,10 +5,6 @@ import { expect } from 'chai';
|
|||||||
import * as provider from '../../lib/provider/sparkasse.js';
|
import * as provider from '../../lib/provider/sparkasse.js';
|
||||||
|
|
||||||
describe('#sparkasse testsuite()', () => {
|
describe('#sparkasse testsuite()', () => {
|
||||||
after(() => {
|
|
||||||
similarityCache.stopCacheCleanup();
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should test sparkasse provider', async () => {
|
it('should test sparkasse provider', async () => {
|
||||||
const Fredy = await mockFredy();
|
const Fredy = await mockFredy();
|
||||||
provider.init(providerConfig.sparkasse, []);
|
provider.init(providerConfig.sparkasse, []);
|
||||||
|
|||||||
@@ -5,9 +5,6 @@ import { expect } from 'chai';
|
|||||||
import * as provider from '../../lib/provider/wgGesucht.js';
|
import * as provider from '../../lib/provider/wgGesucht.js';
|
||||||
|
|
||||||
describe('#wgGesucht testsuite()', () => {
|
describe('#wgGesucht testsuite()', () => {
|
||||||
after(() => {
|
|
||||||
similarityCache.stopCacheCleanup();
|
|
||||||
});
|
|
||||||
provider.init(providerConfig.wgGesucht, [], []);
|
provider.init(providerConfig.wgGesucht, [], []);
|
||||||
it('should test wgGesucht provider', async () => {
|
it('should test wgGesucht provider', async () => {
|
||||||
const Fredy = await mockFredy();
|
const Fredy = await mockFredy();
|
||||||
|
|||||||
@@ -1,30 +0,0 @@
|
|||||||
import { expect } from 'chai';
|
|
||||||
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
|
||||||
|
|
||||||
describe('similarityCheck', () => {
|
|
||||||
it('should return true on duplicate', () => {
|
|
||||||
similarityCache.addCacheEntry('Hello World', 'Test');
|
|
||||||
expect(similarityCache.hasSimilarEntries('Hello World', 'Test')).to.be.true;
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should return true even if one value is null', () => {
|
|
||||||
similarityCache.addCacheEntry('Hello World', null);
|
|
||||||
expect(similarityCache.hasSimilarEntries('Hello World', null)).to.be.true;
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should return true even if one value is an obj', () => {
|
|
||||||
similarityCache.addCacheEntry('Hello World', [{ TR: 'OLOLO' }]);
|
|
||||||
expect(similarityCache.hasSimilarEntries('Hello World', [{ TR: 'OLOLO' }])).to.be.true;
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should return false when no duplicate', () => {
|
|
||||||
similarityCache.addCacheEntry('Hello World__', 'Test');
|
|
||||||
expect(similarityCache.hasSimilarEntries('Hello World___', 'Test')).to.be.false;
|
|
||||||
});
|
|
||||||
|
|
||||||
it('should return false when no duplicate', () => {
|
|
||||||
expect(similarityCache.hasSimilarEntries('Hello World', 'Test')).to.be.true;
|
|
||||||
similarityCache.invalidateAllForTest();
|
|
||||||
expect(similarityCache.hasSimilarEntries('Hello World', 'Test')).to.be.false;
|
|
||||||
});
|
|
||||||
});
|
|
||||||
62
test/similarity/similarityCache.test.js
Normal file
62
test/similarity/similarityCache.test.js
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
import { expect } from 'chai';
|
||||||
|
import esmock from 'esmock';
|
||||||
|
|
||||||
|
// Helper to create module under test with mocks
|
||||||
|
async function loadModuleWith({ entries = [] } = {}) {
|
||||||
|
const mod = await esmock('../../lib/services/similarity-check/similarityCache.js', {
|
||||||
|
// Mock the storage to return our controlled entries
|
||||||
|
'../../lib/services/storage/listingsStorage.js': {
|
||||||
|
getAllEntriesFromListings: () => entries,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
return mod;
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('similarityCache', () => {
|
||||||
|
it('initSimilarityCache builds cache from storage and enables duplicate detection', async () => {
|
||||||
|
const entries = [
|
||||||
|
{ title: 'A', price: 1000, address: 'Main 1' },
|
||||||
|
{ title: 'B', price: 0, address: 'Zero St' },
|
||||||
|
];
|
||||||
|
|
||||||
|
const { initSimilarityCache, checkAndAddEntry } = await loadModuleWith({ entries });
|
||||||
|
|
||||||
|
// Initially, duplicates should not be detected for new data
|
||||||
|
expect(checkAndAddEntry({ title: 'X', price: 200, address: 'Y' })).to.equal(false);
|
||||||
|
|
||||||
|
// Now initialize from storage
|
||||||
|
initSimilarityCache();
|
||||||
|
|
||||||
|
// Exact duplicates should be detected
|
||||||
|
expect(checkAndAddEntry({ title: 'A', price: 1000, address: 'Main 1' })).to.equal(true);
|
||||||
|
// Ensure falsy-but-valid price 0 is preserved by hashing and detected as duplicate
|
||||||
|
expect(checkAndAddEntry({ title: 'B', price: 0, address: 'Zero St' })).to.equal(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('checkAndAddEntry returns false for new entry then true for duplicate on second call', async () => {
|
||||||
|
const { checkAndAddEntry } = await loadModuleWith();
|
||||||
|
|
||||||
|
const first = checkAndAddEntry({ title: 'C', price: 300, address: 'Road 3' });
|
||||||
|
const second = checkAndAddEntry({ title: 'C', price: 300, address: 'Road 3' });
|
||||||
|
|
||||||
|
expect(first).to.equal(false);
|
||||||
|
expect(second).to.equal(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('hashing ignores null/undefined but preserves 0 via behavior', async () => {
|
||||||
|
const { checkAndAddEntry } = await loadModuleWith();
|
||||||
|
|
||||||
|
// Add baseline (null address ignored)
|
||||||
|
const add1 = checkAndAddEntry({ title: 'T', price: 1, address: null });
|
||||||
|
expect(add1).to.equal(false);
|
||||||
|
// Duplicate with undefined address should match
|
||||||
|
const dup = checkAndAddEntry({ title: 'T', price: 1, address: undefined });
|
||||||
|
expect(dup).to.equal(true);
|
||||||
|
|
||||||
|
// Now test that price 0 is preserved (not filtered out)
|
||||||
|
const addZero = checkAndAddEntry({ title: 'Z', price: 0, address: 'Zero' });
|
||||||
|
expect(addZero).to.equal(false);
|
||||||
|
const dupZero = checkAndAddEntry({ title: 'Z', price: 0, address: 'Zero' });
|
||||||
|
expect(dupZero).to.equal(true);
|
||||||
|
});
|
||||||
|
});
|
||||||
Reference in New Issue
Block a user