mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
improving similarity cache
This commit is contained in:
@@ -1,53 +0,0 @@
|
||||
import { expect } from 'chai';
|
||||
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
||||
import { mockFredy } from '../utils.js';
|
||||
|
||||
describe('FredyPipeline', () => {
|
||||
afterEach(() => {
|
||||
similarityCache.invalidateAllForTest();
|
||||
});
|
||||
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
|
||||
describe('_filterBySimilarListings', () => {
|
||||
let fredyRuntime;
|
||||
|
||||
beforeEach(async () => {
|
||||
const FredyRuntime = await mockFredy();
|
||||
fredyRuntime = new FredyRuntime({}, null, 'dummy-provider', 'dummy-job', similarityCache);
|
||||
});
|
||||
|
||||
it('should filter out listings with similar title and address already in cache', () => {
|
||||
similarityCache.addCacheEntry('Penthouse', 'Mustermann Straße 1');
|
||||
|
||||
const listings = [
|
||||
{ id: '1', title: 'Penthouse', address: 'Mustermann Straße 1' },
|
||||
{ id: '2', title: 'Nice apartment', address: 'Mustermann Straße 15' },
|
||||
];
|
||||
|
||||
const result = fredyRuntime._filterBySimilarListings(listings);
|
||||
|
||||
expect(result).to.have.length(1);
|
||||
expect(result[0].id).to.equal('2');
|
||||
expect(result[0].title).to.equal('Nice apartment');
|
||||
|
||||
expect(similarityCache.hasSimilarEntries('Nice apartment', 'Mustermann Straße 15')).to.be.true;
|
||||
});
|
||||
|
||||
it('should handle listings with null or undefined address', () => {
|
||||
const listings = [
|
||||
{ id: '1', title: 'Penthouse', address: null },
|
||||
{ id: '2', title: 'Nice apartment', address: undefined },
|
||||
];
|
||||
|
||||
const result = fredyRuntime._filterBySimilarListings(listings);
|
||||
|
||||
expect(result).to.have.length(2);
|
||||
|
||||
expect(similarityCache.hasSimilarEntries('Penthouse', null)).to.be.true;
|
||||
expect(similarityCache.hasSimilarEntries('Nice apartment', undefined)).to.be.true;
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -5,9 +5,6 @@ import { expect } from 'chai';
|
||||
import * as provider from '../../lib/provider/einsAImmobilien.js';
|
||||
|
||||
describe('#einsAImmobilien testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
provider.init(providerConfig.einsAImmobilien, [], []);
|
||||
it('should test einsAImmobilien provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
|
||||
@@ -5,9 +5,6 @@ import { expect } from 'chai';
|
||||
import * as provider from '../../lib/provider/immobilienDe.js';
|
||||
|
||||
describe('#immobilien.de testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
provider.init(providerConfig.immobilienDe, [], []);
|
||||
it('should test immobilien.de provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
|
||||
@@ -5,10 +5,6 @@ import { expect } from 'chai';
|
||||
import * as provider from '../../lib/provider/immonet.js';
|
||||
|
||||
describe('#immonet testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
|
||||
it('should test immonet provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
provider.init(providerConfig.immonet, [], []);
|
||||
|
||||
@@ -5,10 +5,6 @@ import { get } from '../mocks/mockNotification.js';
|
||||
import * as provider from '../../lib/provider/immoscout.js';
|
||||
|
||||
describe('#immoscout provider testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
|
||||
provider.init(providerConfig.immoscout, [], []);
|
||||
it('should test immoscout provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
|
||||
@@ -5,9 +5,6 @@ import { expect } from 'chai';
|
||||
import * as provider from '../../lib/provider/immoswp.js';
|
||||
|
||||
describe('#immoswp testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
provider.init(providerConfig.immoswp, [], []);
|
||||
it('should test immoswp provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
|
||||
@@ -5,10 +5,6 @@ import { expect } from 'chai';
|
||||
import * as provider from '../../lib/provider/immowelt.js';
|
||||
|
||||
describe('#immowelt testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
|
||||
it('should test immowelt provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
provider.init(providerConfig.immowelt, [], []);
|
||||
|
||||
@@ -5,9 +5,6 @@ import { expect } from 'chai';
|
||||
import * as provider from '../../lib/provider/kleinanzeigen.js';
|
||||
|
||||
describe('#kleinanzeigen testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
it('should test kleinanzeigen provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
provider.init(providerConfig.kleinanzeigen, [], []);
|
||||
|
||||
@@ -5,10 +5,6 @@ import { expect } from 'chai';
|
||||
import * as provider from '../../lib/provider/mcMakler.js';
|
||||
|
||||
describe('#mcMakler testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
|
||||
it('should test mcMakler provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
provider.init(providerConfig.mcMakler, []);
|
||||
|
||||
@@ -5,9 +5,6 @@ import { expect } from 'chai';
|
||||
import * as provider from '../../lib/provider/neubauKompass.js';
|
||||
|
||||
describe('#neubauKompass testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
provider.init(providerConfig.neubauKompass, [], []);
|
||||
it('should test neubauKompass provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
|
||||
@@ -5,10 +5,6 @@ import { expect } from 'chai';
|
||||
import * as provider from '../../lib/provider/regionalimmobilien24.js';
|
||||
|
||||
describe('#regionalimmobilien24 testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
|
||||
it('should test regionalimmobilien24 provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
provider.init(providerConfig.regionalimmobilien24, []);
|
||||
|
||||
@@ -5,10 +5,6 @@ import { expect } from 'chai';
|
||||
import * as provider from '../../lib/provider/sparkasse.js';
|
||||
|
||||
describe('#sparkasse testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
|
||||
it('should test sparkasse provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
provider.init(providerConfig.sparkasse, []);
|
||||
|
||||
@@ -5,9 +5,6 @@ import { expect } from 'chai';
|
||||
import * as provider from '../../lib/provider/wgGesucht.js';
|
||||
|
||||
describe('#wgGesucht testsuite()', () => {
|
||||
after(() => {
|
||||
similarityCache.stopCacheCleanup();
|
||||
});
|
||||
provider.init(providerConfig.wgGesucht, [], []);
|
||||
it('should test wgGesucht provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
import { expect } from 'chai';
|
||||
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
||||
|
||||
describe('similarityCheck', () => {
|
||||
it('should return true on duplicate', () => {
|
||||
similarityCache.addCacheEntry('Hello World', 'Test');
|
||||
expect(similarityCache.hasSimilarEntries('Hello World', 'Test')).to.be.true;
|
||||
});
|
||||
|
||||
it('should return true even if one value is null', () => {
|
||||
similarityCache.addCacheEntry('Hello World', null);
|
||||
expect(similarityCache.hasSimilarEntries('Hello World', null)).to.be.true;
|
||||
});
|
||||
|
||||
it('should return true even if one value is an obj', () => {
|
||||
similarityCache.addCacheEntry('Hello World', [{ TR: 'OLOLO' }]);
|
||||
expect(similarityCache.hasSimilarEntries('Hello World', [{ TR: 'OLOLO' }])).to.be.true;
|
||||
});
|
||||
|
||||
it('should return false when no duplicate', () => {
|
||||
similarityCache.addCacheEntry('Hello World__', 'Test');
|
||||
expect(similarityCache.hasSimilarEntries('Hello World___', 'Test')).to.be.false;
|
||||
});
|
||||
|
||||
it('should return false when no duplicate', () => {
|
||||
expect(similarityCache.hasSimilarEntries('Hello World', 'Test')).to.be.true;
|
||||
similarityCache.invalidateAllForTest();
|
||||
expect(similarityCache.hasSimilarEntries('Hello World', 'Test')).to.be.false;
|
||||
});
|
||||
});
|
||||
62
test/similarity/similarityCache.test.js
Normal file
62
test/similarity/similarityCache.test.js
Normal file
@@ -0,0 +1,62 @@
|
||||
import { expect } from 'chai';
|
||||
import esmock from 'esmock';
|
||||
|
||||
// Helper to create module under test with mocks
|
||||
async function loadModuleWith({ entries = [] } = {}) {
|
||||
const mod = await esmock('../../lib/services/similarity-check/similarityCache.js', {
|
||||
// Mock the storage to return our controlled entries
|
||||
'../../lib/services/storage/listingsStorage.js': {
|
||||
getAllEntriesFromListings: () => entries,
|
||||
},
|
||||
});
|
||||
return mod;
|
||||
}
|
||||
|
||||
describe('similarityCache', () => {
|
||||
it('initSimilarityCache builds cache from storage and enables duplicate detection', async () => {
|
||||
const entries = [
|
||||
{ title: 'A', price: 1000, address: 'Main 1' },
|
||||
{ title: 'B', price: 0, address: 'Zero St' },
|
||||
];
|
||||
|
||||
const { initSimilarityCache, checkAndAddEntry } = await loadModuleWith({ entries });
|
||||
|
||||
// Initially, duplicates should not be detected for new data
|
||||
expect(checkAndAddEntry({ title: 'X', price: 200, address: 'Y' })).to.equal(false);
|
||||
|
||||
// Now initialize from storage
|
||||
initSimilarityCache();
|
||||
|
||||
// Exact duplicates should be detected
|
||||
expect(checkAndAddEntry({ title: 'A', price: 1000, address: 'Main 1' })).to.equal(true);
|
||||
// Ensure falsy-but-valid price 0 is preserved by hashing and detected as duplicate
|
||||
expect(checkAndAddEntry({ title: 'B', price: 0, address: 'Zero St' })).to.equal(true);
|
||||
});
|
||||
|
||||
it('checkAndAddEntry returns false for new entry then true for duplicate on second call', async () => {
|
||||
const { checkAndAddEntry } = await loadModuleWith();
|
||||
|
||||
const first = checkAndAddEntry({ title: 'C', price: 300, address: 'Road 3' });
|
||||
const second = checkAndAddEntry({ title: 'C', price: 300, address: 'Road 3' });
|
||||
|
||||
expect(first).to.equal(false);
|
||||
expect(second).to.equal(true);
|
||||
});
|
||||
|
||||
it('hashing ignores null/undefined but preserves 0 via behavior', async () => {
|
||||
const { checkAndAddEntry } = await loadModuleWith();
|
||||
|
||||
// Add baseline (null address ignored)
|
||||
const add1 = checkAndAddEntry({ title: 'T', price: 1, address: null });
|
||||
expect(add1).to.equal(false);
|
||||
// Duplicate with undefined address should match
|
||||
const dup = checkAndAddEntry({ title: 'T', price: 1, address: undefined });
|
||||
expect(dup).to.equal(true);
|
||||
|
||||
// Now test that price 0 is preserved (not filtered out)
|
||||
const addZero = checkAndAddEntry({ title: 'Z', price: 0, address: 'Zero' });
|
||||
expect(addZero).to.equal(false);
|
||||
const dupZero = checkAndAddEntry({ title: 'Z', price: 0, address: 'Zero' });
|
||||
expect(dupZero).to.equal(true);
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user