mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
check if a listing is still active
This commit is contained in:
35
index.js
35
index.js
@@ -1,16 +1,20 @@
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { config } from './lib/utils.js';
|
||||
import { config, getProviders, refreshConfig } from './lib/utils.js';
|
||||
import * as similarityCache from './lib/services/similarity-check/similarityCache.js';
|
||||
import * as jobStorage from './lib/services/storage/jobStorage.js';
|
||||
import FredyRuntime from './lib/FredyRuntime.js';
|
||||
import { duringWorkingHoursOrNotSet } from './lib/utils.js';
|
||||
import { runMigrations } from './lib/services/storage/migrations/migrate.js';
|
||||
import { ensureDemoUserExists, ensureAdminUserExists } from './lib/services/storage/userStorage.js';
|
||||
import { cleanupDemoAtMidnight } from './lib/services/demoCleanup.js';
|
||||
import { initTrackerCron } from './lib/services/tracking/Tracker-Cron.js';
|
||||
import { cleanupDemoAtMidnight } from './lib/services/crons/demoCleanup-cron.js';
|
||||
import { initTrackerCron } from './lib/services/crons/tracker-cron.js';
|
||||
import logger from './lib/services/logger.js';
|
||||
import { bus } from './lib/services/events/event-bus.js';
|
||||
import { initActiveCheckerCron } from './lib/services/crons/listing-alive-cron.js';
|
||||
|
||||
// Load configuration before any other startup steps
|
||||
await refreshConfig();
|
||||
|
||||
// Ensure sqlite directory exists before loading anything else (based on config.sqlitepath)
|
||||
const rawDir = config.sqlitepath || '/db';
|
||||
@@ -23,8 +27,9 @@ if (!fs.existsSync(absDir)) {
|
||||
// Run DB migrations once at startup and block until finished
|
||||
await runMigrations();
|
||||
|
||||
const providersPath = './lib/provider';
|
||||
const provider = fs.readdirSync(providersPath).filter((file) => file.endsWith('.js'));
|
||||
// Load provider modules once at startup
|
||||
const providers = await getProviders();
|
||||
|
||||
//assuming interval is always in minutes
|
||||
const INTERVAL = config.interval * 60 * 1000;
|
||||
|
||||
@@ -38,13 +43,11 @@ if (config.demoMode) {
|
||||
|
||||
logger.info(`Started Fredy successfully. Ui can be accessed via http://localhost:${config.port}`);
|
||||
|
||||
const fetchedProvider = await Promise.all(
|
||||
provider.filter((provider) => provider.endsWith('.js')).map(async (pro) => import(`${providersPath}/${pro}`)),
|
||||
);
|
||||
|
||||
ensureAdminUserExists();
|
||||
ensureDemoUserExists();
|
||||
await initTrackerCron();
|
||||
//do not wait for this to finish, let it run in the background
|
||||
initActiveCheckerCron();
|
||||
|
||||
bus.on('jobs:runAll', () => {
|
||||
logger.debug('Running Fredy Job manually');
|
||||
@@ -61,11 +64,17 @@ const execute = () => {
|
||||
.filter((job) => job.enabled)
|
||||
.forEach((job) => {
|
||||
job.provider
|
||||
.filter((p) => fetchedProvider.find((fp) => fp.metaInformation.id === p.id) != null)
|
||||
.filter((p) => providers.find((loaded) => loaded.metaInformation.id === p.id) != null)
|
||||
.forEach(async (prov) => {
|
||||
const pro = fetchedProvider.find((fp) => fp.metaInformation.id === prov.id);
|
||||
pro.init(prov, job.blacklist);
|
||||
await new FredyRuntime(pro.config, job.notificationAdapter, prov.id, job.id, similarityCache).execute();
|
||||
const matchedProvider = providers.find((loaded) => loaded.metaInformation.id === prov.id);
|
||||
matchedProvider.init(prov, job.blacklist);
|
||||
await new FredyRuntime(
|
||||
matchedProvider.config,
|
||||
job.notificationAdapter,
|
||||
prov.id,
|
||||
job.id,
|
||||
similarityCache,
|
||||
).execute();
|
||||
});
|
||||
});
|
||||
} else {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { buildHash, isOneOf } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
let appliedBlackList = [];
|
||||
|
||||
function normalize(o) {
|
||||
@@ -29,8 +30,8 @@ function normalizePrice(price) {
|
||||
return result[0];
|
||||
}
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
@@ -49,6 +50,7 @@ const config = {
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { buildHash, isOneOf } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
@@ -24,8 +25,8 @@ function normalize(o) {
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
@@ -46,6 +47,7 @@ const config = {
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { isOneOf, buildHash } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
let appliedBlackList = [];
|
||||
|
||||
function normalize(o) {
|
||||
@@ -11,8 +12,8 @@ function normalize(o) {
|
||||
return Object.assign(o, { id, address, price, size, title, link });
|
||||
}
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
const config = {
|
||||
@@ -31,6 +32,7 @@ const config = {
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
@@ -35,8 +35,11 @@
|
||||
*
|
||||
*/
|
||||
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { convertWebToMobile } from '../services/immoscout/immoscout-web-translator.js';
|
||||
import { buildHash, isOneOf } from '../utils.js';
|
||||
import {
|
||||
convertImmoscoutListingToMobileListing,
|
||||
convertWebToMobile,
|
||||
} from '../services/immoscout/immoscout-web-translator.js';
|
||||
import logger from '../services/logger.js';
|
||||
let appliedBlackList = [];
|
||||
|
||||
@@ -77,6 +80,25 @@ async function getListings(url) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Check whether an ImmoScout listing is still online by requesting its
 * mobile-API counterpart.
 *
 * @param {string} link Web URL of the listing.
 * @returns {Promise<number>} 1 when the request answers with HTTP 200,
 *   0 when it answers with HTTP 404 and -1 when the state cannot be determined
 *   (no usable link or any other status code).
 */
async function isListingActive(link) {
  const mobileUrl = convertImmoscoutListingToMobileListing(link);
  if (mobileUrl == null) {
    // The translator yields null for a missing/empty link. Requesting `null`
    // would only produce a confusing URL error, so report "unknown" instead.
    logger.warn('Cannot check immoscout listing without a link', link);
    return -1;
  }

  const result = await fetch(mobileUrl, {
    headers: {
      'User-Agent': 'ImmoScout_27.3_26.0_._',
    },
  });

  if (result.status === 200) {
    return 1;
  }

  if (result.status === 404) {
    return 0;
  }

  // Include the status code so the log entry is actionable.
  logger.warn('Unknown status for immoscout listing', link, result.status);
  return -1;
}
|
||||
|
||||
/**
 * Tell whether a value is missing (null/undefined) or has a length of zero.
 *
 * @param {*} val Value to inspect.
 * @returns {boolean} true when `val` is null, undefined or has `length === 0`.
 */
function nullOrEmpty(val) {
  if (val == null) {
    return true;
  }
  return val.length === 0;
}
|
||||
@@ -87,7 +109,7 @@ function normalize(o) {
|
||||
return Object.assign(o, { id, title, address });
|
||||
}
|
||||
function applyBlacklist(o) {
|
||||
return !utils.isOneOf(o.title, appliedBlackList);
|
||||
return !isOneOf(o.title, appliedBlackList);
|
||||
}
|
||||
const config = {
|
||||
url: null,
|
||||
@@ -104,6 +126,7 @@ const config = {
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
getListings: getListings,
|
||||
activeTester: isListingActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { isOneOf, buildHash } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
@@ -14,8 +15,8 @@ function normalize(o) {
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
@@ -35,6 +36,7 @@ const config = {
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { buildHash, isOneOf } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
@@ -8,8 +9,8 @@ function normalize(o) {
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
@@ -30,6 +31,7 @@ const config = {
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { buildHash, isOneOf } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
let appliedBlacklistedDistricts = [];
|
||||
@@ -11,10 +12,10 @@ function normalize(o) {
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
|
||||
const isBlacklistedDistrict =
|
||||
appliedBlacklistedDistricts.length === 0 ? false : utils.isOneOf(o.description, appliedBlacklistedDistricts);
|
||||
appliedBlacklistedDistricts.length === 0 ? false : isOneOf(o.description, appliedBlacklistedDistricts);
|
||||
return o.title != null && !isBlacklistedDistrict && titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
@@ -36,6 +37,7 @@ const config = {
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const metaInformation = {
|
||||
name: 'Ebay Kleinanzeigen',
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { isOneOf, buildHash } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
@@ -15,7 +16,7 @@ function normalize(o) {
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
return !utils.isOneOf(o.title, appliedBlackList);
|
||||
return !isOneOf(o.title, appliedBlackList);
|
||||
}
|
||||
|
||||
const config = {
|
||||
@@ -33,6 +34,7 @@ const config = {
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { isOneOf, buildHash } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
@@ -10,8 +11,8 @@ function normalize(o) {
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
|
||||
return o.id != null && titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
@@ -31,6 +32,7 @@ const config = {
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { removeJobsByUserId } from './storage/jobStorage.js';
|
||||
import { config } from '../utils.js';
|
||||
import { getUsers } from './storage/userStorage.js';
|
||||
import logger from './logger.js';
|
||||
import { removeJobsByUserId } from '../storage/jobStorage.js';
|
||||
import { config } from '../../utils.js';
|
||||
import { getUsers } from '../storage/userStorage.js';
|
||||
import logger from '../logger.js';
|
||||
import cron from 'node-cron';
|
||||
|
||||
/**
|
||||
13
lib/services/crons/listing-alive-cron.js
Normal file
13
lib/services/crons/listing-alive-cron.js
Normal file
@@ -0,0 +1,13 @@
|
||||
import cron from 'node-cron';
|
||||
import runActiveChecker from '../listings/listingActiveService.js';
|
||||
import logger from '../logger.js';
|
||||
|
||||
/**
 * Execute one active-listing check.
 *
 * Errors are logged instead of propagated: this task is started without being
 * awaited at startup and is also invoked by the cron scheduler, so a rejection
 * would have no handler and would surface as an unhandled promise rejection.
 *
 * @returns {Promise<void>}
 */
async function runTask() {
  try {
    await runActiveChecker();
  } catch (err) {
    logger.warn('Checking listings for being active failed', err);
  }
}

/**
 * Run the active-listing check once immediately and then register a daily
 * cron job (1 am) that repeats it.
 *
 * Because `runTask` never rejects, the cron job is registered even when the
 * initial run fails.
 *
 * @returns {Promise<void>} Resolves after the initial run has finished and the
 *   cron job has been registered.
 */
export async function initActiveCheckerCron() {
  //run directly on start
  await runTask();
  // then every day at 1 am
  cron.schedule('0 1 * * *', runTask);
}
|
||||
@@ -1,6 +1,6 @@
|
||||
import cron from 'node-cron';
|
||||
import { config, inDevMode } from '../../utils.js';
|
||||
import { trackMainEvent } from './Tracker.js';
|
||||
import { trackMainEvent } from '../tracking/Tracker.js';
|
||||
|
||||
async function runTask() {
|
||||
//make sure to only send tracking events if the user gave us the green light and we are not in dev mode
|
||||
@@ -60,6 +60,7 @@ https://api.mobile.immobilienscout24.de/search/map/v3?publishedafter=2025-05-14T
|
||||
https://api.mobile.immobilienscout24.de/search/map/v3?features=disableNHBGrouping,nextGen,fairPrice,listingsInListFirstSummary,xxlListingType,contactDetails&publishedafter=2025-05-14T09:19:43&sorting=standard&pagesize=300&searchType=shape&realEstateType=housebuy&pagenumber=1&shape=%7D%7BjwHy%7Cqh@jCKdCgAvB_BdB%7DBzAaCjAqCfAqC~@uCt@iCh@eCZkCLyC?_EO%7DEa@%7DEa@iE_@%7BD%5DaDe@gDi@gDo@uCu@kBcB_AeDOiE?iDCgCMuBOkDCkG?yFRgD%60@cB%5C%7BA%60@eBx@aB%7C@kAbAy@rAe@bBUxCAhE?dFh@fGlAzGbBbHlBxGdB%60FrAhDz@xBh@nAf@l@RNNXkCkMJR~B%7CEnCpErCnDtClCvC~ApCh@rCJpC?
|
||||
*/
|
||||
import queryString from 'query-string';
|
||||
import { nullOrEmpty } from '../../utils.js';
|
||||
|
||||
const PARAM_NAME_MAP = {
|
||||
heatingtypes: 'heatingtypes',
|
||||
@@ -193,3 +194,14 @@ export function convertWebToMobile(webUrl) {
|
||||
|
||||
return `https://api.mobile.immobilienscout24.de/search/list?${mobileQuery}`;
|
||||
}
|
||||
|
||||
/**
 * Translate the web URL of an ImmoScout expose into the matching mobile-API URL.
 *
 * Only URLs starting with the web expose prefix are rewritten; any other
 * non-empty string is returned unchanged.
 *
 * @param {string} url Web URL of the expose.
 * @returns {string|null} The translated URL, or null when `url` is null/empty.
 */
export function convertImmoscoutListingToMobileListing(url) {
  const webExposePrefix = /^https:\/\/www\.immobilienscout24\.de\/expose\//;
  const mobileExposePrefix = 'https://api.mobile.immobilienscout24.de/expose/';

  return nullOrEmpty(url) ? null : url.replace(webExposePrefix, mobileExposePrefix);
}
|
||||
|
||||
105
lib/services/listings/listingActiveService.js
Normal file
105
lib/services/listings/listingActiveService.js
Normal file
@@ -0,0 +1,105 @@
|
||||
import { deactivateListings, getActiveOrUnknownListings } from '../storage/listingsStorage.js';
|
||||
import { getProviders } from '../../utils.js';
|
||||
import logger from '../../services/logger.js';
|
||||
|
||||
/** Number of parallel activeTester calls when the caller does not specify one. */
const DEFAULT_CONCURRENCY = 4;

/**
 * Turn the caller-supplied concurrency into a usable worker count.
 *
 * @param {*} value Raw `opts.concurrency` value.
 * @returns {number} `DEFAULT_CONCURRENCY` for non-numeric input, otherwise the
 *   value rounded down and raised to at least 1.
 */
function normalizeConcurrency(value) {
  const parsed = Number(value);
  if (Number.isNaN(parsed)) {
    return DEFAULT_CONCURRENCY;
  }
  // Zero or negative values would start no worker at all and silently skip every check.
  return Math.max(1, Math.floor(parsed));
}

/**
 * Small generic mapLimit to cap concurrency without extra deps.
 * A throwing worker does not abort the run; the error is stored in its result slot.
 *
 * @template T, R
 * @param {T[]} items
 * @param {number} limit Max number of workers running at the same time.
 * @param {(item: T, index: number) => Promise<R>} worker
 * @returns {Promise<R[]>} Results in the order of `items`.
 */
async function mapLimit(items, limit, worker) {
  const results = new Array(items.length);
  let next = 0;

  // Each runner keeps pulling the next unprocessed index until none are left.
  async function runOne() {
    while (next < items.length) {
      const i = next++;
      try {
        results[i] = await worker(items[i], i);
      } catch (err) {
        results[i] = /** @type {any} */ (err);
      }
    }
  }

  const runnerCount = Math.min(limit, items.length);
  await Promise.all(Array.from({ length: runnerCount }, () => runOne()));
  return results;
}

/**
 * Runs the active-listing checker:
 * 1) Loads all listings with unknown or active status.
 * 2) Resolves each listing's provider and calls its `activeTester(link)`.
 * 3) Collects listings that are no longer active and deactivates them in one batch.
 *
 * Concurrency: network-bound checks are executed with a configurable concurrency limit.
 *
 * @param {object} [opts]
 * @param {number} [opts.concurrency=4] Max number of parallel activeTester calls.
 *   Values below 1 are raised to 1; non-numeric values fall back to the default.
 * @returns {Promise<void>}
 */
export default async function runActiveChecker(opts = {}) {
  const { concurrency = DEFAULT_CONCURRENCY } = opts;

  const listings = getActiveOrUnknownListings();
  if (!Array.isArray(listings) || listings.length === 0) {
    logger.debug('No listings to check.');
    return;
  }

  const providers = await getProviders();
  if (!Array.isArray(providers) || providers.length === 0) {
    logger.warn('No providers available. Skipping active checks.');
    return;
  }

  // Build a map for O(1) provider lookup by id
  /** @type {Record<string, any>} */
  const providerById = Object.create(null);
  for (const p of providers) {
    const id = p?.metaInformation?.id;
    if (id) providerById[id] = p;
  }

  /** @type {string[]} */
  const listingsSetToInactive = [];

  await mapLimit(listings, normalizeConcurrency(concurrency), async (listing) => {
    const { provider: listingProviderId, link, id } = listing || {};

    const matchedProvider = providerById[listingProviderId];
    if (!matchedProvider) {
      logger.warn('Could not find matching provider for', listingProviderId);
      return;
    }
    const tester = matchedProvider?.config?.activeTester;
    if (typeof tester !== 'function') {
      logger.warn('No activeTester configured for', listingProviderId);
      return;
    }

    // Contract: activeTester(link) returns 1 if active, 0 if inactive and -1 if
    // the state is unknown. Only an explicit 0 deactivates the listing.
    let result;
    try {
      result = await tester(link);
    } catch (err) {
      logger.warn('activeTester failed for', listingProviderId, link, err);
      return;
    }

    if (result === 0 && id) {
      listingsSetToInactive.push(id);
    }
  });

  if (listingsSetToInactive.length > 0) {
    logger.info(`Setting ${listingsSetToInactive.length} listings to inactive.`);
    deactivateListings(listingsSetToInactive);
  } else {
    logger.debug('No listings need to be set inactive.');
  }
}
|
||||
68
lib/services/listings/listingActiveTester.js
Normal file
68
lib/services/listings/listingActiveTester.js
Normal file
@@ -0,0 +1,68 @@
|
||||
import fetch from 'node-fetch';
|
||||
import { randomBetween, sleep } from '../../utils.js';
|
||||
|
||||
const maxAttempts = 3;
|
||||
|
||||
/**
 * Check if a listing is still active with up to `maxAttempts` attempts and
 * exponential backoff between them.
 *
 * Rules:
 * - HTTP 200 => return 1
 * - HTTP 401/403 => return -1 (most certainly detected as a bot)
 * - HTTP 404 => return 0
 * - Other statuses or network errors => retry until attempts are exhausted,
 *   then return -1
 *
 * Only a definite 404 is reported as "not active". Running out of retries says
 * nothing about the listing itself (rate limiting, server errors, connection
 * problems), so it is reported as unknown rather than as inactive.
 *
 * @param {string} link URL of the listing.
 * @returns {Promise<number>} 1 if active, 0 if not active and -1 if the state
 *   is unknown (detected as bot or no conclusive answer).
 */
export default async function checkIfListingIsActive(link) {
  // Short random pause (50-100 ms) before the first request.
  await sleep(randomBetween(50, 100));

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      const res = await fetch(link, {
        headers: {
          'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
          'Accept-Language': 'de-DE,de;q=0.9,en;q=0.8',
        },
      });

      if (res.status === 200) return 1;
      if (res.status === 401 || res.status === 403) return -1;
      if (res.status === 404) return 0;
      // Any other status is inconclusive: fall through to the retry handling.
    } catch {
      // Network error: inconclusive as well, fall through to the retry handling.
    }

    // Wait only when another attempt follows.
    if (attempt < maxAttempts) {
      await sleep(backoffDelay(attempt));
    }
  }

  return -1;
}
|
||||
|
||||
/**
 * Compute the waiting time before the next retry: the delay doubles with every
 * attempt, starting at 500 ms, and never exceeds 2000 ms.
 * attempt: 1 -> 500ms, 2 -> 1000ms, 3 -> 2000ms (cap)
 *
 * @param {number} attempt 1-based attempt index
 * @returns {number} delay in ms
 */
function backoffDelay(attempt) {
  const firstDelayMs = 500;
  const maxDelayMs = 2000;

  const uncapped = firstDelayMs * Math.pow(2, attempt - 1);
  return uncapped > maxDelayMs ? maxDelayMs : uncapped;
}
|
||||
@@ -53,6 +53,36 @@ export const getKnownListingHashesForJobAndProvider = (jobId, providerId) => {
|
||||
).map((r) => r.hash);
|
||||
};
|
||||
|
||||
/**
 * Load every listing whose `is_active` flag is either unset (unknown) or 1
 * (active), ordered by provider, so they can be checked for still being online.
 *
 * @returns {object[]} Whatever `SqliteConnection.query` yields for the
 *   statement - presumably one row object per listing (all columns).
 */
export const getActiveOrUnknownListings = () => {
  const activeOrUnknownSql = `SELECT *
       FROM listings
       WHERE is_active is null OR is_active = 1 ORDER BY provider`;

  return SqliteConnection.query(activeOrUnknownSql);
};
|
||||
|
||||
/**
 * Mark the given listings as inactive (is_active = 0).
 *
 * One `?` placeholder is generated per id so the values are passed as bound
 * parameters rather than being concatenated into the statement.
 *
 * @param {string[]} ids - Array of listing IDs to deactivate.
 * @returns {object[]} Result of the SQLite query execution.
 */
export const deactivateListings = (ids) => {
  const idPlaceholders = ids.map(() => '?').join(',');
  const deactivateSql = `UPDATE listings
       SET is_active = 0
       WHERE id IN (${idPlaceholders})`;

  return SqliteConnection.execute(deactivateSql, ids);
};
|
||||
|
||||
/**
|
||||
* Persist a batch of scraped listings for a given job and provider.
|
||||
*
|
||||
@@ -86,9 +116,9 @@ export const storeListings = (jobId, providerId, listings) => {
|
||||
SqliteConnection.withTransaction((db) => {
|
||||
const stmt = db.prepare(
|
||||
`INSERT INTO listings (id, hash, provider, job_id, price, size, title, image_url, description, address,
|
||||
link, created_at)
|
||||
link, created_at, is_active)
|
||||
VALUES (@id, @hash, @provider, @job_id, @price, @size, @title, @image_url, @description, @address, @link,
|
||||
@created_at)
|
||||
@created_at, 1)
|
||||
ON CONFLICT(job_id, hash) DO NOTHING`,
|
||||
);
|
||||
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
// Migration: add an is_active flag to the listings table (defaults to 1) so that
|
||||
// listings which are no longer online can be marked as inactive
|
||||
|
||||
/**
 * Apply the migration: adds the integer column `is_active` (default 1) to the
 * `listings` table.
 *
 * @param {object} db Database handle; only its `exec(sql)` method is used.
 */
export function up(db) {
  const addIsActiveColumnSql = `
    ALTER TABLE listings ADD COLUMN is_active INTEGER DEFAULT 1;
  `;
  db.exec(addIsActiveColumnSql);
}
|
||||
64
lib/utils.js
64
lib/utils.js
@@ -1,5 +1,6 @@
|
||||
import { dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath, pathToFileURL } from 'node:url';
|
||||
import { readFile } from 'fs/promises';
|
||||
import { createHash } from 'crypto';
|
||||
import { DEFAULT_CONFIG } from './defaultConfig.js';
|
||||
@@ -11,6 +12,26 @@ const RE_GT = />/g;
|
||||
const RE_WEBP = /\/format\/webp/gi;
|
||||
const RE_EXT = /\.(jpe?g|png|gif)(\?.*)?$/i;
|
||||
const HTTPS_PREFIX = 'https://';
|
||||
const providersDirectoryPath = `${getDirName()}/provider`;
|
||||
|
||||
// Promise of the loaded provider modules; null until the first (successful or
// still pending) load has been started.
let cachedProvidersPromise = null;

/**
 * Lazily load all provider modules (`*.js`) from the provider directory.
 * Caches the resulting promise to avoid re-importing on subsequent calls.
 *
 * A failed load is not kept: when importing any provider rejects, the cache is
 * cleared so that the next call tries again instead of returning the same
 * rejected promise forever.
 *
 * @returns {Promise<any[]>} A list of loaded provider modules.
 */
export function getProviders() {
  if (!cachedProvidersPromise) {
    /** @type {string[]} */
    const providerFileNames = fs.readdirSync(providersDirectoryPath).filter((fileName) => fileName.endsWith('.js'));
    const loading = Promise.all(
      providerFileNames.map((fileName) => import(pathToFileURL(path.join(providersDirectoryPath, fileName)).href)),
    );
    cachedProvidersPromise = loading;
    // Registered before the promise is handed out, so the cache is already
    // cleared when a caller observes the rejection. The identity check avoids
    // clearing a newer load that may have been started in the meantime.
    loading.catch(() => {
      if (cachedProvidersPromise === loading) {
        cachedProvidersPromise = null;
      }
    });
  }
  return cachedProvidersPromise;
}
|
||||
|
||||
/**
|
||||
* Safely stringify a value to JSON for storage.
|
||||
@@ -21,7 +42,7 @@ const HTTPS_PREFIX = 'https://';
|
||||
* @param {T} v - Any JSON-serializable value.
|
||||
* @returns {string|null} JSON string or null.
|
||||
*/
|
||||
export const toJson = (v) => (v == null ? null : JSON.stringify(v));
|
||||
const toJson = (v) => (v == null ? null : JSON.stringify(v));
|
||||
|
||||
/**
|
||||
* Safely parse JSON text coming from storage.
|
||||
@@ -33,7 +54,7 @@ export const toJson = (v) => (v == null ? null : JSON.stringify(v));
|
||||
* @param {T} fallback - Value to return when txt is null/invalid.
|
||||
* @returns {T} Parsed value or fallback.
|
||||
*/
|
||||
export const fromJson = (txt, fallback) => {
|
||||
const fromJson = (txt, fallback) => {
|
||||
if (txt == null) return fallback;
|
||||
try {
|
||||
return JSON.parse(txt);
|
||||
@@ -213,23 +234,40 @@ async function getPackageVersion() {
|
||||
return 'N/A';
|
||||
}
|
||||
|
||||
/**
 * Pause asynchronously for the given amount of time.
 *
 * @param {number} ms milliseconds to wait
 * @returns {Promise<void>} resolves once the timer has fired
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Pick a random integer between `a` and `b`, both bounds included.
 *
 * @param {number} a lower bound (int)
 * @param {number} b upper bound (int)
 * @returns {number} random integer in [a, b]
 */
function randomBetween(a, b) {
  const span = b - a + 1;
  const offset = Math.floor(Math.random() * span);
  return offset + a;
}
|
||||
|
||||
// Call refreshConfig() from the application entrypoint during startup to populate config.
|
||||
await refreshConfig();
|
||||
|
||||
export { isOneOf };
|
||||
export { normalizeImageUrl };
|
||||
export { inDevMode };
|
||||
export { nullOrEmpty };
|
||||
export { duringWorkingHoursOrNotSet };
|
||||
export { getDirName };
|
||||
export { config };
|
||||
export { buildHash };
|
||||
export { getPackageVersion };
|
||||
export default {
|
||||
export {
|
||||
isOneOf,
|
||||
normalizeImageUrl,
|
||||
inDevMode,
|
||||
nullOrEmpty,
|
||||
duringWorkingHoursOrNotSet,
|
||||
getDirName,
|
||||
sleep,
|
||||
randomBetween,
|
||||
config,
|
||||
buildHash,
|
||||
getPackageVersion,
|
||||
toJson,
|
||||
fromJson,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user