mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
Merge branch 'master' of github.com:orangecoding/fredy
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
import { NoNewListingsWarning } from './errors.js';
|
||||
import { setKnownListings, getKnownListings } from './services/storage/listingsStorage.js';
|
||||
import { storeListings, getKnownListingHashesForJobAndProvider } from './services/storage/listingsStorage.js';
|
||||
import * as notify from './notification/notify.js';
|
||||
import Extractor from './services/extractor/extractor.js';
|
||||
import urlModifier from './services/queryStringMutator.js';
|
||||
import logger from './services/logger.js';
|
||||
|
||||
class FredyRuntime {
|
||||
/**
|
||||
@@ -59,7 +60,7 @@ class FredyRuntime {
|
||||
})
|
||||
.catch((err) => {
|
||||
reject(err);
|
||||
console.error(err);
|
||||
logger.error(err);
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -76,7 +77,9 @@ class FredyRuntime {
|
||||
}
|
||||
|
||||
_findNew(listings) {
|
||||
const newListings = listings.filter((o) => getKnownListings(this._jobKey, this._providerId)[o.id] == null);
|
||||
const hashes = getKnownListingHashesForJobAndProvider(this._jobKey, this._providerId) || [];
|
||||
|
||||
const newListings = listings.filter((o) => !hashes.includes(o.id));
|
||||
if (newListings.length === 0) {
|
||||
throw new NoNewListingsWarning();
|
||||
}
|
||||
@@ -92,30 +95,24 @@ class FredyRuntime {
|
||||
}
|
||||
|
||||
_save(newListings) {
|
||||
const currentListings = getKnownListings(this._jobKey, this._providerId) || {};
|
||||
newListings.forEach((listing) => {
|
||||
currentListings[listing.id] = Date.now();
|
||||
});
|
||||
setKnownListings(this._jobKey, this._providerId, currentListings);
|
||||
storeListings(this._jobKey, this._providerId, newListings);
|
||||
return newListings;
|
||||
}
|
||||
|
||||
_filterBySimilarListings(listings) {
|
||||
const filteredList = listings.filter((listing) => {
|
||||
const similar = this._similarityCache.hasSimilarEntries(this._jobKey, listing.title);
|
||||
const similar = this._similarityCache.hasSimilarEntries(listing.title, listing.address);
|
||||
if (similar) {
|
||||
/* eslint-disable no-console */
|
||||
console.debug(`Filtering similar entry for job with id ${this._jobKey} with title: `, listing.title);
|
||||
/* eslint-enable no-console */
|
||||
logger.debug(`Filtering similar entry for title: ${listing.title} and address ${listing.address}`);
|
||||
}
|
||||
return !similar;
|
||||
});
|
||||
filteredList.forEach((filter) => this._similarityCache.addCacheEntry(this._jobKey, filter.title));
|
||||
filteredList.forEach((filter) => this._similarityCache.addCacheEntry(filter.title, listings.address));
|
||||
return filteredList;
|
||||
}
|
||||
|
||||
_handleError(err) {
|
||||
if (err.name !== 'NoNewListingsWarning') console.error(err);
|
||||
if (err.name !== 'NoNewListingsWarning') logger.error(err);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,12 +7,14 @@ import { loginRouter } from './routes/loginRoute.js';
|
||||
import { config } from '../utils.js';
|
||||
import { userRouter } from './routes/userRoute.js';
|
||||
import { jobRouter } from './routes/jobRouter.js';
|
||||
import { versionRouter } from './routes/versionRouter.js';
|
||||
import bodyParser from 'body-parser';
|
||||
import restana from 'restana';
|
||||
import files from 'serve-static';
|
||||
import path from 'path';
|
||||
import { getDirName } from '../utils.js';
|
||||
import { demoRouter } from './routes/demoRouter.js';
|
||||
import logger from '../services/logger.js';
|
||||
const service = restana();
|
||||
const staticService = files(path.join(getDirName(), '../ui/public'));
|
||||
const PORT = config.port || 9998;
|
||||
@@ -22,6 +24,7 @@ service.use(cookieSession());
|
||||
service.use(staticService);
|
||||
service.use('/api/admin', authInterceptor());
|
||||
service.use('/api/jobs', authInterceptor());
|
||||
service.use('/api/version', authInterceptor());
|
||||
// /admin can only be accessed when user is having admin permissions
|
||||
service.use('/api/admin', adminInterceptor());
|
||||
service.use('/api/jobs/notificationAdapter', notificationAdapterRouter);
|
||||
@@ -29,12 +32,12 @@ service.use('/api/admin/generalSettings', generalSettingsRouter);
|
||||
service.use('/api/jobs/provider', providerRouter);
|
||||
service.use('/api/jobs/insights', analyticsRouter);
|
||||
service.use('/api/admin/users', userRouter);
|
||||
service.use('/api/version', versionRouter);
|
||||
service.use('/api/jobs', jobRouter);
|
||||
service.use('/api/login', loginRouter);
|
||||
//this route is unsecured intentionally as it is being queried from the login page
|
||||
service.use('/api/demo', demoRouter);
|
||||
|
||||
/* eslint-disable no-console */
|
||||
service.start(PORT).then(() => {
|
||||
console.info(`Started API service on port ${PORT}`);
|
||||
logger.debug(`Started API service on port ${PORT}`);
|
||||
});
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import restana from 'restana';
|
||||
import { config, getDirName, readConfigFromStorage, refreshConfig } from '../../utils.js';
|
||||
import fs from 'fs';
|
||||
import { handleDemoUser } from '../../services/storage/userStorage.js';
|
||||
import { ensureDemoUserExists } from '../../services/storage/userStorage.js';
|
||||
import logger from '../../services/logger.js';
|
||||
const service = restana();
|
||||
const generalSettingsRouter = service.newRouter();
|
||||
generalSettingsRouter.get('/', async (req, res) => {
|
||||
@@ -18,9 +19,9 @@ generalSettingsRouter.post('/', async (req, res) => {
|
||||
const currentConfig = await readConfigFromStorage();
|
||||
fs.writeFileSync(`${getDirName()}/../conf/config.json`, JSON.stringify({ ...currentConfig, ...settings }));
|
||||
await refreshConfig();
|
||||
handleDemoUser();
|
||||
ensureDemoUserExists();
|
||||
} catch (err) {
|
||||
console.error(err);
|
||||
logger.error(err);
|
||||
res.send(new Error('Error while trying to write settings.'));
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -3,9 +3,12 @@ import * as jobStorage from '../../services/storage/jobStorage.js';
|
||||
import * as userStorage from '../../services/storage/userStorage.js';
|
||||
import { config } from '../../utils.js';
|
||||
import { isAdmin } from '../security.js';
|
||||
import { trackDemoJobCreated } from '../../services/tracking/Tracker.js';
|
||||
import logger from '../../services/logger.js';
|
||||
import { bus } from '../../services/events/event-bus.js';
|
||||
|
||||
const service = restana();
|
||||
const jobRouter = service.newRouter();
|
||||
|
||||
function doesJobBelongsToUser(job, req) {
|
||||
const userId = req.session.currentUser;
|
||||
if (userId == null) {
|
||||
@@ -17,6 +20,7 @@ function doesJobBelongsToUser(job, req) {
|
||||
}
|
||||
return user.isAdmin || job.userId === user.id;
|
||||
}
|
||||
|
||||
jobRouter.get('/', async (req, res) => {
|
||||
const isUserAdmin = isAdmin(req);
|
||||
//show only the jobs which belongs to the user (or all of the user is an admin)
|
||||
@@ -30,6 +34,12 @@ jobRouter.get('/processingTimes', async (req, res) => {
|
||||
};
|
||||
res.send();
|
||||
});
|
||||
|
||||
jobRouter.post('/startAll', async (req, res) => {
|
||||
bus.emit('jobs:runAll');
|
||||
res.send();
|
||||
});
|
||||
|
||||
jobRouter.post('/', async (req, res) => {
|
||||
const { provider, notificationAdapter, name, blacklist = [], jobId, enabled } = req.body;
|
||||
try {
|
||||
@@ -44,13 +54,8 @@ jobRouter.post('/', async (req, res) => {
|
||||
});
|
||||
} catch (error) {
|
||||
res.send(new Error(error));
|
||||
console.error(error);
|
||||
logger.error(error);
|
||||
}
|
||||
trackDemoJobCreated({
|
||||
name,
|
||||
provider,
|
||||
adapter: notificationAdapter,
|
||||
});
|
||||
res.send();
|
||||
});
|
||||
jobRouter.delete('', async (req, res) => {
|
||||
@@ -64,7 +69,7 @@ jobRouter.delete('', async (req, res) => {
|
||||
}
|
||||
} catch (error) {
|
||||
res.send(new Error(error));
|
||||
console.error(error);
|
||||
logger.error(error);
|
||||
}
|
||||
res.send();
|
||||
});
|
||||
@@ -83,7 +88,7 @@ jobRouter.put('/:jobId/status', async (req, res) => {
|
||||
}
|
||||
} catch (error) {
|
||||
res.send(new Error(error));
|
||||
console.error(error);
|
||||
logger.error(error);
|
||||
}
|
||||
res.send();
|
||||
});
|
||||
|
||||
@@ -3,6 +3,7 @@ import * as userStorage from '../../services/storage/userStorage.js';
|
||||
import * as hasher from '../../services/security/hash.js';
|
||||
import { config } from '../../utils.js';
|
||||
import { trackDemoAccessed } from '../../services/tracking/Tracker.js';
|
||||
import logger from '../../services/logger.js';
|
||||
const service = restana();
|
||||
const loginRouter = service.newRouter();
|
||||
loginRouter.get('/user', async (req, res) => {
|
||||
@@ -27,7 +28,7 @@ loginRouter.post('/', async (req, res) => {
|
||||
}
|
||||
if (user.password === hasher.hash(password)) {
|
||||
if (config.demoMode) {
|
||||
trackDemoAccessed();
|
||||
await trackDemoAccessed();
|
||||
}
|
||||
|
||||
req.session.currentUser = user.id;
|
||||
@@ -35,7 +36,7 @@ loginRouter.post('/', async (req, res) => {
|
||||
res.send(200);
|
||||
return;
|
||||
} else {
|
||||
console.error(`User ${username} tried to login, but password was wrong.`);
|
||||
logger.error(`User ${username} tried to login, but password was wrong.`);
|
||||
}
|
||||
res.send(401);
|
||||
});
|
||||
|
||||
30
lib/api/routes/versionRouter.js
Normal file
30
lib/api/routes/versionRouter.js
Normal file
@@ -0,0 +1,30 @@
|
||||
import restana from 'restana';
|
||||
import fetch from 'node-fetch';
|
||||
import { getPackageVersion } from '../../utils.js';
|
||||
|
||||
const service = restana();
|
||||
const versionRouter = service.newRouter();
|
||||
|
||||
versionRouter.get('/', async (req, res) => {
|
||||
const versionPayload = await getCurrentVersionFromGithub();
|
||||
res.body = versionPayload == null ? { newVersion: false } : versionPayload;
|
||||
res.send();
|
||||
});
|
||||
|
||||
async function getCurrentVersionFromGithub() {
|
||||
const raw = await fetch('https://api.github.com/repos/orangecoding/fredy/releases/latest');
|
||||
const data = await raw.json();
|
||||
const localFredyVersion = await getPackageVersion();
|
||||
if (localFredyVersion === data.tag_name) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
newVersion: true,
|
||||
version: data.tag_name,
|
||||
url: data.html_url,
|
||||
body: data.body,
|
||||
localFredyVersion,
|
||||
};
|
||||
}
|
||||
|
||||
export { versionRouter };
|
||||
@@ -4,4 +4,6 @@ export const DEFAULT_CONFIG = {
|
||||
workingHours: { from: '', to: '' },
|
||||
demoMode: false,
|
||||
analyticsEnabled: null,
|
||||
// Default path for sqlite storage directory. Interpreted relative to project root.
|
||||
sqlitepath: '/db',
|
||||
};
|
||||
|
||||
@@ -5,6 +5,7 @@ import Handlebars from 'handlebars';
|
||||
import fetch from 'node-fetch';
|
||||
import { markdown2Html } from '../../services/markdown.js';
|
||||
import { getDirName, normalizeImageUrl } from '../../utils.js';
|
||||
import logger from '../../services/logger.js';
|
||||
|
||||
const __dirname = getDirName();
|
||||
const template = fs.readFileSync(path.resolve(__dirname + '/notification/emailTemplate/template.hbs'), 'utf8');
|
||||
@@ -24,7 +25,7 @@ const toBase64 = async (url) => {
|
||||
const ab = await res.arrayBuffer();
|
||||
return Buffer.from(ab).toString('base64');
|
||||
} catch (error) {
|
||||
console.error(`Error fetching image from ${url}:`, error.message);
|
||||
logger.error(`Error fetching image from ${url}:`, error.message);
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
@@ -62,7 +63,7 @@ const mapListingsWithCid = async (serviceName, jobKey, listings) => {
|
||||
item.hasImage = true;
|
||||
item.imageCid = cid;
|
||||
} catch (error) {
|
||||
console.warn(`Skipping image for listing ${i} due to error: ${error.message}`);
|
||||
logger.warn(`Skipping image for listing ${i} due to error: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,18 @@
|
||||
import { markdown2Html } from '../../services/markdown.js';
|
||||
import Database from 'better-sqlite3';
|
||||
export const send = ({ serviceName, newListings, jobKey }) => {
|
||||
const db = new Database('db/listings.db');
|
||||
import path from 'path';
|
||||
import fs from 'fs';
|
||||
|
||||
export const send = ({ serviceName, newListings, jobKey, notificationConfig }) => {
|
||||
const sqliteConfig = notificationConfig.find((adapter) => adapter.id === config.id);
|
||||
const dbPath = sqliteConfig?.fields?.dbPath || 'db/listings.db';
|
||||
|
||||
const dbDir = path.dirname(dbPath);
|
||||
if (!fs.existsSync(dbDir)) {
|
||||
fs.mkdirSync(dbDir, { recursive: true });
|
||||
}
|
||||
|
||||
const db = new Database(dbPath);
|
||||
const fields = [
|
||||
'serviceName',
|
||||
'jobKey',
|
||||
@@ -30,8 +41,16 @@ export const send = ({ serviceName, newListings, jobKey }) => {
|
||||
};
|
||||
export const config = {
|
||||
id: 'sqlite',
|
||||
name: 'Sqlite',
|
||||
description: 'This adapter stores listings in a local sqlite3 database.',
|
||||
config: {},
|
||||
name: 'SQLite',
|
||||
description: 'This adapter stores listings in a local SQLite 3 database.',
|
||||
fields: {
|
||||
dbPath: {
|
||||
type: 'text',
|
||||
label: 'Database Path',
|
||||
description:
|
||||
'Path to the SQLite database file (e.g., db/listings.db). If not specified, defaults to db/listings.db',
|
||||
placeholder: 'db/listings.db',
|
||||
},
|
||||
},
|
||||
readme: markdown2Html('lib/notification/adapter/sqlite.md'),
|
||||
};
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
### Sqlite Adapter
|
||||
### SQLite Adapter
|
||||
|
||||
This adapter stores search results in a sqlite database located in db/listings.db. This file can be used for further analysis later on.
|
||||
This adapter stores search results in an SQLite database. By default, the database is located at `db/listings.db`, but you can configure a custom location. This file can be used for further analysis later.
|
||||
|
||||
Fields are:
|
||||
The database table contains the following columns (all stored as `TEXT` type):
|
||||
|
||||
```
|
||||
['serviceName', 'jobKey', 'id', 'size', 'rooms', 'price', 'address', 'title', 'link', 'description']
|
||||
['serviceName', 'jobKey', 'id', 'size', 'rooms', 'price', 'address', 'title', 'link', 'description', 'image']
|
||||
```
|
||||
|
||||
@@ -63,31 +63,41 @@ export const send = ({ serviceName, newListings, notificationConfig, jobKey }) =
|
||||
const jobName = job == null ? jobKey : job.name;
|
||||
|
||||
const throttledCall = getThrottled(chatId, async function (endpoint, body) {
|
||||
await fetch(`https://api.telegram.org/bot${token}/${endpoint}`, {
|
||||
const res = await fetch(`https://api.telegram.org/bot${token}/${endpoint}`, {
|
||||
method: 'post',
|
||||
body: JSON.stringify(body),
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
});
|
||||
return res;
|
||||
});
|
||||
|
||||
const promises = newListings.map(async (o) => {
|
||||
const img = normalizeImageUrl(o.image);
|
||||
const textPayload = {
|
||||
chat_id: chatId,
|
||||
text: buildText(jobName, serviceName, o),
|
||||
parse_mode: 'HTML',
|
||||
disable_web_page_preview: true,
|
||||
};
|
||||
|
||||
if (img) {
|
||||
return throttledCall('sendPhoto', {
|
||||
if (!img) {
|
||||
return throttledCall('sendMessage', textPayload);
|
||||
}
|
||||
|
||||
try {
|
||||
return await throttledCall('sendPhoto', {
|
||||
chat_id: chatId,
|
||||
photo: img,
|
||||
caption: buildCaption(jobName, serviceName, o),
|
||||
parse_mode: 'HTML',
|
||||
});
|
||||
} catch (e) {
|
||||
// If we see a timeout due to sending an image, try sending it without
|
||||
if (e && (e.code === 'ETIMEDOUT' || e.errno === 'ETIMEDOUT')) {
|
||||
return throttledCall('sendMessage', textPayload);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
|
||||
return throttledCall('sendMessage', {
|
||||
chat_id: chatId,
|
||||
text: buildText(jobName, serviceName, o),
|
||||
parse_mode: 'HTML',
|
||||
disable_web_page_preview: true,
|
||||
});
|
||||
});
|
||||
|
||||
return Promise.all(promises);
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { buildHash, isOneOf } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
let appliedBlackList = [];
|
||||
|
||||
function normalize(o) {
|
||||
@@ -7,7 +8,8 @@ function normalize(o) {
|
||||
const price = normalizePrice(o.price);
|
||||
const id = buildHash(o.id, price);
|
||||
const image = baseUrl + o.image;
|
||||
return Object.assign(o, { id, price, link, image });
|
||||
const address = o.address == null ? null : o.address.trim().replaceAll('/', ',');
|
||||
return Object.assign(o, { id, price, link, image, address });
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -28,8 +30,8 @@ function normalizePrice(price) {
|
||||
return result[0];
|
||||
}
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
@@ -44,9 +46,11 @@ const config = {
|
||||
size: '.tabelle .tabelle_inhalt_infos .single_data_box | removeNewline | trim',
|
||||
title: '.inner_object_data .tabelle_inhalt_titel_black | removeNewline | trim',
|
||||
image: '.inner_object_pic img@src',
|
||||
address: '.tabelle .tabelle_inhalt_infos .left_information > div:nth-child(2) | removeNewline | trim',
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { buildHash, isOneOf } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
@@ -12,10 +13,10 @@ function parseId(shortenedLink) {
|
||||
|
||||
function normalize(o) {
|
||||
const baseUrl = 'https://www.immobilien.de';
|
||||
const size = o.size || 'N/A m²';
|
||||
const price = o.price || 'N/A €';
|
||||
const size = o.size || null;
|
||||
const price = o.price || null;
|
||||
const title = o.title || 'No title available';
|
||||
const address = o.address || 'No address available';
|
||||
const address = o.address || null;
|
||||
const shortLink = shortenLink(o.link);
|
||||
const link = `${baseUrl}/${shortLink}`;
|
||||
const image = baseUrl + o.image;
|
||||
@@ -24,8 +25,8 @@ function normalize(o) {
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
@@ -46,6 +47,7 @@ const config = {
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
@@ -1,25 +1,19 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { isOneOf, buildHash } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
let appliedBlackList = [];
|
||||
|
||||
/**
|
||||
* Note, Immonet is rly a piece of sh*t. It is using a weird combination of React and some buttons (instead of links),
|
||||
* so that if somebody clicks the listing, a new page will open with the actual link to the listing. Of course, a scraper
|
||||
* cannot do this (which is why I always just return the link to the whole list of listings).
|
||||
* This is not only bad for us, but also bad for ppl with disabilities...
|
||||
*/
|
||||
|
||||
function normalize(o) {
|
||||
const size = o.size != null ? o.size.replace('Wohnfläche ', '') : 'N/A m²';
|
||||
const price = o.price.replace('Kaufpreis ', '');
|
||||
const address = o.address?.split(' • ')?.pop() ?? null;
|
||||
const title = o.title || 'No title available';
|
||||
const link = config.url;
|
||||
const link = o.link != null ? decodeURIComponent(o.link) : config.url;
|
||||
const id = buildHash(title, price);
|
||||
return Object.assign(o, { id, address, price, size, title, link });
|
||||
}
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
const config = {
|
||||
@@ -28,15 +22,17 @@ const config = {
|
||||
sortByDateParam: 'sortby=19',
|
||||
waitForSelector: 'div[data-testid="serp-gridcontainer-testid"]',
|
||||
crawlFields: {
|
||||
id: 'button@title |trim', // immonet is a piece of sh*t. See comment above
|
||||
id: 'button@title |trim',
|
||||
title: 'button@title |trim',
|
||||
price: 'div[data-testid="cardmfe-price-testid"] | trim',
|
||||
size: 'div[data-testid="cardmfe-keyfacts-testid"] | trim',
|
||||
address: 'div[data-testid="cardmfe-description-box-address"] | trim',
|
||||
image: 'div[data-testid="cardmfe-picture-box-test-id"] img@src',
|
||||
link: 'button@data-base',
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
*
|
||||
* The mobile API provides the following endpoints:
|
||||
* - GET /search/total?{search parameters}: Returns the total number of listings for the given query
|
||||
* Example: `curl -H "User-Agent: ImmoScout24_1410_30_._" https://api.mobile.immobilienscout24.de/search/total?searchType=region&realestatetype=apartmentrent&pricetype=calculatedtotalrent&geocodes=%2Fde%2Fberlin%2Fberlin `
|
||||
* Example: `curl -H "User-Agent: ImmoScout_27.3_26.0_._" https://api.mobile.immobilienscout24.de/search/total?searchType=region&realestatetype=apartmentrent&pricetype=calculatedtotalrent&geocodes=%2Fde%2Fberlin%2Fberlin `
|
||||
*
|
||||
* - POST /search/list?{search parameters}: Actually retrieves the listings. Body is json encoded and contains
|
||||
* data specifying additional results (advertisements) to return. The format is as follows:
|
||||
@@ -15,12 +15,12 @@
|
||||
* ```
|
||||
* It is not necessary to provide data for the specified keys.
|
||||
*
|
||||
* Example: `curl -X POST 'https://api.mobile.immobilienscout24.de/search/list?pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region&geocodes=%2Fde%2Fberlin%2Fberlin&pagenumber=1' -H "Connection: keep-alive" -H "User-Agent: ImmoScout24_1410_30_._" -H "Accept: application/json" -H "Content-Type: application/json" -d '{"supportedResultListType": [], "userData": {}}'`
|
||||
* Example: `curl -X POST 'https://api.mobile.immobilienscout24.de/search/list?pricetype=calculatedtotalrent&realestatetype=apartmentrent&searchType=region&geocodes=%2Fde%2Fberlin%2Fberlin&pagenumber=1' -H "Connection: keep-alive" -H "User-Agent: ImmoScout_27.3_26.0_._" -H "Accept: application/json" -H "Content-Type: application/json" -d '{"supportedResultListType": [], "userData": {}}'`
|
||||
|
||||
* - GET /expose/{id} - Returns the details of a listing. The response contains additional details not included in the
|
||||
* listing response.
|
||||
*
|
||||
* Example: `curl -H "User-Agent: ImmoScout24_1410_30_._" "https://api.mobile.immobilienscout24.de/expose/158382494"`
|
||||
* Example: `curl -H "User-Agent: ImmoScout_27.3_26.0_._" "https://api.mobile.immobilienscout24.de/expose/158382494"`
|
||||
*
|
||||
*
|
||||
* It is necessary to set the correct User Agent (see `getListings`) in the request header.
|
||||
@@ -35,15 +35,19 @@
|
||||
*
|
||||
*/
|
||||
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { convertWebToMobile } from '../services/immoscout/immoscout-web-translator.js';
|
||||
import { buildHash, isOneOf } from '../utils.js';
|
||||
import {
|
||||
convertImmoscoutListingToMobileListing,
|
||||
convertWebToMobile,
|
||||
} from '../services/immoscout/immoscout-web-translator.js';
|
||||
import logger from '../services/logger.js';
|
||||
let appliedBlackList = [];
|
||||
|
||||
async function getListings(url) {
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'User-Agent': 'ImmoScout24_1410_30_._',
|
||||
'User-Agent': 'ImmoScout_27.3_26.0_._',
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
@@ -52,7 +56,7 @@ async function getListings(url) {
|
||||
}),
|
||||
});
|
||||
if (!response.ok) {
|
||||
console.error('Error fetching data from ImmoScout Mobile API:', response.statusText);
|
||||
logger.error('Error fetching data from ImmoScout Mobile API:', response.statusText);
|
||||
return [];
|
||||
}
|
||||
|
||||
@@ -68,6 +72,7 @@ async function getListings(url) {
|
||||
price: price?.value,
|
||||
size: size?.value,
|
||||
title: item.title,
|
||||
description: item.description,
|
||||
link: `${metaInformation.baseUrl}expose/${item.id}`,
|
||||
address: item.address?.line,
|
||||
image,
|
||||
@@ -75,6 +80,25 @@ async function getListings(url) {
|
||||
});
|
||||
}
|
||||
|
||||
async function isListingActive(link) {
|
||||
const result = await fetch(convertImmoscoutListingToMobileListing(link), {
|
||||
headers: {
|
||||
'User-Agent': 'ImmoScout_27.3_26.0_._',
|
||||
},
|
||||
});
|
||||
|
||||
if (result.status === 200) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (result.status === 404) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
logger.warn('Unknown status for immoscout listing', link);
|
||||
return -1;
|
||||
}
|
||||
|
||||
function nullOrEmpty(val) {
|
||||
return val == null || val.length === 0;
|
||||
}
|
||||
@@ -85,7 +109,7 @@ function normalize(o) {
|
||||
return Object.assign(o, { id, title, address });
|
||||
}
|
||||
function applyBlacklist(o) {
|
||||
return !utils.isOneOf(o.title, appliedBlackList);
|
||||
return !isOneOf(o.title, appliedBlackList);
|
||||
}
|
||||
const config = {
|
||||
url: null,
|
||||
@@ -102,6 +126,7 @@ const config = {
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
getListings: getListings,
|
||||
activeTester: isListingActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { isOneOf, buildHash } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
@@ -14,8 +15,8 @@ function normalize(o) {
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
@@ -35,6 +36,7 @@ const config = {
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { buildHash, isOneOf } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
@@ -8,8 +9,8 @@ function normalize(o) {
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
@@ -30,6 +31,7 @@ const config = {
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { buildHash, isOneOf } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
let appliedBlacklistedDistricts = [];
|
||||
@@ -11,10 +12,10 @@ function normalize(o) {
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
|
||||
const isBlacklistedDistrict =
|
||||
appliedBlacklistedDistricts.length === 0 ? false : utils.isOneOf(o.description, appliedBlacklistedDistricts);
|
||||
appliedBlacklistedDistricts.length === 0 ? false : isOneOf(o.description, appliedBlacklistedDistricts);
|
||||
return o.title != null && !isBlacklistedDistrict && titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
@@ -36,6 +37,7 @@ const config = {
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const metaInformation = {
|
||||
name: 'Ebay Kleinanzeigen',
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { isOneOf, buildHash } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
@@ -15,14 +16,14 @@ function normalize(o) {
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
return !utils.isOneOf(o.title, appliedBlackList);
|
||||
return !isOneOf(o.title, appliedBlackList);
|
||||
}
|
||||
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '.col-12.mb-4',
|
||||
sortByDateParam: 'Sortierung=Id&Richtung=DESC',
|
||||
waitForSelector: '.nbk-section',
|
||||
waitForSelector: 'div[data-live-name-value="SearchList"]',
|
||||
crawlFields: {
|
||||
id: 'a@href',
|
||||
title: 'a@title | removeNewline | trim',
|
||||
@@ -33,6 +34,7 @@ const config = {
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import utils, { buildHash } from '../utils.js';
|
||||
import { isOneOf, buildHash } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
@@ -10,8 +11,8 @@ function normalize(o) {
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
|
||||
return o.id != null && titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
@@ -31,6 +32,7 @@ const config = {
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
|
||||
23
lib/services/crons/demoCleanup-cron.js
Normal file
23
lib/services/crons/demoCleanup-cron.js
Normal file
@@ -0,0 +1,23 @@
|
||||
import { removeJobsByUserId } from '../storage/jobStorage.js';
|
||||
import { config } from '../../utils.js';
|
||||
import { getUsers } from '../storage/userStorage.js';
|
||||
import logger from '../logger.js';
|
||||
import cron from 'node-cron';
|
||||
|
||||
/**
|
||||
* if we are running in demo environment, we have to cleanup the db files (specifically the jobs table)
|
||||
*/
|
||||
export function cleanupDemoAtMidnight() {
|
||||
cron.schedule('0 0 * * *', cleanup);
|
||||
}
|
||||
|
||||
function cleanup() {
|
||||
if (config.demoMode) {
|
||||
const demoUser = getUsers(false).find((user) => user.username === 'demo');
|
||||
if (demoUser == null) {
|
||||
logger.error('Demo user not found, cannot remove Jobs');
|
||||
return;
|
||||
}
|
||||
removeJobsByUserId(demoUser.id);
|
||||
}
|
||||
}
|
||||
13
lib/services/crons/listing-alive-cron.js
Normal file
13
lib/services/crons/listing-alive-cron.js
Normal file
@@ -0,0 +1,13 @@
|
||||
import cron from 'node-cron';
|
||||
import runActiveChecker from '../listings/listingActiveService.js';
|
||||
|
||||
async function runTask() {
|
||||
await runActiveChecker();
|
||||
}
|
||||
|
||||
export async function initActiveCheckerCron() {
|
||||
//run directly on start
|
||||
await runTask();
|
||||
// then every day at 1 am
|
||||
cron.schedule('0 1 * * *', runTask);
|
||||
}
|
||||
17
lib/services/crons/tracker-cron.js
Normal file
17
lib/services/crons/tracker-cron.js
Normal file
@@ -0,0 +1,17 @@
|
||||
import cron from 'node-cron';
|
||||
import { config, inDevMode } from '../../utils.js';
|
||||
import { trackMainEvent } from '../tracking/Tracker.js';
|
||||
|
||||
async function runTask() {
|
||||
//make sure to only send tracking events if the user gave us the green light and we are not in dev mode
|
||||
if (config.analyticsEnabled && !inDevMode()) {
|
||||
await trackMainEvent();
|
||||
}
|
||||
}
|
||||
|
||||
export async function initTrackerCron() {
|
||||
//run directly on start
|
||||
await runTask();
|
||||
// then every 6 hours
|
||||
cron.schedule('0 */6 * * *', runTask);
|
||||
}
|
||||
@@ -1,37 +0,0 @@
|
||||
import { setInterval } from 'node:timers';
|
||||
import { removeJobsByUserName } from './storage/jobStorage.js';
|
||||
import { config } from '../utils.js';
|
||||
import { getUsers } from './storage/userStorage.js';
|
||||
|
||||
/**
|
||||
* if we are running in demo environment, we have to cleanup the db files (specifically the jobs table)
|
||||
*/
|
||||
export function cleanupDemoAtMidnight() {
|
||||
const now = new Date();
|
||||
const millisUntilMidnightUTC =
|
||||
(24 - now.getUTCHours()) * 60 * 60 * 1000 -
|
||||
now.getUTCMinutes() * 60 * 1000 -
|
||||
now.getUTCSeconds() * 1000 -
|
||||
now.getUTCMilliseconds();
|
||||
|
||||
cleanup();
|
||||
setTimeout(() => {
|
||||
setInterval(
|
||||
() => {
|
||||
cleanup();
|
||||
},
|
||||
24 * 60 * 60 * 1000,
|
||||
);
|
||||
}, millisUntilMidnightUTC);
|
||||
}
|
||||
|
||||
function cleanup() {
|
||||
if (config.demoMode) {
|
||||
const demoUser = getUsers(false).find((user) => user.username === 'demo');
|
||||
if (demoUser == null) {
|
||||
console.error('Demo user not found, cannot remove Jobs');
|
||||
return;
|
||||
}
|
||||
removeJobsByUserName(demoUser.id);
|
||||
}
|
||||
}
|
||||
2
lib/services/events/event-bus.js
Normal file
2
lib/services/events/event-bus.js
Normal file
@@ -0,0 +1,2 @@
|
||||
import { EventEmitter } from 'node:events';
|
||||
export const bus = new EventEmitter();
|
||||
@@ -1,6 +1,7 @@
|
||||
import { setDebug } from './utils.js';
|
||||
import puppeteerExtractor from './puppeteerExtractor.js';
|
||||
import { loadParser, parse } from './parser/parser.js';
|
||||
import logger from '../logger.js';
|
||||
|
||||
const DEFAULT_OPTIONS = {
|
||||
debug: false,
|
||||
@@ -32,7 +33,7 @@ export default class Extractor {
|
||||
loadParser(this.responseText);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error trying to load page.', error);
|
||||
logger.error('Error trying to load page.', error);
|
||||
}
|
||||
return this;
|
||||
};
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import * as cheerio from 'cheerio';
|
||||
import logger from '../../logger.js';
|
||||
|
||||
let $ = null;
|
||||
|
||||
@@ -8,19 +9,19 @@ export function loadParser(text) {
|
||||
|
||||
export function parse(crawlContainer, crawlFields, text, url) {
|
||||
if (!text) {
|
||||
console.warn('No content found for ', url);
|
||||
logger.debug('No content found for ', url);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!crawlContainer || !crawlFields) {
|
||||
console.warn('Cannot parse, selector was empty for url ', url);
|
||||
logger.debug('Cannot parse, selector was empty for url ', url);
|
||||
return null;
|
||||
}
|
||||
|
||||
const result = [];
|
||||
|
||||
if ($(crawlContainer).length === 0) {
|
||||
console.warn('No elements in crawl container found for url ', url);
|
||||
logger.debug('No elements in crawl container found for url ', url);
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -58,7 +59,7 @@ export function parse(crawlContainer, crawlFields, text, url) {
|
||||
|
||||
parsedObject[key] = value || null;
|
||||
} catch (error) {
|
||||
console.error(`Error parsing field '${key}' with selector '${fieldSelector}':`, error);
|
||||
logger.error(`Error parsing field '${key}' with selector '${fieldSelector}':`, error);
|
||||
parsedObject[key] = null;
|
||||
}
|
||||
}
|
||||
@@ -66,7 +67,7 @@ export function parse(crawlContainer, crawlFields, text, url) {
|
||||
if (parsedObject.id != null) {
|
||||
result.push(parsedObject);
|
||||
} else {
|
||||
console.warn('ID not found. Not relaying object.');
|
||||
logger.debug('ID not found. Not relaying object.');
|
||||
}
|
||||
});
|
||||
|
||||
@@ -89,7 +90,7 @@ function applyModifiers(value, modifiers) {
|
||||
value = value.replace(/\n/g, ' ');
|
||||
break;
|
||||
default:
|
||||
console.warn(`Unknown modifier: ${modifier}`);
|
||||
logger.warn(`Unknown modifier: ${modifier}`);
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -1,30 +1,57 @@
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import { debug, DEFAULT_HEADER, botDetected } from './utils.js';
|
||||
import logger from '../logger.js';
|
||||
import fs from 'fs';
|
||||
import os from 'os';
|
||||
import path from 'path';
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
export default async function execute(url, waitForSelector, options) {
|
||||
let browser;
|
||||
let page;
|
||||
let result = null;
|
||||
let userDataDir;
|
||||
let removeUserDataDir = false;
|
||||
try {
|
||||
debug(`Sending request to ${url} using Puppeteer.`);
|
||||
|
||||
// Prepare a dedicated temporary userDataDir to avoid leaking /tmp/.org.chromium.* dirs
|
||||
if (options && options.userDataDir) {
|
||||
userDataDir = options.userDataDir;
|
||||
removeUserDataDir = !!options.cleanupUserDataDir;
|
||||
} else {
|
||||
const prefix = path.join(os.tmpdir(), 'puppeteer-fredy-');
|
||||
userDataDir = fs.mkdtempSync(prefix);
|
||||
removeUserDataDir = true;
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: options.puppeteerHeadless ?? true,
|
||||
args: ['--no-sandbox', '--disable-gpu', '--disable-setuid-sandbox'],
|
||||
args: [
|
||||
'--no-sandbox',
|
||||
'--disable-gpu',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-crash-reporter',
|
||||
],
|
||||
timeout: options.puppeteerTimeout || 30_000,
|
||||
userDataDir,
|
||||
});
|
||||
let page = await browser.newPage();
|
||||
page = await browser.newPage();
|
||||
await page.setExtraHTTPHeaders(DEFAULT_HEADER);
|
||||
const response = await page.goto(url, {
|
||||
waitUntil: 'domcontentloaded',
|
||||
});
|
||||
let pageSource;
|
||||
//if we're extracting data from a spa, we must wait for the selector
|
||||
// if we're extracting data from a SPA, we must wait for the selector
|
||||
if (waitForSelector != null) {
|
||||
await page.waitForSelector(waitForSelector);
|
||||
const selectorTimeout = options?.puppeteerSelectorTimeout ?? options?.puppeteerTimeout ?? 30_000;
|
||||
await page.waitForSelector(waitForSelector, { timeout: selectorTimeout });
|
||||
pageSource = await page.evaluate((selector) => {
|
||||
return document.querySelector(selector).innerHTML;
|
||||
const el = document.querySelector(selector);
|
||||
return el ? el.innerHTML : '';
|
||||
}, waitForSelector);
|
||||
} else {
|
||||
pageSource = await page.content();
|
||||
@@ -33,17 +60,36 @@ export default async function execute(url, waitForSelector, options) {
|
||||
const statusCode = response.status();
|
||||
|
||||
if (botDetected(pageSource, statusCode)) {
|
||||
console.warn('We have been detected as a bot :-/ Tried url: => ', url);
|
||||
return null;
|
||||
logger.warn('We have been detected as a bot :-/ Tried url: => ', url);
|
||||
result = null;
|
||||
} else {
|
||||
result = pageSource || (await page.content());
|
||||
}
|
||||
|
||||
return await page.content();
|
||||
} catch (error) {
|
||||
console.error('Error executing with puppeteer executor', error);
|
||||
return null;
|
||||
logger.error('Error executing with puppeteer executor', error);
|
||||
result = null;
|
||||
} finally {
|
||||
if (browser != null) {
|
||||
await browser.close();
|
||||
try {
|
||||
if (page) {
|
||||
await page.close();
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
try {
|
||||
if (browser != null) {
|
||||
await browser.close();
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
try {
|
||||
if (removeUserDataDir && userDataDir) {
|
||||
await fs.promises.rm(userDataDir, { recursive: true, force: true });
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import logger from '../logger.js';
|
||||
|
||||
let debuggingOn = false;
|
||||
|
||||
export const DEFAULT_HEADER = {
|
||||
@@ -15,9 +17,7 @@ export const setDebug = (options) => {
|
||||
|
||||
export const debug = (message) => {
|
||||
if (debuggingOn) {
|
||||
/* eslint-disable no-console */
|
||||
console.debug(message);
|
||||
/* eslint-enable no-console */
|
||||
logger.debug(message);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -60,6 +60,7 @@ https://api.mobile.immobilienscout24.de/search/map/v3?publishedafter=2025-05-14T
|
||||
https://api.mobile.immobilienscout24.de/search/map/v3?features=disableNHBGrouping,nextGen,fairPrice,listingsInListFirstSummary,xxlListingType,contactDetails&publishedafter=2025-05-14T09:19:43&sorting=standard&pagesize=300&searchType=shape&realEstateType=housebuy&pagenumber=1&shape=%7D%7BjwHy%7Cqh@jCKdCgAvB_BdB%7DBzAaCjAqCfAqC~@uCt@iCh@eCZkCLyC?_EO%7DEa@%7DEa@iE_@%7BD%5DaDe@gDi@gDo@uCu@kBcB_AeDOiE?iDCgCMuBOkDCkG?yFRgD%60@cB%5C%7BA%60@eBx@aB%7C@kAbAy@rAe@bBUxCAhE?dFh@fGlAzGbBbHlBxGdB%60FrAhDz@xBh@nAf@l@RNNXkCkMJR~B%7CEnCpErCnDtClCvC~ApCh@rCJpC?
|
||||
*/
|
||||
import queryString from 'query-string';
|
||||
import { nullOrEmpty } from '../../utils.js';
|
||||
|
||||
const PARAM_NAME_MAP = {
|
||||
heatingtypes: 'heatingtypes',
|
||||
@@ -193,3 +194,14 @@ export function convertWebToMobile(webUrl) {
|
||||
|
||||
return `https://api.mobile.immobilienscout24.de/search/list?${mobileQuery}`;
|
||||
}
|
||||
|
||||
export function convertImmoscoutListingToMobileListing(url) {
|
||||
if (nullOrEmpty(url)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return url.replace(
|
||||
/^https:\/\/www\.immobilienscout24\.de\/expose\//,
|
||||
'https://api.mobile.immobilienscout24.de/expose/',
|
||||
);
|
||||
}
|
||||
|
||||
104
lib/services/listings/listingActiveService.js
Normal file
104
lib/services/listings/listingActiveService.js
Normal file
@@ -0,0 +1,104 @@
|
||||
import { deactivateListings, getActiveOrUnknownListings } from '../storage/listingsStorage.js';
|
||||
import { getProviders } from '../../utils.js';
|
||||
import logger from '../../services/logger.js';
|
||||
|
||||
/**
|
||||
* Runs the active-listing checker:
|
||||
* 1) Loads all listings with unknown or active status.
|
||||
* 2) Resolves each listing's provider and calls its `activeTester(link)`.
|
||||
* 3) Collects listings that are no longer active and deactivates them in one batch.
|
||||
*
|
||||
* Concurrency: network-bound checks are executed with a configurable concurrency limit.
|
||||
*
|
||||
* @param {object} [opts]
|
||||
* @param {number} [opts.concurrency=8] Max number of parallel activeTester calls.
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
export default async function runActiveChecker(opts = {}) {
|
||||
const { concurrency = 4 } = opts;
|
||||
|
||||
const listings = getActiveOrUnknownListings();
|
||||
if (!Array.isArray(listings) || listings.length === 0) {
|
||||
logger.debug('No listings to check.');
|
||||
return;
|
||||
}
|
||||
|
||||
const providers = await getProviders();
|
||||
if (!Array.isArray(providers) || providers.length === 0) {
|
||||
logger.warn('No providers available. Skipping active checks.');
|
||||
return;
|
||||
}
|
||||
|
||||
// Build a map for O(1) provider lookup by id
|
||||
/** @type {Record<string, any>} */
|
||||
const providerById = Object.create(null);
|
||||
for (const p of providers) {
|
||||
const id = p?.metaInformation?.id;
|
||||
if (id) providerById[id] = p;
|
||||
}
|
||||
|
||||
// Small generic mapLimit to cap concurrency without extra deps
|
||||
/**
|
||||
* @template T, R
|
||||
* @param {T[]} items
|
||||
* @param {number} limit
|
||||
* @param {(item: T, index: number) => Promise<R>} worker
|
||||
* @returns {Promise<R[]>}
|
||||
*/
|
||||
async function mapLimit(items, limit, worker) {
|
||||
const results = new Array(items.length);
|
||||
let next = 0;
|
||||
|
||||
async function runOne() {
|
||||
while (next < items.length) {
|
||||
const i = next++;
|
||||
try {
|
||||
results[i] = await worker(items[i], i);
|
||||
} catch (err) {
|
||||
results[i] = /** @type {any} */ (err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const runners = Array.from({ length: Math.min(limit, items.length) }, runOne);
|
||||
await Promise.all(runners);
|
||||
return results;
|
||||
}
|
||||
|
||||
/** @type {string[]} */
|
||||
const listingsSetToInactive = [];
|
||||
|
||||
await mapLimit(listings, concurrency, async (listing) => {
|
||||
const { provider: listingProviderId, link, id } = listing || {};
|
||||
|
||||
const matchedProvider = providerById[listingProviderId];
|
||||
if (!matchedProvider) {
|
||||
logger.warn('Could not find matching provider for', listingProviderId);
|
||||
return;
|
||||
}
|
||||
const tester = matchedProvider?.config?.activeTester;
|
||||
if (typeof tester !== 'function') {
|
||||
logger.warn('No activeTester configured for', listingProviderId);
|
||||
return;
|
||||
}
|
||||
|
||||
// Contract: activeTester(link) returns 1 if active, 0 if inactive
|
||||
let result;
|
||||
try {
|
||||
result = await tester(link);
|
||||
} catch {
|
||||
result = -1;
|
||||
}
|
||||
|
||||
if (result === 0 && id) {
|
||||
listingsSetToInactive.push(id);
|
||||
}
|
||||
});
|
||||
|
||||
if (listingsSetToInactive.length > 0) {
|
||||
logger.info(`Setting ${listingsSetToInactive.length} listings to inactive.`);
|
||||
deactivateListings(listingsSetToInactive);
|
||||
} else {
|
||||
logger.debug('No listings need to be set inactive.');
|
||||
}
|
||||
}
|
||||
68
lib/services/listings/listingActiveTester.js
Normal file
68
lib/services/listings/listingActiveTester.js
Normal file
@@ -0,0 +1,68 @@
|
||||
import fetch from 'node-fetch';
|
||||
import { randomBetween, sleep } from '../../utils.js';
|
||||
|
||||
const maxAttempts = 3;
|
||||
|
||||
/**
|
||||
* Check if a listing is still active with up to 3 attempts and exponential backoff.
|
||||
* Backoff waits are capped and the last wait is at most 2000 ms.
|
||||
*
|
||||
* Rules:
|
||||
* - HTTP 200 => return 1
|
||||
* - HTTP 401/403 => return -1 (most certainly detected as a bot)
|
||||
* - HTTP 404 => return 0
|
||||
* - Other statuses or network errors => retry until attempts are exhausted
|
||||
*
|
||||
* @returns {Promise<Integer>} 1 if active, o if not active and -1 if detected as bot
|
||||
*/
|
||||
export default async function checkIfListingIsActive(link) {
|
||||
await sleep(randomBetween(50, 100));
|
||||
|
||||
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
||||
try {
|
||||
const res = await fetch(link, {
|
||||
headers: {
|
||||
'User-Agent':
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
|
||||
'Accept-Language': 'de-DE,de;q=0.9,en;q=0.8',
|
||||
},
|
||||
});
|
||||
|
||||
if (res.status === 200) {
|
||||
return 1;
|
||||
}
|
||||
if (res.status === 401) return -1;
|
||||
if (res.status === 403) return -1;
|
||||
if (res.status === 404) return 0;
|
||||
|
||||
// For any other status, only retry if attempts remain
|
||||
if (attempt < maxAttempts) {
|
||||
await sleep(backoffDelay(attempt));
|
||||
continue;
|
||||
}
|
||||
|
||||
return 0;
|
||||
} catch {
|
||||
// Network error: retry if attempts remain
|
||||
if (attempt < maxAttempts) {
|
||||
await sleep(backoffDelay(attempt));
|
||||
continue;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Exponential backoff delay with cap.
|
||||
* attempt: 1 -> 500ms, 2 -> 1000ms, 3 -> 2000ms (cap)
|
||||
* @param {number} attempt 1-based attempt index
|
||||
* @returns {number} delay in ms
|
||||
*/
|
||||
function backoffDelay(attempt) {
|
||||
const base = 500;
|
||||
const cap = 2000;
|
||||
return Math.min(base * 2 ** (attempt - 1), cap);
|
||||
}
|
||||
59
lib/services/logger.js
Normal file
59
lib/services/logger.js
Normal file
@@ -0,0 +1,59 @@
|
||||
const COLORS = {
|
||||
debug: '\x1b[36m',
|
||||
info: '\x1b[32m',
|
||||
warn: '\x1b[33m',
|
||||
error: '\x1b[31m',
|
||||
reset: '\x1b[0m',
|
||||
};
|
||||
|
||||
const env = process.env.NODE_ENV || 'development';
|
||||
const useColor = process.stdout.isTTY || process.stderr.isTTY;
|
||||
|
||||
function ts() {
|
||||
const d = new Date();
|
||||
const yyyy = d.getFullYear();
|
||||
const mm = String(d.getMonth() + 1).padStart(2, '0');
|
||||
const dd = String(d.getDate()).padStart(2, '0');
|
||||
const hh = String(d.getHours()).padStart(2, '0');
|
||||
const mi = String(d.getMinutes()).padStart(2, '0');
|
||||
const ss = String(d.getSeconds()).padStart(2, '0');
|
||||
return `${yyyy}-${mm}-${dd} ${hh}:${mi}:${ss}`;
|
||||
}
|
||||
|
||||
function lvl(level) {
|
||||
const upper = level.toUpperCase();
|
||||
if (!useColor) return upper;
|
||||
return `${COLORS[level] || ''}${upper}${COLORS.reset}`;
|
||||
}
|
||||
|
||||
/* eslint-disable no-console */
|
||||
function log(level, ...args) {
|
||||
if (level === 'debug' && env !== 'development') {
|
||||
return; // Skip debug logs in non-development environments
|
||||
}
|
||||
|
||||
const prefix = `[${ts()}] ${lvl(level)}:`;
|
||||
switch (level) {
|
||||
case 'debug':
|
||||
console.debug(prefix, ...args);
|
||||
break;
|
||||
case 'info':
|
||||
console.info(prefix, ...args);
|
||||
break;
|
||||
case 'warn':
|
||||
console.warn(prefix, ...args);
|
||||
break;
|
||||
case 'error':
|
||||
console.error(prefix, ...args);
|
||||
break;
|
||||
default:
|
||||
console.log(prefix, ...args);
|
||||
}
|
||||
}
|
||||
|
||||
export default {
|
||||
debug: (...a) => log('debug', ...a),
|
||||
info: (...a) => log('info', ...a),
|
||||
warn: (...a) => log('warn', ...a),
|
||||
error: (...a) => log('error', ...a),
|
||||
};
|
||||
@@ -1,26 +0,0 @@
|
||||
import stringSimilarity from 'string-similarity';
|
||||
//if the score is higher than this, it will be considered a match
|
||||
const MAX_DICE_INDEX = 0.7;
|
||||
export default (class SimilarityCacheEntry {
|
||||
constructor(time) {
|
||||
this.time = time;
|
||||
this.values = [];
|
||||
}
|
||||
setCacheEntry = (entry) => {
|
||||
this.values.push(entry);
|
||||
};
|
||||
getTime = () => {
|
||||
return this.time;
|
||||
};
|
||||
hasSimilarEntries = (value) => {
|
||||
if (this.values.length > 0) {
|
||||
for (let i = 0; i < this.values.length; i++) {
|
||||
const index = stringSimilarity.compareTwoStrings(value, this.values[i]);
|
||||
if (index >= MAX_DICE_INDEX) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
});
|
||||
@@ -1,40 +1,116 @@
|
||||
import SimilarityCacheEntry from './SimilarityCacheEntry.js';
|
||||
import { config } from '../../utils.js';
|
||||
//5 minutes
|
||||
let retention = 5 * 60 * 1000;
|
||||
const intervalInMs = config.interval * 60 * 1000;
|
||||
//an interval below 5 mins sounds crazy, but there are ppl out there doing crazy shit.
|
||||
if (intervalInMs <= retention) {
|
||||
retention = Math.floor(intervalInMs / 2);
|
||||
}
|
||||
//jobid -> SimilarityCacheEntry
|
||||
const cache = {};
|
||||
let intervalId;
|
||||
import crypto from 'crypto';
|
||||
|
||||
const retention = 60 * 60 * 1000;
|
||||
/**
|
||||
* cleanup
|
||||
* Internal cache storage.
|
||||
* Maps a SHA-256 hash (string) to its expiry timestamp (number in ms).
|
||||
* @type {Map<string, number>}
|
||||
*/
|
||||
intervalId = setInterval(() => {
|
||||
const keysToBeRemoved = [];
|
||||
const entries = new Map();
|
||||
|
||||
/**
|
||||
* Reference to the currently scheduled cleanup timer.
|
||||
* @type {NodeJS.Timeout | null}
|
||||
*/
|
||||
let timer = null;
|
||||
|
||||
/**
|
||||
* Generate a SHA-256 hash from a list of input strings.
|
||||
* Null or undefined values are ignored.
|
||||
*
|
||||
* @param {...(string|null|undefined)} strings - Input values to hash
|
||||
* @returns {string} Hexadecimal hash
|
||||
*/
|
||||
function toHash(...strings) {
|
||||
return crypto.createHash('sha256').update(strings.filter(Boolean).join('|')).digest('hex');
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanup expired cache entries and schedule the next cleanup run.
|
||||
* This function is invoked automatically by scheduled timers.
|
||||
*
|
||||
* @private
|
||||
*/
|
||||
function runCleanup() {
|
||||
const now = Date.now();
|
||||
Object.keys(cache).forEach((key) => {
|
||||
if (cache[key].getTime() + retention < now) {
|
||||
keysToBeRemoved.push(key);
|
||||
}
|
||||
});
|
||||
if (keysToBeRemoved.length > 0) {
|
||||
keysToBeRemoved.forEach((key) => delete cache[key]);
|
||||
for (const [hash, expiry] of entries) {
|
||||
if (expiry <= now) entries.delete(hash);
|
||||
}
|
||||
}, 10000);
|
||||
export const addCacheEntry = (jobId, value) => {
|
||||
cache[jobId] = cache[jobId] || new SimilarityCacheEntry(Date.now());
|
||||
cache[jobId].setCacheEntry(value);
|
||||
};
|
||||
export const hasSimilarEntries = (jobId, value) => {
|
||||
if (cache[jobId] == null) {
|
||||
scheduleNext();
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the soonest expiry timestamp among all cache entries
|
||||
* and schedule a one-shot timer that will trigger at that time.
|
||||
* Cancels any existing timer before scheduling a new one.
|
||||
*
|
||||
* @private
|
||||
*/
|
||||
function scheduleNext() {
|
||||
if (timer) {
|
||||
clearTimeout(timer);
|
||||
timer = null;
|
||||
}
|
||||
let next = Infinity;
|
||||
const now = Date.now();
|
||||
for (const expiry of entries.values()) {
|
||||
if (expiry > now && expiry < next) next = expiry;
|
||||
}
|
||||
if (next !== Infinity) {
|
||||
timer = setTimeout(runCleanup, Math.max(0, next - now));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add or refresh a cache entry for the given title and address.
|
||||
* The entry will automatically expire after the configured retention window.
|
||||
*
|
||||
* @param {string} title - The title used to build the cache key
|
||||
* @param {string} address - The address used to build the cache key
|
||||
*/
|
||||
export function addCacheEntry(title, address) {
|
||||
const hash = toHash(title, address);
|
||||
const expiry = Date.now() + retention;
|
||||
entries.set(hash, expiry);
|
||||
scheduleNext();
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a cache entry with the same title and address exists
|
||||
* and is still valid (not expired).
|
||||
*
|
||||
* @param {string} title - The title used to build the cache key
|
||||
* @param {string} address - The address used to build the cache key
|
||||
* @returns {boolean} True if a valid cache entry exists, false otherwise
|
||||
*/
|
||||
export function hasSimilarEntries(title, address) {
|
||||
const hash = toHash(title, address);
|
||||
const expiry = entries.get(hash);
|
||||
if (expiry == null) return false;
|
||||
if (expiry <= Date.now()) {
|
||||
entries.delete(hash);
|
||||
scheduleNext();
|
||||
return false;
|
||||
}
|
||||
return cache[jobId].hasSimilarEntries(value);
|
||||
};
|
||||
export const stopCacheCleanup = () => {
|
||||
clearInterval(intervalId);
|
||||
};
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop any scheduled cleanup timers and prevent further automatic cleanup.
|
||||
* Entries that are already in the cache will remain until removed manually
|
||||
* or until cleanup is started again by adding new entries.
|
||||
*/
|
||||
export function stopCacheCleanup() {
|
||||
if (timer) clearTimeout(timer);
|
||||
timer = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* this is only for test purposes
|
||||
*/
|
||||
export function invalidateAllForTest() {
|
||||
for (const key of entries.keys()) {
|
||||
entries.set(key, 0);
|
||||
}
|
||||
runCleanup();
|
||||
}
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
import lodash from 'lodash';
|
||||
import { LowSync } from 'lowdb';
|
||||
export default class LowdashAdapter extends LowSync {
|
||||
constructor(adapter, defaultData = {}) {
|
||||
super(adapter, defaultData);
|
||||
this.chain = lodash.chain(this).get('data');
|
||||
}
|
||||
}
|
||||
140
lib/services/storage/SqliteConnection.js
Normal file
140
lib/services/storage/SqliteConnection.js
Normal file
@@ -0,0 +1,140 @@
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import Database from 'better-sqlite3';
|
||||
import logger from '../../services/logger.js';
|
||||
import { config } from '../../utils.js';
|
||||
|
||||
/**
|
||||
* SqliteConnection
|
||||
* A small, high-performance wrapper around better-sqlite3 that provides a
|
||||
* singleton connection, sensible PRAGMA tuning, and helper methods. This
|
||||
* module is safe to import and reuse.
|
||||
*
|
||||
* Performance notes:
|
||||
* - journal_mode = WAL: allows concurrent readers with a single writer and
|
||||
* yields better performance for server apps.
|
||||
* - synchronous = NORMAL: trades a bit of durability for significant speed
|
||||
* while still being safe in most environments.
|
||||
* - cache_size = -64000: ~64MB page cache (negative value sets KB) to improve
|
||||
* query performance for frequent reads.
|
||||
* - foreign_keys = ON: ensure referential integrity is enforced.
|
||||
* - optimize: runs SQLite's auto-analysis and purges internal caches. It is
|
||||
* cheap; we call it at startup and before process exit. You can also call
|
||||
* optimize() manually after large schema changes or bulk operations.
|
||||
*/
|
||||
class SqliteConnection {
|
||||
static #db = null;
|
||||
|
||||
/**
|
||||
* Returns a singleton instance of better-sqlite3 Database.
|
||||
* Respects env var SQLITE_DB_PATH and defaults to db/listings.db.
|
||||
*/
|
||||
static getConnection() {
|
||||
if (this.#db) return this.#db;
|
||||
|
||||
// Interpret config.sqlitepath as a directory relative to project root when it starts with '/'
|
||||
const cfg = typeof config === 'object' && config ? config.sqlitepath : undefined;
|
||||
const rawDir = cfg && cfg.length > 0 ? cfg : '/db';
|
||||
const relDir = rawDir.startsWith('/') ? rawDir.slice(1) : rawDir;
|
||||
const absDir = path.isAbsolute(relDir) ? relDir : path.join(process.cwd(), relDir);
|
||||
const dbPath = path.join(absDir, 'listings.db');
|
||||
|
||||
// Ensure directory exists
|
||||
const dir = path.dirname(dbPath);
|
||||
if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
|
||||
|
||||
// Open the database synchronously (better-sqlite3 is sync and very fast)
|
||||
this.#db = new Database(dbPath, { verbose: undefined });
|
||||
|
||||
// Apply high-performance PRAGMA's
|
||||
try {
|
||||
this.#db.pragma('journal_mode = WAL');
|
||||
this.#db.pragma('synchronous = NORMAL');
|
||||
this.#db.pragma('cache_size = -64000');
|
||||
this.#db.pragma('foreign_keys = ON');
|
||||
this.#db.pragma('optimize');
|
||||
} catch (e) {
|
||||
logger.warn('Failed to apply one or more PRAGMAs:', e.message);
|
||||
}
|
||||
|
||||
// Run optimize on exit to persist analysis and cleanup internal caches.
|
||||
process.once('beforeExit', () => {
|
||||
try {
|
||||
this.#db?.pragma('optimize');
|
||||
} catch (e) {
|
||||
logger.debug('PRAGMA optimize on exit failed:', e.message);
|
||||
}
|
||||
});
|
||||
|
||||
return this.#db;
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a write statement (INSERT/UPDATE/DELETE/DDL). Returns better-sqlite3 run info.
|
||||
*/
|
||||
static execute(sql, params = {}) {
|
||||
const db = this.getConnection();
|
||||
return db.prepare(sql).run(params);
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a query and returns all rows.
|
||||
*/
|
||||
static query(sql, params = {}) {
|
||||
const db = this.getConnection();
|
||||
return db.prepare(sql).all(params);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether a table exists.
|
||||
*/
|
||||
static tableExists(tableName) {
|
||||
const db = this.getConnection();
|
||||
const row = db.prepare("SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?").get(tableName);
|
||||
return !!row;
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the given callback inside a transaction. The callback receives the Database instance.
|
||||
* If the callback throws, the transaction is rolled back and the error re-thrown.
|
||||
*/
|
||||
static withTransaction(callback) {
|
||||
const db = this.getConnection();
|
||||
const trx = db.transaction((cb) => cb(db));
|
||||
return trx(callback);
|
||||
}
|
||||
|
||||
/**
|
||||
* Run SQLite PRAGMA optimize. See https://sqlite.org/pragma.html#pragma_optimize
|
||||
*
|
||||
* Explanation: PRAGMA optimize triggers internal housekeeping, such as
|
||||
* recomputing query planner statistics (similar to ANALYZE) when appropriate
|
||||
* and purging unused pages from caches. It is inexpensive and can improve
|
||||
* performance after schema changes or heavy write activity.
|
||||
*/
|
||||
static optimize() {
|
||||
const db = this.getConnection();
|
||||
try {
|
||||
db.pragma('optimize');
|
||||
} catch (e) {
|
||||
logger.warn('PRAGMA optimize failed:', e.message);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the database connection. Typically not needed for long-running apps.
|
||||
*/
|
||||
static close() {
|
||||
if (this.#db) {
|
||||
try {
|
||||
this.#db.pragma('optimize');
|
||||
} catch (e) {
|
||||
logger.debug('PRAGMA optimize before close failed:', e.message);
|
||||
}
|
||||
this.#db.close();
|
||||
this.#db = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export default SqliteConnection;
|
||||
@@ -1,107 +1,144 @@
|
||||
import { JSONFileSync } from 'lowdb/node';
|
||||
import { nanoid } from 'nanoid';
|
||||
import * as listingStorage from './listingsStorage.js';
|
||||
import { getDirName } from '../../utils.js';
|
||||
import path from 'path';
|
||||
import LowdashAdapter from './LowDashAdapter.js';
|
||||
|
||||
const file = path.join(getDirName(), '../', 'db/jobs.json');
|
||||
const adapter = new JSONFileSync(file);
|
||||
const db = new LowdashAdapter(adapter, { jobs: [] });
|
||||
|
||||
db.read();
|
||||
import SqliteConnection from './SqliteConnection.js';
|
||||
import logger from '../logger.js';
|
||||
import { toJson, fromJson } from '../../utils.js';
|
||||
|
||||
/**
|
||||
* Insert or update a job. Preserves original owner (userId) when updating an existing job.
|
||||
*
|
||||
* @param {Object} params
|
||||
* @param {string} [params.jobId] - Existing job id to update; omit to insert a new job.
|
||||
* @param {string} [params.name] - Job display name.
|
||||
* @param {Array<any>} [params.blacklist] - Blacklist entries; defaults to empty array.
|
||||
* @param {boolean} [params.enabled] - Whether the job is enabled; defaults to true.
|
||||
* @param {Array<any>} params.provider - Provider configuration list.
|
||||
* @param {Array<any>} params.notificationAdapter - Notification adapter configuration list.
|
||||
* @param {string} params.userId - Owner user id for inserts; preserved on updates.
|
||||
* @returns {void}
|
||||
*/
|
||||
export const upsertJob = ({ jobId, name, blacklist = [], enabled = true, provider, notificationAdapter, userId }) => {
|
||||
const currentJob =
|
||||
jobId == null
|
||||
? null
|
||||
: db.chain
|
||||
.get('jobs')
|
||||
.find((job) => job.id === jobId)
|
||||
.value();
|
||||
const jobs = db.chain
|
||||
.get('jobs')
|
||||
.filter((job) => job.id !== jobId)
|
||||
.value();
|
||||
jobs.push({
|
||||
id: jobId || nanoid(),
|
||||
//make sure to not overwrite the user id in case an admin changes the job
|
||||
userId: currentJob == null ? userId : currentJob.userId,
|
||||
enabled,
|
||||
name,
|
||||
blacklist,
|
||||
provider,
|
||||
notificationAdapter,
|
||||
});
|
||||
db.chain.set('jobs', jobs).value();
|
||||
db.write();
|
||||
};
|
||||
export const getJob = (jobId) => {
|
||||
const job = db.chain
|
||||
.get('jobs')
|
||||
.find((job) => job.id === jobId)
|
||||
.value();
|
||||
if (job == null) {
|
||||
return null;
|
||||
const id = jobId || nanoid();
|
||||
const existing = SqliteConnection.query(`SELECT id, user_id FROM jobs WHERE id = @id LIMIT 1`, { id })[0];
|
||||
const ownerId = existing ? existing.user_id : userId;
|
||||
if (existing) {
|
||||
SqliteConnection.execute(
|
||||
`UPDATE jobs
|
||||
SET enabled = @enabled,
|
||||
name = @name,
|
||||
blacklist = @blacklist,
|
||||
provider = @provider,
|
||||
notification_adapter = @notification_adapter
|
||||
WHERE id = @id`,
|
||||
{
|
||||
id,
|
||||
enabled: enabled ? 1 : 0,
|
||||
name: name ?? null,
|
||||
blacklist: toJson(blacklist ?? []),
|
||||
provider: toJson(provider ?? []),
|
||||
notification_adapter: toJson(notificationAdapter ?? []),
|
||||
},
|
||||
);
|
||||
} else {
|
||||
SqliteConnection.execute(
|
||||
`INSERT INTO jobs (id, user_id, enabled, name, blacklist, provider, notification_adapter)
|
||||
VALUES (@id, @user_id, @enabled, @name, @blacklist, @provider, @notification_adapter)`,
|
||||
{
|
||||
id,
|
||||
user_id: ownerId,
|
||||
enabled: enabled ? 1 : 0,
|
||||
name: name ?? null,
|
||||
blacklist: toJson(blacklist ?? []),
|
||||
provider: toJson(provider ?? []),
|
||||
notification_adapter: toJson(notificationAdapter ?? []),
|
||||
},
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Get a single job by id.
|
||||
* @param {string} jobId - Job primary key.
|
||||
* @returns {Job|null} The job or null if not found.
|
||||
*/
|
||||
export const getJob = (jobId) => {
|
||||
const row = SqliteConnection.query(
|
||||
`SELECT j.id,
|
||||
j.user_id AS userId,
|
||||
j.enabled,
|
||||
j.name,
|
||||
j.blacklist,
|
||||
j.provider,
|
||||
j.notification_adapter AS notificationAdapter,
|
||||
(SELECT COUNT(1) FROM listings l WHERE l.job_id = j.id) AS numberOfFoundListings
|
||||
FROM jobs j
|
||||
WHERE j.id = @id
|
||||
LIMIT 1`,
|
||||
{ id: jobId },
|
||||
)[0];
|
||||
if (!row) return null;
|
||||
return {
|
||||
...job,
|
||||
numberOfFoundListings: listingStorage.getNumberOfAllKnownListings(job.id).length,
|
||||
...row,
|
||||
enabled: !!row.enabled,
|
||||
blacklist: fromJson(row.blacklist, []),
|
||||
provider: fromJson(row.provider, []),
|
||||
notificationAdapter: fromJson(row.notificationAdapter, []),
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* Update job enabled status.
|
||||
* @param {{jobId: string, status: boolean}} params - Parameters.
|
||||
* @returns {void}
|
||||
*/
|
||||
export const setJobStatus = ({ jobId, status }) => {
|
||||
db.chain
|
||||
.get('jobs')
|
||||
.find((job) => job.id === jobId)
|
||||
.assign({ enabled: status })
|
||||
.value();
|
||||
db.write();
|
||||
SqliteConnection.execute(`UPDATE jobs SET enabled = @enabled WHERE id = @id`, {
|
||||
id: jobId,
|
||||
enabled: status ? 1 : 0,
|
||||
});
|
||||
};
|
||||
|
||||
/**
|
||||
* Remove a job by id. Listings are deleted automatically due to FK ON DELETE CASCADE.
|
||||
* @param {string} jobId - Job id.
|
||||
* @returns {void}
|
||||
*/
|
||||
export const removeJob = (jobId) => {
|
||||
listingStorage.removeListings(jobId);
|
||||
db.chain
|
||||
.get('jobs')
|
||||
.remove((job) => job.id === jobId)
|
||||
.value();
|
||||
db.write();
|
||||
// listings table has FK ON DELETE CASCADE via job_id
|
||||
SqliteConnection.execute(`DELETE FROM jobs WHERE id = @id`, { id: jobId });
|
||||
};
|
||||
|
||||
export const removeJobsByUserId = (userId) => {
|
||||
db.chain
|
||||
.get('jobs')
|
||||
.filter((job) => job.userId === userId)
|
||||
.forEach((job) => listingStorage.removeListings(job.id));
|
||||
db.chain
|
||||
.get('jobs')
|
||||
.remove((job) => job.userId === userId)
|
||||
.value();
|
||||
db.write();
|
||||
};
|
||||
export const removeJobsByUserName = (userId) => {
|
||||
let removedDemoJobs = 0;
|
||||
db.chain
|
||||
.get('jobs')
|
||||
.filter((job) => job.userId === userId)
|
||||
.forEach((job) => {
|
||||
removedDemoJobs++;
|
||||
listingStorage.removeListings(job.id);
|
||||
});
|
||||
db.chain
|
||||
.get('jobs')
|
||||
.remove((job) => job.userId === userId)
|
||||
.value();
|
||||
db.write();
|
||||
if (removedDemoJobs > 0) {
|
||||
/* eslint-disable no-console */
|
||||
console.log(`Removed ${removedDemoJobs} demo jobs`);
|
||||
/* eslint-enable no-console */
|
||||
// Count jobs to log similar to previous behavior
|
||||
const count =
|
||||
SqliteConnection.query(`SELECT COUNT(1) AS c FROM jobs WHERE user_id = @user_id`, { user_id: userId })[0]?.c ?? 0;
|
||||
SqliteConnection.execute(`DELETE FROM jobs WHERE user_id = @user_id`, { user_id: userId });
|
||||
if (count > 0) {
|
||||
logger.info(`Removed ${count} jobs for user ${userId}`);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Get all jobs.
|
||||
* @returns {Job[]} List of jobs ordered by name (NULLs last).
|
||||
*/
|
||||
export const getJobs = () => {
|
||||
return db.chain
|
||||
.get('jobs')
|
||||
.map((job) => ({
|
||||
...job,
|
||||
numberOfFoundListings: listingStorage.getNumberOfAllKnownListings(job.id),
|
||||
}))
|
||||
.value();
|
||||
const rows = SqliteConnection.query(
|
||||
`SELECT j.id,
|
||||
j.user_id AS userId,
|
||||
j.enabled,
|
||||
j.name,
|
||||
j.blacklist,
|
||||
j.provider,
|
||||
j.notification_adapter AS notificationAdapter,
|
||||
(SELECT COUNT(1) FROM listings l WHERE l.job_id = j.id) AS numberOfFoundListings
|
||||
FROM jobs j
|
||||
ORDER BY j.name IS NULL, j.name`,
|
||||
);
|
||||
return rows.map((row) => ({
|
||||
...row,
|
||||
enabled: !!row.enabled,
|
||||
blacklist: fromJson(row.blacklist, []),
|
||||
provider: fromJson(row.provider, []),
|
||||
notificationAdapter: fromJson(row.notificationAdapter, []),
|
||||
}));
|
||||
};
|
||||
|
||||
@@ -1,52 +1,168 @@
|
||||
import { JSONFileSync } from 'lowdb/node';
|
||||
import { getDirName } from '../../utils.js';
|
||||
import path from 'path';
|
||||
import LowdashAdapter from './LowDashAdapter.js';
|
||||
import { nullOrEmpty } from '../../utils.js';
|
||||
import SqliteConnection from './SqliteConnection.js';
|
||||
import { nanoid } from 'nanoid';
|
||||
|
||||
const file = path.join(getDirName(), '../', 'db/jobListingData.json');
|
||||
const adapter = new JSONFileSync(file);
|
||||
const db = new LowdashAdapter(adapter, {});
|
||||
|
||||
db.read();
|
||||
|
||||
const buildKey = (jobKey, providerId, endpoint) => {
|
||||
let key = `${jobKey}`;
|
||||
if (jobKey == null && endpoint == null) {
|
||||
return key;
|
||||
}
|
||||
if (providerId != null) {
|
||||
key += `.${providerId}`;
|
||||
}
|
||||
if (endpoint != null) {
|
||||
key += `.${endpoint}`;
|
||||
}
|
||||
return key;
|
||||
};
|
||||
export const getNumberOfAllKnownListings = (jobId) => {
|
||||
const data = db.chain.get(`${jobId}.providerData`).value() || {};
|
||||
return Object.values(data)
|
||||
.map((values) => Object.keys(values).length)
|
||||
.reduce((accumulator, currentValue) => accumulator + currentValue, 0);
|
||||
};
|
||||
/**
|
||||
* Build analytics data for a given job by grouping all listings by provider and
|
||||
* mapping each listing hash to its creation timestamp.
|
||||
*
|
||||
* SQL shape:
|
||||
* SELECT json_group_object(provider, json_object(hash, created_at)) AS result
|
||||
* FROM listings WHERE job_id = @jobId;
|
||||
*
|
||||
* The resulting object has the shape:
|
||||
* {
|
||||
* providerA: { "<hash1>": <created_at_ms>, "<hash2>": <created_at_ms>, ... },
|
||||
* providerB: { ... }
|
||||
* }
|
||||
*
|
||||
* @param {string} jobId - ID of the job whose listings should be aggregated.
|
||||
* @returns {Record<string, Record<string, number>>} Object grouped by provider mapping listing-hash -> created_at epoch ms.
|
||||
*/
|
||||
export const getListingProviderDataForAnalytics = (jobId) => {
|
||||
const key = buildKey(jobId, 'providerData');
|
||||
return db.chain.get(key).value() || {};
|
||||
const row = SqliteConnection.query(
|
||||
`SELECT COALESCE(
|
||||
json_group_object(provider, json(provider_map)),
|
||||
json('{}')
|
||||
) AS result
|
||||
FROM (SELECT provider,
|
||||
json_group_object(hash, created_at) AS provider_map
|
||||
FROM listings
|
||||
WHERE job_id = @jobId
|
||||
GROUP BY provider);`,
|
||||
{ jobId },
|
||||
);
|
||||
|
||||
return row?.length > 0 ? JSON.parse(row[0].result) : {};
|
||||
};
|
||||
export const getKnownListings = (jobId, providerId) => {
|
||||
const providerListingsKey = buildKey(jobId, 'providerData', providerId, 'listings');
|
||||
return db.chain.get(providerListingsKey).value() || {};
|
||||
|
||||
/**
|
||||
* Return a list of known listing hashes for a given job and provider.
|
||||
* Useful to de-duplicate before inserting new listings.
|
||||
*
|
||||
* @param {string} jobId - The job identifier.
|
||||
* @param {string} providerId - The provider identifier (e.g., 'immoscout').
|
||||
* @returns {string[]} Array of listing hashes.
|
||||
*/
|
||||
export const getKnownListingHashesForJobAndProvider = (jobId, providerId) => {
|
||||
return SqliteConnection.query(
|
||||
`SELECT hash
|
||||
FROM listings
|
||||
WHERE job_id = @jobId AND provider = @providerId`,
|
||||
{ jobId, providerId },
|
||||
).map((r) => r.hash);
|
||||
};
|
||||
export const setKnownListings = (jobId, providerId, listings) => {
|
||||
const providerListingsKey = buildKey(jobId, 'providerData', providerId, 'listings');
|
||||
db.chain.set(providerListingsKey, listings).value();
|
||||
return db.write();
|
||||
|
||||
/**
|
||||
* Return a list of listing that either are active or have an unknown status
|
||||
* to constantly check if they are still online
|
||||
*
|
||||
* @returns {string[]} Array of listings
|
||||
*/
|
||||
export const getActiveOrUnknownListings = () => {
|
||||
return SqliteConnection.query(
|
||||
`SELECT *
|
||||
FROM listings
|
||||
WHERE is_active is null OR is_active = 1 ORDER BY provider`,
|
||||
);
|
||||
};
|
||||
export const setLastJobExecution = (jobId) => {
|
||||
const key = buildKey(jobId, null, 'lastExecution');
|
||||
db.chain.set(key, Date.now()).value();
|
||||
return db.write();
|
||||
|
||||
/**
|
||||
* Deactivates listings by setting is_active = 0 for all matching IDs.
|
||||
*
|
||||
* @param {string[]} ids - Array of listing IDs to deactivate.
|
||||
* @returns {object[]} Result of the SQLite query execution.
|
||||
*/
|
||||
export const deactivateListings = (ids) => {
|
||||
const placeholders = ids.map(() => '?').join(',');
|
||||
return SqliteConnection.execute(
|
||||
`UPDATE listings
|
||||
SET is_active = 0
|
||||
WHERE id IN (${placeholders})`,
|
||||
ids,
|
||||
);
|
||||
};
|
||||
export const removeListings = (jobId) => {
|
||||
db.chain.unset(jobId).value();
|
||||
db.write();
|
||||
|
||||
/**
|
||||
* Persist a batch of scraped listings for a given job and provider.
|
||||
*
|
||||
* - Empty or non-array inputs are ignored.
|
||||
* - Each listing is inserted with ON CONFLICT(hash) DO NOTHING to avoid duplicates.
|
||||
* - Performs inserts in a single transaction for performance.
|
||||
*
|
||||
* Listing input shape (minimal expected):
|
||||
* {
|
||||
* id: string, // unique id
|
||||
* hash: string // stable hash/id of the listing (used as unique hash)
|
||||
* price?: string, // e.g., "1.234 €" or "1,234€"
|
||||
* size?: string, // e.g., "70 m²"
|
||||
* title?: string,
|
||||
* image?: string, // image URL
|
||||
* description?: string,
|
||||
* address?: string, // free-text address possibly containing parentheses
|
||||
* link?: string
|
||||
* }
|
||||
*
|
||||
* @param {string} jobId - The job identifier.
|
||||
* @param {string} providerId - The provider identifier.
|
||||
* @param {Array<Object>} listings - Array of listing objects as described above.
|
||||
* @returns {void}
|
||||
*/
|
||||
export const storeListings = (jobId, providerId, listings) => {
|
||||
if (!Array.isArray(listings) || listings.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
SqliteConnection.withTransaction((db) => {
|
||||
const stmt = db.prepare(
|
||||
`INSERT INTO listings (id, hash, provider, job_id, price, size, title, image_url, description, address,
|
||||
link, created_at, is_active)
|
||||
VALUES (@id, @hash, @provider, @job_id, @price, @size, @title, @image_url, @description, @address, @link,
|
||||
@created_at, 1)
|
||||
ON CONFLICT(job_id, hash) DO NOTHING`,
|
||||
);
|
||||
|
||||
for (const item of listings) {
|
||||
const params = {
|
||||
id: nanoid(),
|
||||
hash: item.id,
|
||||
provider: providerId,
|
||||
job_id: jobId,
|
||||
price: extractNumber(item.price),
|
||||
size: extractNumber(item.size),
|
||||
title: item.title,
|
||||
image_url: item.image,
|
||||
description: item.description,
|
||||
address: removeParentheses(item.address),
|
||||
link: item.link,
|
||||
created_at: Date.now(),
|
||||
};
|
||||
stmt.run(params);
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* Extract the first number from a string like "1.234 €" or "70 m²".
|
||||
* Removes dots/commas before parsing. Returns null on invalid input.
|
||||
* @param {string|undefined|null} str
|
||||
* @returns {number|null}
|
||||
*/
|
||||
function extractNumber(str) {
|
||||
if (!str) return null;
|
||||
const match = str.replace(/[.,]/g, '').match(/\d+/);
|
||||
return match ? +match[0] : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove any parentheses segments (including surrounding whitespace) from a string.
|
||||
* Returns null for empty input.
|
||||
* @param {string|undefined|null} str
|
||||
* @returns {string|null}
|
||||
*/
|
||||
function removeParentheses(str) {
|
||||
if (nullOrEmpty(str)) {
|
||||
return null;
|
||||
}
|
||||
return str.replace(/\s*\([^)]*\)/g, '');
|
||||
}
|
||||
};
|
||||
|
||||
185
lib/services/storage/migrations/migrate.js
Normal file
185
lib/services/storage/migrations/migrate.js
Normal file
@@ -0,0 +1,185 @@
|
||||
/**
|
||||
* Migration Runner for better-sqlite3
|
||||
* I know there are external libs out there, but
|
||||
* a) most of them are pretty bloated
|
||||
* b) I wanted to have something that fit's this limited use-case
|
||||
* c) I was searching for justifications anyway to build a migration system on my own. Don't judge me ;)
|
||||
*
|
||||
* Executes all migration files in lib/services/storage/migrations/sql in natural order.
|
||||
* Each migration runs in its own transaction. If a migration fails, only that
|
||||
* migration is rolled back and the process stops with a non-zero exit code.
|
||||
* Already applied migrations are skipped using the schema_migrations table.
|
||||
*
|
||||
* Usage:
|
||||
* CLI: yarn run migratedb
|
||||
* Programmatic:
|
||||
* import { runMigrations } from './lib/services/storage/migrations/migrate.js';
|
||||
* await runMigrations();
|
||||
*
|
||||
* Migration file format (example: lib/services/storage/migrations/sql/1.add-users.js):
|
||||
* export function up(db) {
|
||||
* db.exec("CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT NOT NULL)");
|
||||
* }
|
||||
*
|
||||
*/
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { pathToFileURL } from 'url';
|
||||
import crypto from 'crypto';
|
||||
import SqliteConnection from '../SqliteConnection.js';
|
||||
import logger from '../../logger.js';
|
||||
|
||||
const ROOT = path.resolve('.');
|
||||
const MIGRATIONS_DIR = path.join(ROOT, 'lib', 'services', 'storage', 'migrations', 'sql');
|
||||
|
||||
/**
|
||||
* Ensures that the given directory exists, creating it recursively if needed.
|
||||
* @param {string} p - Path to the directory.
|
||||
*/
|
||||
function ensureDir(p) {
|
||||
if (!fs.existsSync(p)) fs.mkdirSync(p, { recursive: true });
|
||||
}
|
||||
|
||||
/**
|
||||
* Lists all migration files in the migrations directory.
|
||||
* Migration files must follow the format: <number>.<label>.js
|
||||
* @returns {Array<{id:number, name:string, label:string, path:string}>}
|
||||
*/
|
||||
function listMigrationFiles() {
|
||||
ensureDir(MIGRATIONS_DIR);
|
||||
return fs
|
||||
.readdirSync(MIGRATIONS_DIR)
|
||||
.filter((f) => /^\d+\..+\.js$/.test(f))
|
||||
.map((file) => {
|
||||
const [idStr, ...rest] = file.split('.');
|
||||
const id = Number.parseInt(idStr, 10);
|
||||
const label = rest.slice(0, -1).join('.');
|
||||
const fullPath = path.join(MIGRATIONS_DIR, file);
|
||||
return { id, name: file, label, path: fullPath };
|
||||
})
|
||||
.sort((a, b) => (a.id === b.id ? a.name.localeCompare(b.name) : a.id - b.id));
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the SHA-256 checksum of a file.
|
||||
* @param {string} filePath - Path to the file.
|
||||
* @returns {string} Hex-encoded checksum.
|
||||
*/
|
||||
function sha256File(filePath) {
|
||||
const buf = fs.readFileSync(filePath);
|
||||
return crypto.createHash('sha256').update(buf).digest('hex');
|
||||
}
|
||||
|
||||
/**
|
||||
* Dynamically imports a migration module and returns its `up` function.
|
||||
* @param {string} filePath - Path to the migration file.
|
||||
* @returns {Promise<Function>} Migration function.
|
||||
* @throws {Error} If the migration file does not export a valid function.
|
||||
*/
|
||||
async function loadMigrationModule(filePath) {
|
||||
const testImporter = globalThis.__TEST_MIGRATE_IMPORT__;
|
||||
const url = pathToFileURL(filePath);
|
||||
const mod = testImporter ? await testImporter(filePath, url) : await import(url.href);
|
||||
const fn = mod.up || mod.default;
|
||||
if (typeof fn !== 'function') {
|
||||
throw new Error(`Migration ${filePath} must export function up(db) or default function(db)`);
|
||||
}
|
||||
return fn;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads all previously executed migrations from the database.
|
||||
* @returns {Map<string,string>} Map of migration name to checksum.
|
||||
*/
|
||||
function loadExecutedMigrations() {
|
||||
const executed = new Map();
|
||||
const hasTable = SqliteConnection.tableExists('schema_migrations');
|
||||
if (!hasTable) return executed;
|
||||
const rows = SqliteConnection.query('SELECT name, checksum FROM schema_migrations ORDER BY applied_at ASC');
|
||||
for (const r of rows) executed.set(r.name, r.checksum);
|
||||
return executed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes all pending migrations.
|
||||
* Ensures that each migration runs inside its own transaction.
|
||||
* Already applied migrations are skipped, unless checksum updates are allowed.
|
||||
* On success, updates the schema_migrations table and runs PRAGMA optimize.
|
||||
*/
|
||||
export async function runMigrations() {
|
||||
ensureDir(path.join(ROOT, 'db'));
|
||||
ensureDir(MIGRATIONS_DIR);
|
||||
|
||||
const files = listMigrationFiles();
|
||||
if (files.length === 0) {
|
||||
logger.info('No migration files found under', MIGRATIONS_DIR);
|
||||
return;
|
||||
}
|
||||
|
||||
SqliteConnection.getConnection();
|
||||
|
||||
const executed = loadExecutedMigrations();
|
||||
|
||||
let appliedMigrations = 0;
|
||||
for (const m of files) {
|
||||
const checksum = sha256File(m.path);
|
||||
|
||||
if (executed.has(m.name)) {
|
||||
const prev = executed.get(m.name);
|
||||
if (prev !== checksum) {
|
||||
logger.info(`Mismatch found in migration ${m.name}. Fixing.`);
|
||||
SqliteConnection.execute('UPDATE schema_migrations SET checksum = @checksum WHERE name = @name', {
|
||||
checksum,
|
||||
name: m.name,
|
||||
});
|
||||
executed.set(m.name, checksum);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
appliedMigrations++;
|
||||
logger.info(`Applying migration: ${m.name}`);
|
||||
const fn = await loadMigrationModule(m.path);
|
||||
|
||||
try {
|
||||
let duration = 0;
|
||||
SqliteConnection.withTransaction((db) => {
|
||||
const t0 = Date.now();
|
||||
fn(db);
|
||||
duration = Date.now() - t0;
|
||||
db.prepare(
|
||||
"INSERT INTO schema_migrations (name, checksum, applied_at, duration_ms) VALUES (?, ?, datetime('now'), ?)",
|
||||
).run(m.name, checksum, duration);
|
||||
});
|
||||
logger.info(`Migration applied: ${m.name} (${duration} ms)`);
|
||||
} catch (e) {
|
||||
logger.error(`Migration failed and was rolled back: ${m.name}`, e);
|
||||
process.exitCode = 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
SqliteConnection.optimize();
|
||||
if (appliedMigrations > 0) {
|
||||
logger.info('All migrations completed successfully.');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Detects whether the current file is being executed directly via Node.js.
|
||||
* This allows `node lib/services/storage/migrations/migrate.js` to run migrations directly.
|
||||
* @returns {boolean} True if the file was run directly.
|
||||
*/
|
||||
const isDirectRun = (() => {
|
||||
try {
|
||||
const thisFile = import.meta.url;
|
||||
const invoked = pathToFileURL(process.argv[1] || '').href;
|
||||
return thisFile === invoked;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
})();
|
||||
|
||||
if (isDirectRun) {
|
||||
await runMigrations();
|
||||
}
|
||||
16
lib/services/storage/migrations/sql/0.init.js
Normal file
16
lib/services/storage/migrations/sql/0.init.js
Normal file
@@ -0,0 +1,16 @@
|
||||
// Initial migration: creates schema_migrations table used by the migration runner.
|
||||
//
|
||||
export function up(db) {
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS schema_migrations (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
name TEXT NOT NULL UNIQUE,
|
||||
checksum TEXT NOT NULL,
|
||||
applied_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
duration_ms INTEGER NOT NULL DEFAULT 0
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_schema_migrations_applied_at
|
||||
ON schema_migrations(applied_at);
|
||||
`);
|
||||
}
|
||||
@@ -0,0 +1,117 @@
|
||||
// Migration: Create fredy's base structure (users, jobs and listings) import initial
|
||||
// data from JSON files if present. (This applies only for jobs and users, for the old jobListingData,
|
||||
// I cannot migrate the data as the new format is totally different.
|
||||
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { toJson } from '../../../../utils.js';
|
||||
|
||||
export function up(db) {
|
||||
// 1) Create tables
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS users
|
||||
(
|
||||
id TEXT PRIMARY KEY,
|
||||
username TEXT NOT NULL,
|
||||
password TEXT NOT NULL,
|
||||
last_login INTEGER,
|
||||
is_admin INTEGER NOT NULL DEFAULT 0
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_users_username ON users (username);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS jobs
|
||||
(
|
||||
id TEXT PRIMARY KEY,
|
||||
user_id TEXT NOT NULL,
|
||||
enabled INTEGER NOT NULL DEFAULT 1,
|
||||
name TEXT,
|
||||
blacklist JSONB NOT NULL DEFAULT '[]',
|
||||
provider JSONB NOT NULL DEFAULT '[]',
|
||||
notification_adapter JSONB NOT NULL DEFAULT '[]',
|
||||
FOREIGN KEY (user_id) REFERENCES users (id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_jobs_user_id ON jobs (user_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_jobs_enabled ON jobs (enabled);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS listings
|
||||
(
|
||||
id TEXT PRIMARY KEY,
|
||||
created_at INTEGER,
|
||||
hash TEXT,
|
||||
provider TEXT,
|
||||
job_id TEXT,
|
||||
price INTEGER,
|
||||
size INTEGER,
|
||||
title TEXT,
|
||||
image_url TEXT,
|
||||
description TEXT,
|
||||
address TEXT,
|
||||
link TEXT,
|
||||
FOREIGN KEY (job_id) REFERENCES jobs (id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_listings_hash ON listings (hash);
|
||||
`);
|
||||
|
||||
// 2) Optionally import data from JSON files if present for users and jobs
|
||||
const ROOT = path.resolve('.');
|
||||
const usersJsonPath = path.join(ROOT, 'db', 'users.json');
|
||||
const jobsJsonPath = path.join(ROOT, 'db', 'jobs.json');
|
||||
|
||||
// Insert users
|
||||
if (fs.existsSync(usersJsonPath)) {
|
||||
try {
|
||||
const raw = fs.readFileSync(usersJsonPath, 'utf8');
|
||||
const json = JSON.parse(raw);
|
||||
const arr = Array.isArray(json?.user) ? json.user : [];
|
||||
if (arr.length > 0) {
|
||||
const stmt = db.prepare(
|
||||
`INSERT INTO users (id, username, password, last_login, is_admin)
|
||||
VALUES (@id, @username, @password, @last_login, @is_admin)`,
|
||||
);
|
||||
for (const u of arr) {
|
||||
stmt.run({
|
||||
id: u.id,
|
||||
username: u.username,
|
||||
password: u.password,
|
||||
last_login: u.lastLogin ?? null,
|
||||
is_admin: u.isAdmin ? 1 : 0,
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
// If parsing fails, let it throw to rollback the migration
|
||||
throw new Error(`Failed to import users from ${usersJsonPath}: ${e.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Insert jobs
|
||||
if (fs.existsSync(jobsJsonPath)) {
|
||||
try {
|
||||
const raw = fs.readFileSync(jobsJsonPath, 'utf8');
|
||||
const json = JSON.parse(raw);
|
||||
const arr = Array.isArray(json?.jobs) ? json.jobs : [];
|
||||
if (arr.length > 0) {
|
||||
const stmt = db.prepare(
|
||||
`INSERT INTO jobs (id, user_id, enabled, name, blacklist, provider, notification_adapter)
|
||||
VALUES (@id, @user_id, @enabled, @name, @blacklist, @provider, @notification_adapter)`,
|
||||
);
|
||||
for (const j of arr) {
|
||||
stmt.run({
|
||||
id: j.id,
|
||||
user_id: j.userId,
|
||||
enabled: j.enabled ? 1 : 0,
|
||||
name: j.name ?? null,
|
||||
blacklist: toJson(j.blacklist ?? []),
|
||||
provider: toJson(j.provider ?? []),
|
||||
notification_adapter: toJson(j.notificationAdapter ?? []),
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
throw new Error(`Failed to import jobs from ${jobsJsonPath}: ${e.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
// Migration: there needs to be a unique index on job_id and hash as only
|
||||
// this makes the listing indeed unique
|
||||
|
||||
export function up(db) {
|
||||
db.exec(`
|
||||
ALTER TABLE listings ADD COLUMN is_active INTEGER DEFAULT 1;
|
||||
`);
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
// Migration: there needs to be a unique index on job_id and hash as only
|
||||
// this makes the listing indeed unique
|
||||
|
||||
export function up(db) {
|
||||
db.exec(`
|
||||
DROP INDEX IF EXISTS idx_listings_hash;
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_listings_job_hash
|
||||
ON listings (job_id, hash);
|
||||
`);
|
||||
}
|
||||
@@ -1,123 +1,176 @@
|
||||
import { JSONFileSync } from 'lowdb/node';
|
||||
import { config, getDirName } from '../../utils.js';
|
||||
import { config } from '../../utils.js';
|
||||
import * as hasher from '../security/hash.js';
|
||||
import { nanoid } from 'nanoid';
|
||||
import * as jobStorage from './jobStorage.js';
|
||||
import path from 'path';
|
||||
import LowdashAdapter from './LowDashAdapter.js';
|
||||
|
||||
const defaultData = {
|
||||
user: [
|
||||
//you probably want to change the default password ;)
|
||||
{
|
||||
id: nanoid(),
|
||||
lastLogin: Date.now(),
|
||||
username: 'admin',
|
||||
password: hasher.hash('admin'),
|
||||
isAdmin: true,
|
||||
},
|
||||
{
|
||||
id: nanoid(),
|
||||
lastLogin: Date.now(),
|
||||
username: 'demo',
|
||||
password: hasher.hash('demo'),
|
||||
isAdmin: true,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const file = path.join(getDirName(), '../', 'db/users.json');
|
||||
const adapter = new JSONFileSync(file);
|
||||
const db = new LowdashAdapter(adapter, defaultData);
|
||||
|
||||
db.read();
|
||||
import SqliteConnection from './SqliteConnection.js';
|
||||
|
||||
/**
|
||||
* Get all users.
|
||||
*
|
||||
* Notes:
|
||||
* - Password hashes are omitted by default to avoid leaking them to callers that don’t need them.
|
||||
* - numberOfJobs is computed via a subquery for each user.
|
||||
*
|
||||
* @param {boolean} withPassword - If true, include the hashed password in the returned objects; otherwise set password to null.
|
||||
* @returns {User[]} Array of users ordered by username.
|
||||
*/
|
||||
export const getUsers = (withPassword) => {
|
||||
const jobs = jobStorage.getJobs();
|
||||
return db.chain
|
||||
.get('user')
|
||||
.value()
|
||||
.map((user) => ({
|
||||
//we dont want the password in the frontend, even tho it's hashed
|
||||
...user,
|
||||
password: withPassword ? user.password : null,
|
||||
numberOfJobs: jobs.filter((job) => job.userId === user.id).length,
|
||||
}));
|
||||
};
|
||||
export const getUser = (id) => {
|
||||
const jobs = jobStorage.getJobs();
|
||||
const user = db.chain
|
||||
.get('user')
|
||||
.find((user) => user.id === id)
|
||||
.value();
|
||||
if (user == null) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
...user,
|
||||
numberOfJobs: jobs.filter((job) => job.userId === user.id).length,
|
||||
};
|
||||
};
|
||||
export const upsertUser = ({ username, password, userId, isAdmin }) => {
|
||||
const user = db.chain
|
||||
.get('user')
|
||||
.filter((u) => u.id !== userId)
|
||||
.value();
|
||||
user.push({
|
||||
id: userId || nanoid(),
|
||||
username,
|
||||
lastLogin: user.lastLogin,
|
||||
password: hasher.hash(password),
|
||||
isAdmin,
|
||||
});
|
||||
db.chain.set('user', user).value();
|
||||
db.write();
|
||||
};
|
||||
export const setLastLoginToNow = ({ userId }) => {
|
||||
db.chain
|
||||
.get('user')
|
||||
.find((u) => u.id === userId)
|
||||
.assign({ lastLogin: Date.now() })
|
||||
.value();
|
||||
db.write();
|
||||
};
|
||||
export const removeUser = (userId) => {
|
||||
const user = db.chain.get('user').value();
|
||||
db.chain
|
||||
.set(
|
||||
'user',
|
||||
user.filter((u) => u.id !== userId),
|
||||
)
|
||||
.value();
|
||||
db.write();
|
||||
const rows = SqliteConnection.query(
|
||||
`SELECT u.id, u.username, u.password, u.last_login AS lastLogin, u.is_admin AS isAdmin,
|
||||
(SELECT COUNT(1) FROM jobs j WHERE j.user_id = u.id) AS numberOfJobs
|
||||
FROM users u
|
||||
ORDER BY u.username`,
|
||||
);
|
||||
return rows.map((u) => ({
|
||||
...u,
|
||||
password: withPassword ? u.password : null,
|
||||
isAdmin: !!u.isAdmin,
|
||||
}));
|
||||
};
|
||||
|
||||
export const handleDemoUser = () => {
|
||||
if (!config.demoMode) {
|
||||
const user = db.chain.get('user').value();
|
||||
db.chain
|
||||
.set(
|
||||
'user',
|
||||
user.filter((u) => u.username !== 'demo'),
|
||||
)
|
||||
.value();
|
||||
db.write();
|
||||
/**
|
||||
* Get a single user by id.
|
||||
*
|
||||
* @param {string} id - User id (primary key).
|
||||
* @returns {User|null} The user when found; otherwise null. The password field is included but callers should not expose it.
|
||||
*/
|
||||
export const getUser = (id) => {
|
||||
const rows = SqliteConnection.query(
|
||||
`SELECT u.id, u.username, u.password, u.last_login AS lastLogin, u.is_admin AS isAdmin,
|
||||
(SELECT COUNT(1) FROM jobs j WHERE j.user_id = u.id) AS numberOfJobs
|
||||
FROM users u
|
||||
WHERE u.id = @id
|
||||
LIMIT 1`,
|
||||
{ id },
|
||||
);
|
||||
const u = rows[0];
|
||||
if (!u) return null;
|
||||
return { ...u, isAdmin: !!u.isAdmin };
|
||||
};
|
||||
|
||||
/**
|
||||
* Insert a new user or update an existing one.
|
||||
*
|
||||
* Behavior:
|
||||
* - When userId is provided and exists: updates username and isAdmin. Password is only updated when a non-empty password is provided.
|
||||
* - When userId is missing or does not exist: inserts a new user with a freshly generated id. last_login is initialized to null.
|
||||
* - Passwords are hashed using the same hashing function used for login comparison.
|
||||
*
|
||||
* @param {Object} params
|
||||
* @param {string} params.username - Username (must be unique in DB).
|
||||
* @param {string} [params.password] - Plain text password to set; if omitted on update, existing hash is preserved.
|
||||
* @param {string} [params.userId] - Existing user id to update; if missing, a new id is generated.
|
||||
* @param {boolean} params.isAdmin - Whether the user should have admin privileges.
|
||||
* @returns {void}
|
||||
*/
|
||||
export const upsertUser = ({ username, password, userId, isAdmin }) => {
|
||||
const id = userId || nanoid();
|
||||
// Check if user exists
|
||||
const exists = SqliteConnection.query(`SELECT 1 FROM users WHERE id = @id LIMIT 1`, { id }).length > 0;
|
||||
if (exists) {
|
||||
// Update existing user. Update password only if provided (non-empty string)
|
||||
if (password && password.length > 0) {
|
||||
SqliteConnection.execute(
|
||||
`UPDATE users SET username = @username, password = @password, is_admin = @is_admin WHERE id = @id`,
|
||||
{ id, username, password: hasher.hash(password), is_admin: isAdmin ? 1 : 0 },
|
||||
);
|
||||
} else {
|
||||
SqliteConnection.execute(`UPDATE users SET username = @username, is_admin = @is_admin WHERE id = @id`, {
|
||||
id,
|
||||
username,
|
||||
is_admin: isAdmin ? 1 : 0,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
const demoUser = db.chain
|
||||
.get('user')
|
||||
.filter((u) => u.username === 'demo')
|
||||
.value();
|
||||
if (demoUser == null || demoUser.length === 0) {
|
||||
db.chain
|
||||
.get('user')
|
||||
.value()
|
||||
.push({
|
||||
id: nanoid(),
|
||||
username: 'demo',
|
||||
password: hasher.hash('demo'),
|
||||
isAdmin: true,
|
||||
});
|
||||
db.write();
|
||||
SqliteConnection.execute(
|
||||
`INSERT INTO users (id, username, password, last_login, is_admin)
|
||||
VALUES (@id, @username, @password, @last_login, @is_admin)`,
|
||||
{
|
||||
id,
|
||||
username,
|
||||
password: hasher.hash(password || ''),
|
||||
last_login: null,
|
||||
is_admin: isAdmin ? 1 : 0,
|
||||
},
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Update the last_login timestamp to now for the given user.
|
||||
*
|
||||
* @param {{userId: string}} params - Parameters.
|
||||
* @param {string} params.userId - The user's id.
|
||||
* @returns {void}
|
||||
*/
|
||||
export const setLastLoginToNow = ({ userId }) => {
|
||||
SqliteConnection.execute(`UPDATE users SET last_login = @now WHERE id = @id`, { id: userId, now: Date.now() });
|
||||
};
|
||||
|
||||
/**
|
||||
* Remove a user by id.
|
||||
*
|
||||
* Notes:
|
||||
* - In the SQLite schema, jobs reference users with ON DELETE CASCADE, so jobs (and their listings via jobs) are removed automatically.
|
||||
*
|
||||
* @param {string} userId - The id of the user to remove.
|
||||
* @returns {void}
|
||||
*/
|
||||
export const removeUser = (userId) => {
|
||||
SqliteConnection.execute(`DELETE FROM users WHERE id = @id`, { id: userId });
|
||||
};
|
||||
|
||||
/**
|
||||
* Ensure the demo user matches the demo mode setting.
|
||||
*
|
||||
* Behavior:
|
||||
* - When config.demoMode is false: remove the demo user (and its cascading data via FKs).
|
||||
* - When config.demoMode is true: ensure a 'demo' user exists with password 'demo' and admin rights.
|
||||
*
|
||||
* Security: The demo user's password is set to a known value ('demo') and should only be enabled in demoMode.
|
||||
* @returns {void}
|
||||
*/
|
||||
export const ensureDemoUserExists = () => {
|
||||
if (!config.demoMode) {
|
||||
// Remove demo user (and cascade delete their jobs/listings)
|
||||
SqliteConnection.execute(`DELETE FROM users WHERE username = 'demo'`);
|
||||
return;
|
||||
}
|
||||
// Ensure demo user exists when demo mode is on
|
||||
const existing = SqliteConnection.query(`SELECT id FROM users WHERE username = 'demo' LIMIT 1`);
|
||||
if (existing.length === 0) {
|
||||
SqliteConnection.execute(
|
||||
`INSERT INTO users (id, username, password, last_login, is_admin)
|
||||
VALUES (@id, 'demo', @password, NULL, 1)`,
|
||||
{ id: nanoid(), password: hasher.hash('demo') },
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Ensure there is at least one administrator in the system.
|
||||
*
|
||||
* Behavior:
|
||||
* - If there are no users at all, create default 'admin' user with password 'admin'.
|
||||
* - If users exist but none is admin, promote the first existing user to admin.
|
||||
*
|
||||
* Security: On a fresh instance, a default admin/admin is created; change this password immediately.
|
||||
* @returns {void}
|
||||
*/
|
||||
export const ensureAdminUserExists = () => {
|
||||
const anyUser = SqliteConnection.query(`SELECT id FROM users LIMIT 1`).length > 0;
|
||||
if (!anyUser) {
|
||||
SqliteConnection.execute(
|
||||
`INSERT INTO users (id, username, password, last_login, is_admin)
|
||||
VALUES (@id, 'admin', @password, @last_login, 1)`,
|
||||
{ id: nanoid(), password: hasher.hash('admin'), last_login: Date.now() },
|
||||
);
|
||||
return;
|
||||
}
|
||||
const adminCount = SqliteConnection.query(`SELECT COUNT(1) AS c FROM users WHERE is_admin = 1`)[0]?.c ?? 0;
|
||||
if (adminCount === 0) {
|
||||
const firstUser = SqliteConnection.query(`SELECT id FROM users LIMIT 1`)[0];
|
||||
if (firstUser) {
|
||||
SqliteConnection.execute(`UPDATE users SET is_admin = 1 WHERE id = @id`, { id: firstUser.id });
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1,65 +1,64 @@
|
||||
import Mixpanel from 'mixpanel';
|
||||
import { getJobs } from '../storage/jobStorage.js';
|
||||
import { getUniqueId } from './uniqueId.js';
|
||||
import { config, inDevMode } from '../../utils.js';
|
||||
import { config, getPackageVersion, inDevMode } from '../../utils.js';
|
||||
import os from 'os';
|
||||
import { readFileSync } from 'fs';
|
||||
import { packageUp } from 'package-up';
|
||||
import fetch from 'node-fetch';
|
||||
import logger from '../logger.js';
|
||||
|
||||
const mixpanelTracker = Mixpanel.init('718670ef1c58c0208256c1e408a3d75e');
|
||||
const distinct_id = getUniqueId() || 'N/A';
|
||||
const deviceId = getUniqueId() || 'N/A';
|
||||
const version = await getPackageVersion();
|
||||
const FREDY_TRACKING_URL = 'https://fredy.orange-coding.net/tracking';
|
||||
|
||||
export const track = function () {
|
||||
//only send tracking information if the user allowed to do so.
|
||||
if (config.analyticsEnabled && !inDevMode()) {
|
||||
const activeProvider = new Set();
|
||||
const activeAdapter = new Set();
|
||||
export const trackMainEvent = async () => {
|
||||
try {
|
||||
if (config.analyticsEnabled && !inDevMode()) {
|
||||
const activeProvider = new Set();
|
||||
const activeAdapter = new Set();
|
||||
|
||||
const jobs = getJobs();
|
||||
const jobs = getJobs();
|
||||
|
||||
if (jobs != null && jobs.length > 0) {
|
||||
jobs.forEach((job) => {
|
||||
job.provider.forEach((provider) => {
|
||||
activeProvider.add(provider.id);
|
||||
if (jobs != null && jobs.length > 0) {
|
||||
jobs.forEach((job) => {
|
||||
job.provider.forEach((provider) => activeProvider.add(provider.id));
|
||||
job.notificationAdapter.forEach((adapter) => activeAdapter.add(adapter.id));
|
||||
});
|
||||
job.notificationAdapter.forEach((adapter) => {
|
||||
activeAdapter.add(adapter.id);
|
||||
});
|
||||
});
|
||||
|
||||
mixpanelTracker.track(
|
||||
'fredy_tracking',
|
||||
enrichTrackingObject({
|
||||
const trackingObj = enrichTrackingObject({
|
||||
adapter: Array.from(activeAdapter),
|
||||
provider: Array.from(activeProvider),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
await fetch(`${FREDY_TRACKING_URL}/main`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(trackingObj),
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
logger.warn('Error sending tracking data', error);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Note, this will only be used when Fredy runs in demo mode
|
||||
*/
|
||||
export function trackDemoJobCreated(jobData) {
|
||||
export async function trackDemoAccessed() {
|
||||
if (config.analyticsEnabled && !inDevMode() && config.demoMode) {
|
||||
mixpanelTracker.track('demoJobCreated', enrichTrackingObject(jobData));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Note, this will only be used when Fredy runs in demo mode
|
||||
*/
|
||||
export function trackDemoAccessed() {
|
||||
if (config.analyticsEnabled && !inDevMode() && config.demoMode) {
|
||||
mixpanelTracker.track('demoAccessed', enrichTrackingObject({}));
|
||||
try {
|
||||
await fetch(`${FREDY_TRACKING_URL}/demo/accessed`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
});
|
||||
} catch (error) {
|
||||
logger.warn('Error sending tracking data', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function enrichTrackingObject(trackingObject) {
|
||||
const operating_system = os.platform();
|
||||
const os_version = os.release();
|
||||
const operatingSystem = os.platform();
|
||||
const osVersion = os.release();
|
||||
const arch = process.arch;
|
||||
const language = process.env.LANG || 'en';
|
||||
const nodeVersion = process.version || 'N/A';
|
||||
@@ -67,24 +66,12 @@ function enrichTrackingObject(trackingObject) {
|
||||
return {
|
||||
...trackingObject,
|
||||
isDemo: config.demoMode,
|
||||
operating_system,
|
||||
os_version,
|
||||
operatingSystem,
|
||||
osVersion,
|
||||
arch,
|
||||
nodeVersion,
|
||||
language,
|
||||
distinct_id,
|
||||
fredy_version: version,
|
||||
deviceId,
|
||||
version,
|
||||
};
|
||||
}
|
||||
|
||||
async function getPackageVersion() {
|
||||
try {
|
||||
const packagePath = await packageUp();
|
||||
const packageJson = readFileSync(packagePath, 'utf8');
|
||||
const json = JSON.parse(packageJson);
|
||||
return json.version;
|
||||
} catch (error) {
|
||||
console.error('Error reading version from package.json', error);
|
||||
}
|
||||
return 'N/A';
|
||||
}
|
||||
|
||||
199
lib/utils.js
199
lib/utils.js
@@ -1,23 +1,104 @@
|
||||
import { dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath, pathToFileURL } from 'node:url';
|
||||
import { readFile } from 'fs/promises';
|
||||
import { createHash } from 'crypto';
|
||||
import { DEFAULT_CONFIG } from './defaultConfig.js';
|
||||
import fs, { readFileSync } from 'fs';
|
||||
import logger from './services/logger.js';
|
||||
import { packageUp } from 'package-up';
|
||||
|
||||
const RE_GT = />/g;
|
||||
const RE_WEBP = /\/format\/webp/gi;
|
||||
const RE_EXT = /\.(jpe?g|png|gif)(\?.*)?$/i;
|
||||
const HTTPS_PREFIX = 'https://';
|
||||
const providersDirectoryPath = `${getDirName()}/provider`;
|
||||
|
||||
/**
|
||||
* Lazily load all provider modules from the provider directory.
|
||||
* Caches the resolved array to avoid re-importing on subsequent calls.
|
||||
*
|
||||
* @returns {Promise<any[]>} A list of loaded provider modules.
|
||||
*/
|
||||
let cachedProvidersPromise = null;
|
||||
|
||||
export function getProviders() {
|
||||
if (!cachedProvidersPromise) {
|
||||
/** @type {string[]} */
|
||||
const providerFileNames = fs.readdirSync(providersDirectoryPath).filter((fileName) => fileName.endsWith('.js'));
|
||||
cachedProvidersPromise = Promise.all(
|
||||
providerFileNames.map((fileName) => import(pathToFileURL(path.join(providersDirectoryPath, fileName)).href)),
|
||||
);
|
||||
}
|
||||
return cachedProvidersPromise;
|
||||
}
|
||||
|
||||
/**
|
||||
* Safely stringify a value to JSON for storage.
|
||||
* - Returns null when the input is null or undefined.
|
||||
* - Uses JSON.stringify directly otherwise.
|
||||
*
|
||||
* @template T
|
||||
* @param {T} v - Any JSON-serializable value.
|
||||
* @returns {string|null} JSON string or null.
|
||||
*/
|
||||
const toJson = (v) => (v == null ? null : JSON.stringify(v));
|
||||
|
||||
/**
|
||||
* Safely parse JSON text coming from storage.
|
||||
* - Returns the provided fallback when input is null/undefined.
|
||||
* - Returns the fallback when parsing fails.
|
||||
*
|
||||
* @template T
|
||||
* @param {string|null|undefined} txt - JSON text from DB/storage.
|
||||
* @param {T} fallback - Value to return when txt is null/invalid.
|
||||
* @returns {T} Parsed value or fallback.
|
||||
*/
|
||||
const fromJson = (txt, fallback) => {
|
||||
if (txt == null) return fallback;
|
||||
try {
|
||||
return JSON.parse(txt);
|
||||
} catch {
|
||||
return fallback;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Determine if the current process runs in development mode.
|
||||
* Returns true when NODE_ENV is not 'production'.
|
||||
* @returns {boolean}
|
||||
*/
|
||||
function inDevMode() {
|
||||
return process.env.NODE_ENV == null || process.env.NODE_ENV !== 'production';
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a word contains any of the strings in the given array (case-insensitive, substring match).
|
||||
* @param {string} word
|
||||
* @param {string[]} arr
|
||||
* @returns {boolean}
|
||||
*/
|
||||
function isOneOf(word, arr) {
|
||||
if (!arr || arr.length === 0 || word == null) return false;
|
||||
const lowerWord = word.toLowerCase();
|
||||
return arr.some((item) => lowerWord.indexOf(item.toLowerCase()) !== -1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a value is null or an empty string/array.
|
||||
* @param {any} val
|
||||
* @returns {boolean}
|
||||
*/
|
||||
function nullOrEmpty(val) {
|
||||
return val == null || val.length === 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a day time string (HH:mm) to epoch milliseconds for the given reference date.
|
||||
* @param {string} timeString - Format HH:mm
|
||||
* @param {number} now - Epoch ms used as the date basis
|
||||
* @returns {number}
|
||||
*/
|
||||
function timeStringToMs(timeString, now) {
|
||||
const d = new Date(now);
|
||||
const parts = timeString.split(':');
|
||||
@@ -27,6 +108,13 @@ function timeStringToMs(timeString, now) {
|
||||
return d.getTime();
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether current time is within configured working hours, or no hours are set.
|
||||
* If working hours are missing or incomplete, returns true.
|
||||
* @param {{workingHours?: {from?: string, to?: string}}} config
|
||||
* @param {number} now - Epoch ms
|
||||
* @returns {boolean}
|
||||
*/
|
||||
function duringWorkingHoursOrNotSet(config, now) {
|
||||
const { workingHours } = config;
|
||||
if (workingHours == null || nullOrEmpty(workingHours.from) || nullOrEmpty(workingHours.to)) {
|
||||
@@ -37,10 +125,20 @@ function duringWorkingHoursOrNotSet(config, now) {
|
||||
return fromDate <= now && toDate >= now;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the directory name of the current module (ESM equivalent of __dirname).
|
||||
* @returns {string}
|
||||
*/
|
||||
function getDirName() {
|
||||
return dirname(fileURLToPath(import.meta.url));
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a sha256 hash string from the provided inputs (ignores null/empty strings).
|
||||
* Returns null if there are no valid inputs.
|
||||
* @param {...(string|null|undefined)} inputs
|
||||
* @returns {string|null}
|
||||
*/
|
||||
function buildHash(...inputs) {
|
||||
if (inputs == null) {
|
||||
return null;
|
||||
@@ -52,28 +150,61 @@ function buildHash(...inputs) {
|
||||
return createHash('sha256').update(cleaned.join(',')).digest('hex');
|
||||
}
|
||||
|
||||
/**
|
||||
* The in-memory configuration object. Call refreshConfig() to populate/update.
|
||||
* @type {any}
|
||||
*/
|
||||
let config = {};
|
||||
|
||||
/**
|
||||
* Read config JSON from disk (conf/config.json) and parse it.
|
||||
* @returns {Promise<any>} Parsed configuration object.
|
||||
*/
|
||||
export async function readConfigFromStorage() {
|
||||
return JSON.parse(await readFile(new URL('../conf/config.json', import.meta.url)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Refresh the in-memory config, ensuring the file exists and setting backward-compatible defaults.
|
||||
* Populates defaults for analyticsEnabled, demoMode, sqlitepath when missing.
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
export async function refreshConfig() {
|
||||
checkIfConfigExistsAndWriteIfNot();
|
||||
|
||||
try {
|
||||
config = await readConfigFromStorage();
|
||||
//backwards compatability...
|
||||
//backwards compatibility...
|
||||
config.analyticsEnabled ??= null;
|
||||
config.demoMode ??= false;
|
||||
// default sqlitepath when missing in older configs
|
||||
config.sqlitepath ??= '/db';
|
||||
} catch (error) {
|
||||
config = { ...DEFAULT_CONFIG };
|
||||
console.error('Error reading config file', error);
|
||||
logger.info('Error reading config file.', error);
|
||||
}
|
||||
}
|
||||
|
||||
const RE_GT = />/g;
|
||||
const RE_WEBP = /\/format\/webp/gi;
|
||||
const RE_EXT = /\.(jpe?g|png|gif)(\?.*)?$/i;
|
||||
const HTTPS_PREFIX = 'https://';
|
||||
/**
|
||||
* If the config file does not exist, create it with DEFAULT_CONFIG.
|
||||
* @returns {void}
|
||||
*/
|
||||
const checkIfConfigExistsAndWriteIfNot = () => {
|
||||
if (!fs.existsSync(`${getDirName()}/../conf/config.json`)) {
|
||||
logger.info('Could not find config file. Will create one with default values now');
|
||||
fs.writeFileSync(`${getDirName()}/../conf/config.json`, JSON.stringify({ ...DEFAULT_CONFIG }));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Normalize image URLs:
|
||||
* - Trim, remove stray '>' characters.
|
||||
* - Convert '/format/webp' segments to '/format/jpg'.
|
||||
* - Enforce HTTPS and ensure a valid image extension (jpg/png/gif). If URL contains '.jpg' without query, cut trailing parts.
|
||||
* - Return null for invalid inputs.
|
||||
* @param {string} url
|
||||
* @returns {string|null}
|
||||
*/
|
||||
const normalizeImageUrl = (url) => {
|
||||
if (typeof url !== 'string' || url.length === 0) return null;
|
||||
|
||||
@@ -87,20 +218,56 @@ const normalizeImageUrl = (url) => {
|
||||
return u;
|
||||
};
|
||||
|
||||
/**
|
||||
* returns Fredy's version
|
||||
* @returns {Promise<*|string>}
|
||||
*/
|
||||
async function getPackageVersion() {
|
||||
try {
|
||||
const packagePath = await packageUp();
|
||||
const packageJson = readFileSync(packagePath, 'utf8');
|
||||
const json = JSON.parse(packageJson);
|
||||
return json.version;
|
||||
} catch (error) {
|
||||
logger.error('Error reading version from package.json', error);
|
||||
}
|
||||
return 'N/A';
|
||||
}
|
||||
|
||||
/**
|
||||
* Sleep helper
|
||||
* @param {number} ms milliseconds to wait
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
function sleep(ms) {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
/**
|
||||
* returns a random into between start and end
|
||||
* @param a start int
|
||||
* @param b max int
|
||||
* @returns {*}
|
||||
*/
|
||||
function randomBetween(a, b) {
|
||||
return Math.floor(Math.random() * (b - a + 1)) + a;
|
||||
}
|
||||
|
||||
// Call refreshConfig() from the application entrypoint during startup to populate config.
|
||||
await refreshConfig();
|
||||
|
||||
export { isOneOf };
|
||||
export { normalizeImageUrl };
|
||||
export { inDevMode };
|
||||
export { nullOrEmpty };
|
||||
export { duringWorkingHoursOrNotSet };
|
||||
export { getDirName };
|
||||
export { config };
|
||||
export { buildHash };
|
||||
export default {
|
||||
export {
|
||||
isOneOf,
|
||||
normalizeImageUrl,
|
||||
inDevMode,
|
||||
nullOrEmpty,
|
||||
duringWorkingHoursOrNotSet,
|
||||
getDirName,
|
||||
sleep,
|
||||
randomBetween,
|
||||
config,
|
||||
buildHash,
|
||||
getPackageVersion,
|
||||
toJson,
|
||||
fromJson,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user