mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7a9d49899b | ||
|
|
9a87c58d3e | ||
|
|
fdd7e835e8 | ||
|
|
00d6a12b30 | ||
|
|
05218800d2 |
@@ -63,13 +63,15 @@ class FredyPipelineExecutioner {
|
||||
* @param {string} providerId The ID of the provider currently in use.
|
||||
* @param {string} jobKey Key of the job that is currently running (from within the config).
|
||||
* @param {SimilarityCache} similarityCache Cache instance for checking similar entries.
|
||||
* @param browser
|
||||
*/
|
||||
constructor(providerConfig, notificationConfig, providerId, jobKey, similarityCache) {
|
||||
constructor(providerConfig, notificationConfig, providerId, jobKey, similarityCache, browser) {
|
||||
this._providerConfig = providerConfig;
|
||||
this._notificationConfig = notificationConfig;
|
||||
this._providerId = providerId;
|
||||
this._jobKey = jobKey;
|
||||
this._similarityCache = similarityCache;
|
||||
this._browser = browser;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -119,7 +121,7 @@ class FredyPipelineExecutioner {
|
||||
* @returns {Promise<Listing[]>} Resolves with an array of listings (empty when none found).
|
||||
*/
|
||||
_getListings(url) {
|
||||
const extractor = new Extractor();
|
||||
const extractor = new Extractor({ ...this._providerConfig.puppeteerOptions, browser: this._browser });
|
||||
return new Promise((resolve, reject) => {
|
||||
extractor
|
||||
.execute(url, this._providerConfig.waitForSelector)
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2026 by Christian Kellner.
|
||||
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
|
||||
*/
|
||||
|
||||
import { isOneOf, buildHash } from '../utils.js';
|
||||
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
|
||||
let appliedBlackList = [];
|
||||
|
||||
function normalize(o) {
|
||||
const size = o.size != null ? o.size.replace('Wohnfläche ', '') : 'N/A m²';
|
||||
const price = o.price.replace('Kaufpreis ', '');
|
||||
const address = o.address?.split(' • ')?.pop() ?? null;
|
||||
const title = o.title || 'No title available';
|
||||
const link = o.link != null ? decodeURIComponent(o.link) : config.url;
|
||||
const id = buildHash(title, price);
|
||||
return Object.assign(o, { id, address, price, size, title, link });
|
||||
}
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: 'div[data-testid="serp-core-classified-card-testid"]',
|
||||
sortByDateParam: 'sortby=19',
|
||||
waitForSelector: 'div[data-testid="serp-gridcontainer-testid"]',
|
||||
crawlFields: {
|
||||
id: 'button@title |trim',
|
||||
title: 'button@title |trim',
|
||||
price: 'div[data-testid="cardmfe-price-testid"] | trim',
|
||||
size: 'div[data-testid="cardmfe-keyfacts-testid"] | trim',
|
||||
address: 'div[data-testid="cardmfe-description-box-address"] | trim',
|
||||
image: 'div[data-testid="cardmfe-picture-box-test-id"] img@src',
|
||||
link: 'button@data-base',
|
||||
description: 'div[data-testid="cardmfe-description-text-test-id"] | trim',
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
activeTester: checkIfListingIsActive,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlackList = blacklist || [];
|
||||
};
|
||||
export const metaInformation = {
|
||||
name: 'Immonet',
|
||||
baseUrl: 'https://www.immonet.de/',
|
||||
id: 'immonet',
|
||||
};
|
||||
export { config };
|
||||
@@ -19,52 +19,80 @@ import path from 'path';
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
export default async function execute(url, waitForSelector, options) {
|
||||
let browser;
|
||||
let page;
|
||||
let result;
|
||||
export async function launchBrowser(url, options) {
|
||||
const preCfg = getPreLaunchConfig(url, options || {});
|
||||
const launchArgs = [
|
||||
'--no-sandbox',
|
||||
'--disable-gpu',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-crash-reporter',
|
||||
'--no-first-run',
|
||||
'--no-default-browser-check',
|
||||
preCfg.langArg,
|
||||
preCfg.windowSizeArg,
|
||||
...preCfg.extraArgs,
|
||||
];
|
||||
if (options?.proxyUrl) {
|
||||
launchArgs.push(`--proxy-server=${options.proxyUrl}`);
|
||||
}
|
||||
|
||||
let userDataDir;
|
||||
let removeUserDataDir = false;
|
||||
if (options && options.userDataDir) {
|
||||
userDataDir = options.userDataDir;
|
||||
} else {
|
||||
const prefix = path.join(os.tmpdir(), 'puppeteer-fredy-');
|
||||
userDataDir = fs.mkdtempSync(prefix);
|
||||
removeUserDataDir = true;
|
||||
}
|
||||
|
||||
const browser = await puppeteer.launch({
|
||||
headless: options?.puppeteerHeadless ?? true,
|
||||
args: launchArgs,
|
||||
timeout: options?.puppeteerTimeout || 45_000,
|
||||
userDataDir,
|
||||
executablePath: options?.executablePath,
|
||||
});
|
||||
|
||||
browser.__fredy_userDataDir = userDataDir;
|
||||
browser.__fredy_removeUserDataDir = removeUserDataDir;
|
||||
|
||||
return browser;
|
||||
}
|
||||
|
||||
export async function closeBrowser(browser) {
|
||||
if (!browser) return;
|
||||
const userDataDir = browser.__fredy_userDataDir;
|
||||
const removeUserDataDir = browser.__fredy_removeUserDataDir;
|
||||
try {
|
||||
await browser.close();
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
if (removeUserDataDir && userDataDir) {
|
||||
try {
|
||||
await fs.promises.rm(userDataDir, { recursive: true, force: true });
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export default async function execute(url, waitForSelector, options) {
|
||||
let browser = options?.browser;
|
||||
let isExternalBrowser = !!browser;
|
||||
let page;
|
||||
let result;
|
||||
try {
|
||||
debug(`Sending request to ${url} using Puppeteer.`);
|
||||
|
||||
// Prepare a dedicated temporary userDataDir to avoid leaking /tmp/.org.chromium.* dirs
|
||||
if (options && options.userDataDir) {
|
||||
userDataDir = options.userDataDir;
|
||||
removeUserDataDir = !!options.cleanupUserDataDir;
|
||||
} else {
|
||||
const prefix = path.join(os.tmpdir(), 'puppeteer-fredy-');
|
||||
userDataDir = fs.mkdtempSync(prefix);
|
||||
removeUserDataDir = true;
|
||||
if (!isExternalBrowser) {
|
||||
browser = await launchBrowser(url, options);
|
||||
}
|
||||
|
||||
const launchArgs = [
|
||||
'--no-sandbox',
|
||||
'--disable-gpu',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-crash-reporter',
|
||||
'--no-first-run',
|
||||
'--no-default-browser-check',
|
||||
];
|
||||
if (options?.proxyUrl) {
|
||||
launchArgs.push(`--proxy-server=${options.proxyUrl}`);
|
||||
}
|
||||
// Prepare bot prevention pre-launch config
|
||||
const preCfg = getPreLaunchConfig(url, options || {});
|
||||
launchArgs.push(preCfg.langArg);
|
||||
launchArgs.push(preCfg.windowSizeArg);
|
||||
launchArgs.push(...preCfg.extraArgs);
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: options?.puppeteerHeadless ?? true,
|
||||
args: launchArgs,
|
||||
timeout: options?.puppeteerTimeout || 30_000,
|
||||
userDataDir,
|
||||
executablePath: options?.executablePath, // allow using system Chrome
|
||||
});
|
||||
|
||||
page = await browser.newPage();
|
||||
const preCfg = getPreLaunchConfig(url, options || {});
|
||||
await applyBotPreventionToPage(page, preCfg);
|
||||
// Provide languages value before navigation
|
||||
await applyLanguagePersistence(page, preCfg);
|
||||
@@ -104,7 +132,7 @@ export default async function execute(url, waitForSelector, options) {
|
||||
result = pageSource || (await page.content());
|
||||
}
|
||||
} catch (error) {
|
||||
if (error?.message?.includes('Timeout')) {
|
||||
if (error?.name?.includes('Timeout')) {
|
||||
logger.debug('Error executing with puppeteer executor', error);
|
||||
} else {
|
||||
logger.warn('Error executing with puppeteer executor', error);
|
||||
@@ -118,19 +146,8 @@ export default async function execute(url, waitForSelector, options) {
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
try {
|
||||
if (browser != null) {
|
||||
await browser.close();
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
try {
|
||||
if (removeUserDataDir && userDataDir) {
|
||||
await fs.promises.rm(userDataDir, { recursive: true, force: true });
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
if (browser != null && !isExternalBrowser) {
|
||||
await closeBrowser(browser);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
||||
@@ -13,6 +13,7 @@ import FredyPipelineExecutioner from '../../FredyPipelineExecutioner.js';
|
||||
import * as similarityCache from '../similarity-check/similarityCache.js';
|
||||
import { isRunning, markFinished, markRunning } from './run-state.js';
|
||||
import { sendToUsers } from '../sse/sse-broker.js';
|
||||
import * as puppeteerExtractor from '../extractor/puppeteerExtractor.js';
|
||||
|
||||
/**
|
||||
* Initializes the job execution service.
|
||||
@@ -94,7 +95,7 @@ export function initJobExecutionService({ providers, settings, intervalMs }) {
|
||||
* @param {{userId?: string, isAdmin?: boolean}} [context] - Who requested the run; determines job filtering.
|
||||
* @returns {void}
|
||||
*/
|
||||
function runAll(respectWorkingHours = true, context = undefined) {
|
||||
async function runAll(respectWorkingHours = true, context = undefined) {
|
||||
if (settings.demoMode) return;
|
||||
const now = Date.now();
|
||||
const withinHours = duringWorkingHoursOrNotSet(settings, now);
|
||||
@@ -103,15 +104,18 @@ export function initJobExecutionService({ providers, settings, intervalMs }) {
|
||||
return;
|
||||
}
|
||||
settings.lastRun = now;
|
||||
jobStorage
|
||||
const jobs = jobStorage
|
||||
.getJobs()
|
||||
.filter((job) => job.enabled)
|
||||
.filter((job) => {
|
||||
if (!context) return true; // startup/cron → all
|
||||
if (context.isAdmin) return true; // admin → all
|
||||
return context.userId ? job.userId === context.userId : false; // user → own
|
||||
})
|
||||
.forEach((job) => executeJob(job));
|
||||
});
|
||||
|
||||
for (const job of jobs) {
|
||||
await executeJob(job);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -154,28 +158,42 @@ export function initJobExecutionService({ providers, settings, intervalMs }) {
|
||||
} catch (err) {
|
||||
logger.warn('Failed to emit start status for job', job.id, err);
|
||||
}
|
||||
let browser;
|
||||
try {
|
||||
const jobProviders = job.provider.filter(
|
||||
(p) => providers.find((loaded) => loaded.metaInformation.id === p.id) != null,
|
||||
);
|
||||
const executions = jobProviders.map(async (prov) => {
|
||||
const matchedProvider = providers.find((loaded) => loaded.metaInformation.id === prov.id);
|
||||
matchedProvider.init(prov, job.blacklist);
|
||||
await new FredyPipelineExecutioner(
|
||||
matchedProvider.config,
|
||||
job.notificationAdapter,
|
||||
prov.id,
|
||||
job.id,
|
||||
similarityCache,
|
||||
).execute();
|
||||
});
|
||||
const results = await Promise.allSettled(executions);
|
||||
for (const r of results) {
|
||||
if (r.status === 'rejected') {
|
||||
logger.error(r.reason);
|
||||
for (const prov of jobProviders) {
|
||||
try {
|
||||
const matchedProvider = providers.find((loaded) => loaded.metaInformation.id === prov.id);
|
||||
matchedProvider.init(prov, job.blacklist);
|
||||
|
||||
if (browser && !browser.isConnected()) {
|
||||
logger.debug('Browser is disconnected, nullifying to launch a new one.');
|
||||
await puppeteerExtractor.closeBrowser(browser);
|
||||
browser = null;
|
||||
}
|
||||
|
||||
if (!browser && matchedProvider.config.getListings == null) {
|
||||
browser = await puppeteerExtractor.launchBrowser(matchedProvider.config.url, {});
|
||||
}
|
||||
|
||||
await new FredyPipelineExecutioner(
|
||||
matchedProvider.config,
|
||||
job.notificationAdapter,
|
||||
prov.id,
|
||||
job.id,
|
||||
similarityCache,
|
||||
browser,
|
||||
).execute();
|
||||
} catch (err) {
|
||||
logger.error(err);
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
if (browser) {
|
||||
await puppeteerExtractor.closeBrowser(browser);
|
||||
}
|
||||
markFinished(job.id);
|
||||
try {
|
||||
bus.emit('jobs:status', { jobId: job.id, running: false });
|
||||
|
||||
13553
package-lock.json
generated
13553
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "fredy",
|
||||
"version": "19.4.1",
|
||||
"version": "19.5.1",
|
||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||
"scripts": {
|
||||
"prepare": "husky",
|
||||
@@ -80,7 +80,7 @@
|
||||
"node-mailjet": "6.0.11",
|
||||
"p-throttle": "^8.1.0",
|
||||
"package-up": "^5.0.0",
|
||||
"puppeteer": "^24.37.2",
|
||||
"puppeteer": "^24.37.3",
|
||||
"puppeteer-extra": "^3.3.6",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||
"query-string": "9.3.1",
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2026 by Christian Kellner.
|
||||
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
|
||||
*/
|
||||
|
||||
import * as similarityCache from '../../lib/services/similarity-check/similarityCache.js';
|
||||
import { get } from '../mocks/mockNotification.js';
|
||||
import { mockFredy, providerConfig } from '../utils.js';
|
||||
import { expect } from 'chai';
|
||||
import * as provider from '../../lib/provider/immonet.js';
|
||||
|
||||
describe('#immonet testsuite()', () => {
|
||||
it('should test immonet provider', async () => {
|
||||
const Fredy = await mockFredy();
|
||||
provider.init(providerConfig.immonet, [], []);
|
||||
|
||||
const fredy = new Fredy(provider.config, null, provider.metaInformation.id, 'immonet', similarityCache);
|
||||
const listing = await fredy.execute();
|
||||
|
||||
expect(listing).to.be.a('array');
|
||||
const notificationObj = get();
|
||||
expect(notificationObj).to.be.a('object');
|
||||
expect(notificationObj.serviceName).to.equal('immonet');
|
||||
notificationObj.payload.forEach((notify) => {
|
||||
/** check the actual structure **/
|
||||
expect(notify.id).to.be.a('string');
|
||||
expect(notify.price).to.be.a('string');
|
||||
expect(notify.size).to.be.a('string');
|
||||
expect(notify.title).to.be.a('string');
|
||||
expect(notify.link).to.be.a('string');
|
||||
expect(notify.address).to.be.a('string');
|
||||
/** check the values if possible **/
|
||||
expect(notify.size).that.does.include('m²');
|
||||
expect(notify.title).to.be.not.empty;
|
||||
expect(notify.address).to.be.not.empty;
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -8,10 +8,6 @@
|
||||
"url": "https://www.immobilien.de/Wohnen/Suchergebnisse-51797.html?search._digest=true&search._filter=wohnen&search.flaeche_von=50&search.objektart=wohnung&search.preis_bis=1200&search.typ=mieten&search.umkreis=15&search.wo=district%3A2434%2C2695%2C2621%2C2700%2C2967%2C2734%2C2909%2C2955%2C2392%2C2746%2C2767%2C2982%2C2904%2C2612%2C2892%2C2587%2C2871%2C2975%2C2591%2C2887%2C2569%2C2640%2C2735&sort_col=*created_ts&sort_dir=desc",
|
||||
"enabled": true
|
||||
},
|
||||
"immonet": {
|
||||
"url": "https://www.immonet.de/classified-search?distributionTypes=Buy,Buy_Auction,Compulsory_Auction&estateTypes=House,Apartment&locations=AD08DE2112&order=Default&m=homepage_new_search_classified_search_result",
|
||||
"enabled": true
|
||||
},
|
||||
"immowelt": {
|
||||
"url": "https://www.immowelt.de/classified-search?distributionTypes=Buy,Buy_Auction,Compulsory_Auction&estateTypes=House,Apartment&locations=AD08DE2350",
|
||||
"enabled": true
|
||||
|
||||
@@ -14,12 +14,6 @@
|
||||
"shouldBecome": "https://www.wg-gesucht.de/1-zimmer-wohnungen-in-Dusseldorf.30.1.1.0.html?sort_column=0&sort_order=0",
|
||||
"id": "wgGesucht"
|
||||
},
|
||||
|
||||
{
|
||||
"url": "https://www.immonet.de/immobiliensuche/sel.do?sortby=0&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
|
||||
"shouldBecome": "https://www.immonet.de/immobiliensuche/sel.do?sortby=19&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
|
||||
"id": "immonet"
|
||||
},
|
||||
{
|
||||
"url": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/",
|
||||
"shouldBecome": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/?Sortierung=Id&Richtung=DESC",
|
||||
|
||||
@@ -11,7 +11,7 @@ import GeneralSettings from './views/generalSettings/GeneralSettings';
|
||||
import UserSettings from './views/userSettings/UserSettings';
|
||||
import JobMutation from './views/jobs/mutation/JobMutation';
|
||||
import UserMutator from './views/user/mutation/UserMutator';
|
||||
import { useActions, useSelector, useFredyState } from './services/state/store';
|
||||
import { useActions, useSelector } from './services/state/store';
|
||||
import { Routes, Route, Navigate } from 'react-router-dom';
|
||||
import Login from './views/login/Login';
|
||||
import Users from './views/user/Users';
|
||||
@@ -41,24 +41,21 @@ export default function FredyApp() {
|
||||
useEffect(() => {
|
||||
async function init() {
|
||||
await actions.user.getCurrentUser();
|
||||
const user = useFredyState.getState().user.currentUser;
|
||||
if (!user || Object.keys(user).length === 0) {
|
||||
setLoading(false);
|
||||
return;
|
||||
if (!needsLogin()) {
|
||||
await actions.provider.getProvider();
|
||||
await actions.jobsData.getJobs();
|
||||
await actions.jobsData.getSharableUserList();
|
||||
await actions.notificationAdapter.getAdapter();
|
||||
await actions.generalSettings.getGeneralSettings();
|
||||
await actions.userSettings.getUserSettings();
|
||||
await actions.versionUpdate.getVersionUpdate();
|
||||
await actions.tracking.getTrackingPois();
|
||||
}
|
||||
await actions.provider.getProvider();
|
||||
await actions.jobsData.getJobs();
|
||||
await actions.jobsData.getSharableUserList();
|
||||
await actions.notificationAdapter.getAdapter();
|
||||
await actions.generalSettings.getGeneralSettings();
|
||||
await actions.userSettings.getUserSettings();
|
||||
await actions.versionUpdate.getVersionUpdate();
|
||||
await actions.tracking.getTrackingPois();
|
||||
setLoading(false);
|
||||
}
|
||||
|
||||
init();
|
||||
}, []);
|
||||
}, [currentUser?.userId]);
|
||||
|
||||
const needsLogin = () => {
|
||||
return currentUser == null || Object.keys(currentUser).length === 0;
|
||||
@@ -68,7 +65,10 @@ export default function FredyApp() {
|
||||
const { Sider, Content } = Layout;
|
||||
|
||||
return loading ? null : needsLogin() ? (
|
||||
<Login />
|
||||
<Routes>
|
||||
<Route path="/login" element={<Login />} />
|
||||
<Route path="*" element={<Navigate to="/login" replace />} />
|
||||
</Routes>
|
||||
) : (
|
||||
<Layout className="app">
|
||||
<Sider>
|
||||
@@ -137,7 +137,6 @@ export default function FredyApp() {
|
||||
}
|
||||
/>
|
||||
|
||||
{/* Authenticated fallbacks */}
|
||||
<Route path="/" element={<Navigate to="/dashboard" replace />} />
|
||||
</Routes>
|
||||
</Content>
|
||||
|
||||
@@ -6283,7 +6283,7 @@ puppeteer-extra@^3.3.6:
|
||||
debug "^4.1.1"
|
||||
deepmerge "^4.2.2"
|
||||
|
||||
puppeteer@^24.37.2:
|
||||
puppeteer@^24.37.3:
|
||||
version "24.37.3"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer/-/puppeteer-24.37.3.tgz#68327e1f571887ed1ba7916d770e6028975a21db"
|
||||
integrity sha512-AUGGWq0BhPM+IOS2U9A+ZREH3HDFkV1Y5HERYGDg5cbGXjoGsTCT7/A6VZRfNU0UJJdCclyEimZICkZW6pqJyw==
|
||||
|
||||
Reference in New Issue
Block a user