mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
* init map view * switching off 3d buildings when satellite view is on * rename menu items * upgrading dependencies, adding provider to popups * adding screenshot for map view * fixing readme * next release version
50 lines
1.3 KiB
JavaScript
50 lines
1.3 KiB
JavaScript
/*
|
|
* Copyright (c) 2026 by Christian Kellner.
|
|
* Licensed under Apache-2.0 with Commons Clause and Attribution/Naming Clause
|
|
*/
|
|
|
|
import { setDebug } from './utils.js';
|
|
import puppeteerExtractor from './puppeteerExtractor.js';
|
|
import { loadParser, parse } from './parser/parser.js';
|
|
import logger from '../logger.js';
|
|
|
|
/**
 * Baseline settings for every Extractor instance. Options passed to the
 * constructor are spread on top of these, so any key given there wins.
 *
 * Frozen because this single object is shared by all instances: the defaults
 * must not be changed at runtime (in module/strict code a write now throws).
 *
 * - debug: default false; the merged options are handed to setDebug().
 * - puppeteerTimeout: default 60_000; forwarded to the puppeteer extractor
 *   (presumably milliseconds, confirm in puppeteerExtractor.js).
 * - puppeteerHeadless: default true; forwarded to the puppeteer extractor
 *   (presumably toggles headless browser mode, confirm in puppeteerExtractor.js).
 */
const DEFAULT_OPTIONS = Object.freeze({
  debug: false,
  puppeteerTimeout: 60_000,
  puppeteerHeadless: true,
});
|
|
|
|
/**
 * Fetches a page through the puppeteer extractor, keeps the returned text on
 * the instance and hands it to the parser module.
 */
export default class Extractor {
  /**
   * @param options optional overrides; spread on top of DEFAULT_OPTIONS
   */
  constructor(options) {
    const mergedOptions = { ...DEFAULT_OPTIONS, ...options };

    this.options = mergedOptions;
    this.responseText = null;
    setDebug(mergedOptions);
  }

  /**
   * Loads the given url and, when content came back, feeds it to the parser.
   *
   * When the target is a SPA a selector has to be supplied; without it the
   * response will never contain the data you are actually after.
   *
   * Errors raised while loading are logged and not rethrown, in which case
   * responseText stays null.
   *
   * @param url address of the page to load
   * @param waitForSelector selector passed through to the puppeteer extractor (defaults to null)
   * @returns a promise resolving to this instance, so calls can be chained
   */
  execute = async (url, waitForSelector = null) => {
    // Drop whatever an earlier run stored before attempting the new load.
    this.responseText = null;

    try {
      const pageContent = await puppeteerExtractor(url, waitForSelector, this.options);
      this.responseText = pageContent;

      // Only hand real content to the parser; null/undefined means nothing was fetched.
      if (pageContent !== null && pageContent !== undefined) {
        loadParser(pageContent);
      }
    } catch (err) {
      logger.error('Error trying to load page.', err);
    }

    return this;
  };

  /**
   * Delegates to parse() using the text stored by the last execute() call.
   *
   * @param crawlContainer forwarded unchanged to parse()
   * @param crawlFields forwarded unchanged to parse()
   * @param url forwarded unchanged to parse()
   * @returns whatever parse() returns
   */
  parseResponseText = (crawlContainer, crawlFields, url) =>
    parse(crawlContainer, crawlFields, this.responseText, url);
}
|