import {setDebug} from './utils.js'; import puppeteerExtractor from './puppeteerExtractor.js'; import {loadParser, parse} from './parser/parser.js'; const DEFAULT_OPTIONS = { debug: false, puppeteerTimeout: 20_000, puppeteerHeadless: true }; export default class Extractor { constructor(options) { this.options = { ...DEFAULT_OPTIONS, ...options }; this.responseText = null; setDebug(this.options); } /** * if you are extracting data from a SPA, you must provide a selector, otherwise * your response will never contain what you are really looking for * @param url * @param waitForSelector */ execute = async (url, waitForSelector = null) => { this.responseText = null; try { this.responseText = await puppeteerExtractor(url, waitForSelector, this.options); if(this.responseText != null) { loadParser(this.responseText); } } catch (error) { console.error('Error trying to load page.', error); } return this; }; parseResponseText = (crawlContainer, crawlFields) => { return parse(crawlContainer, crawlFields, this.responseText); }; }