mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
using winston logger
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import { setDebug } from './utils.js';
|
||||
import puppeteerExtractor from './puppeteerExtractor.js';
|
||||
import { loadParser, parse } from './parser/parser.js';
|
||||
import logger from '../logger.js';
|
||||
|
||||
const DEFAULT_OPTIONS = {
|
||||
debug: false,
|
||||
@@ -32,7 +33,7 @@ export default class Extractor {
|
||||
loadParser(this.responseText);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error trying to load page.', error);
|
||||
logger.error('Error trying to load page.', error);
|
||||
}
|
||||
return this;
|
||||
};
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import * as cheerio from 'cheerio';
|
||||
import logger from '../../logger.js';
|
||||
|
||||
let $ = null;
|
||||
|
||||
@@ -8,19 +9,19 @@ export function loadParser(text) {
|
||||
|
||||
export function parse(crawlContainer, crawlFields, text, url) {
|
||||
if (!text) {
|
||||
console.warn('No content found for ', url);
|
||||
logger.warn('No content found for ', url);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!crawlContainer || !crawlFields) {
|
||||
console.warn('Cannot parse, selector was empty for url ', url);
|
||||
logger.warn('Cannot parse, selector was empty for url ', url);
|
||||
return null;
|
||||
}
|
||||
|
||||
const result = [];
|
||||
|
||||
if ($(crawlContainer).length === 0) {
|
||||
console.warn('No elements in crawl container found for url ', url);
|
||||
logger.warn('No elements in crawl container found for url ', url);
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -58,7 +59,7 @@ export function parse(crawlContainer, crawlFields, text, url) {
|
||||
|
||||
parsedObject[key] = value || null;
|
||||
} catch (error) {
|
||||
console.error(`Error parsing field '${key}' with selector '${fieldSelector}':`, error);
|
||||
logger.error(`Error parsing field '${key}' with selector '${fieldSelector}':`, error);
|
||||
parsedObject[key] = null;
|
||||
}
|
||||
}
|
||||
@@ -66,9 +67,7 @@ export function parse(crawlContainer, crawlFields, text, url) {
|
||||
if (parsedObject.id != null) {
|
||||
result.push(parsedObject);
|
||||
} else {
|
||||
/* eslint-disable no-console */
|
||||
console.debug('ID not found. Not relaying object.');
|
||||
/* eslint-enable no-console */
|
||||
logger.debug('ID not found. Not relaying object.');
|
||||
}
|
||||
});
|
||||
|
||||
@@ -91,7 +90,7 @@ function applyModifiers(value, modifiers) {
|
||||
value = value.replace(/\n/g, ' ');
|
||||
break;
|
||||
default:
|
||||
console.warn(`Unknown modifier: ${modifier}`);
|
||||
logger.warn(`Unknown modifier: ${modifier}`);
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import { debug, DEFAULT_HEADER, botDetected } from './utils.js';
|
||||
import logger from '../logger.js';
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
@@ -33,13 +34,13 @@ export default async function execute(url, waitForSelector, options) {
|
||||
const statusCode = response.status();
|
||||
|
||||
if (botDetected(pageSource, statusCode)) {
|
||||
console.warn('We have been detected as a bot :-/ Tried url: => ', url);
|
||||
logger.warn('We have been detected as a bot :-/ Tried url: => ', url);
|
||||
return null;
|
||||
}
|
||||
|
||||
return await page.content();
|
||||
} catch (error) {
|
||||
console.error('Error executing with puppeteer executor', error);
|
||||
logger.error('Error executing with puppeteer executor', error);
|
||||
return null;
|
||||
} finally {
|
||||
if (browser != null) {
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import logger from '../logger.js';
|
||||
|
||||
let debuggingOn = false;
|
||||
|
||||
export const DEFAULT_HEADER = {
|
||||
@@ -15,9 +17,7 @@ export const setDebug = (options) => {
|
||||
|
||||
export const debug = (message) => {
|
||||
if (debuggingOn) {
|
||||
/* eslint-disable no-console */
|
||||
console.debug(message);
|
||||
/* eslint-enable no-console */
|
||||
logger.debug(message);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user