mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2896d531e4 | ||
|
|
0cbfa25062 | ||
|
|
bcd3042026 |
@@ -227,7 +227,7 @@ class FredyPipelineExecutioner {
|
|||||||
const extractor = new Extractor({ ...this._providerConfig.puppeteerOptions, browser: this._browser });
|
const extractor = new Extractor({ ...this._providerConfig.puppeteerOptions, browser: this._browser });
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
extractor
|
extractor
|
||||||
.execute(url, this._providerConfig.waitForSelector)
|
.execute(url, this._providerConfig.waitForSelector, this._providerId)
|
||||||
.then(() => {
|
.then(() => {
|
||||||
const listings = extractor.parseResponseText(
|
const listings = extractor.parseResponseText(
|
||||||
this._providerConfig.crawlContainer,
|
this._providerConfig.crawlContainer,
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ function parseId(shortenedLink) {
|
|||||||
|
|
||||||
async function fetchDetails(listing, browser) {
|
async function fetchDetails(listing, browser) {
|
||||||
try {
|
try {
|
||||||
const html = await puppeteerExtractor(listing.link, null, { browser });
|
const html = await puppeteerExtractor(listing.link, null, { browser, name: 'immobilienDe_details' });
|
||||||
if (!html) return listing;
|
if (!html) return listing;
|
||||||
|
|
||||||
const $ = cheerio.load(html);
|
const $ = cheerio.load(html);
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ let appliedBlackList = [];
|
|||||||
|
|
||||||
async function fetchDetails(listing, browser) {
|
async function fetchDetails(listing, browser) {
|
||||||
try {
|
try {
|
||||||
const html = await puppeteerExtractor(listing.link, null, { browser });
|
const html = await puppeteerExtractor(listing.link, null, { browser, name: 'immowelt_details' });
|
||||||
if (!html) return listing;
|
if (!html) return listing;
|
||||||
|
|
||||||
const $ = cheerio.load(html);
|
const $ = cheerio.load(html);
|
||||||
|
|||||||
@@ -128,7 +128,7 @@ async function enrichListingFromDetails(listing, browser) {
|
|||||||
if (!absoluteLink) return listing;
|
if (!absoluteLink) return listing;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const html = await puppeteerExtractor(absoluteLink, null, { browser });
|
const html = await puppeteerExtractor(absoluteLink, null, { browser, name: 'kleinanzeigen_details' });
|
||||||
if (!html) return { ...listing, link: absoluteLink };
|
if (!html) return { ...listing, link: absoluteLink };
|
||||||
|
|
||||||
const { detailAddress, detailDescription } = extractDetailFromHtml(html);
|
const { detailAddress, detailDescription } = extractDetailFromHtml(html);
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ let appliedBlackList = [];
|
|||||||
|
|
||||||
async function fetchDetails(listing, browser) {
|
async function fetchDetails(listing, browser) {
|
||||||
try {
|
try {
|
||||||
const html = await puppeteerExtractor(listing.link, 'body', { browser });
|
const html = await puppeteerExtractor(listing.link, 'body', { browser, name: 'sparkasse_details' });
|
||||||
|
|
||||||
const $ = cheerio.load(html);
|
const $ = cheerio.load(html);
|
||||||
const nextDataRaw = $('#__NEXT_DATA__').text;
|
const nextDataRaw = $('#__NEXT_DATA__').text;
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ let appliedBlackList = [];
|
|||||||
|
|
||||||
async function fetchDetails(listing, browser) {
|
async function fetchDetails(listing, browser) {
|
||||||
try {
|
try {
|
||||||
const html = await puppeteerExtractor(listing.link, null, { browser });
|
const html = await puppeteerExtractor(listing.link, null, { browser, name: 'wgGesucht_details' });
|
||||||
if (!html) return listing;
|
if (!html) return listing;
|
||||||
|
|
||||||
const $ = cheerio.load(html);
|
const $ = cheerio.load(html);
|
||||||
|
|||||||
@@ -29,11 +29,12 @@ export default class Extractor {
|
|||||||
* your response will never contain what you are really looking for
|
* your response will never contain what you are really looking for
|
||||||
* @param url
|
* @param url
|
||||||
* @param waitForSelector
|
* @param waitForSelector
|
||||||
|
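* @param jobKey optional identifier (defaults to null) that is passed on to the puppeteer extractor as `options.name`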
|
||||||
*/
|
*/
|
||||||
execute = async (url, waitForSelector = null) => {
|
execute = async (url, waitForSelector = null, jobKey = null) => {
|
||||||
this.responseText = null;
|
this.responseText = null;
|
||||||
try {
|
try {
|
||||||
this.responseText = await puppeteerExtractor(url, waitForSelector, this.options);
|
this.responseText = await puppeteerExtractor(url, waitForSelector, { ...this.options, name: jobKey });
|
||||||
if (this.responseText != null) {
|
if (this.responseText != null) {
|
||||||
loadParser(this.responseText);
|
loadParser(this.responseText);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -148,7 +148,11 @@ export default async function execute(url, waitForSelector, options) {
|
|||||||
if (botDetected(pageSource, statusCode)) {
|
if (botDetected(pageSource, statusCode)) {
|
||||||
logger.warn('We have been detected as a bot :-/ Tried url: => ', url);
|
logger.warn('We have been detected as a bot :-/ Tried url: => ', url);
|
||||||
|
|
||||||
await trackPoi(TRACKING_POIS.DETECTED_AS_BOT);
|
if (options != null && options.name != null) {
|
||||||
|
await trackPoi(TRACKING_POIS.DETECTED_AS_BOT + '_' + options.name);
|
||||||
|
} else {
|
||||||
|
await trackPoi(TRACKING_POIS.DETECTED_AS_BOT);
|
||||||
|
}
|
||||||
|
|
||||||
result = null;
|
result = null;
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "fredy",
|
"name": "fredy",
|
||||||
"version": "22.0.5",
|
"version": "22.0.7",
|
||||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"prepare": "husky",
|
"prepare": "husky",
|
||||||
|
|||||||
@@ -95,7 +95,10 @@ async function downloadHtmlProvider(name, providerConfig, launchBrowser, closeBr
|
|||||||
|
|
||||||
const browser = await launchBrowser(providerConfig.url, {});
|
const browser = await launchBrowser(providerConfig.url, {});
|
||||||
try {
|
try {
|
||||||
const html = await puppeteerExtractor(providerConfig.url, providerConfig.waitForSelector, { browser });
|
const html = await puppeteerExtractor(providerConfig.url, providerConfig.waitForSelector, {
|
||||||
|
browser,
|
||||||
|
name: 'dowload_fixtures',
|
||||||
|
});
|
||||||
|
|
||||||
if (!html) {
|
if (!html) {
|
||||||
console.warn(` Failed to download ${name}`);
|
console.warn(` Failed to download ${name}`);
|
||||||
|
|||||||
@@ -174,7 +174,7 @@ const JobGrid = () => {
|
|||||||
Toast.success('Job status successfully changed');
|
Toast.success('Job status successfully changed');
|
||||||
loadData();
|
loadData();
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
Toast.error(error);
|
Toast.error(error.error);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ const Users = function Users() {
|
|||||||
await actions.jobsData.getJobs();
|
await actions.jobsData.getJobs();
|
||||||
await actions.user.getUsers();
|
await actions.user.getUsers();
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
Toast.error(error);
|
Toast.error(error.error);
|
||||||
setUserIdToBeRemoved(null);
|
setUserIdToBeRemoved(null);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user