mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
upgrading dependencies, fixing immowelt, using hash of price and id as unique identifier for listings
This commit is contained in:
@@ -1,18 +1,22 @@
|
||||
import utils from '../utils.js';
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
let appliedBlackList = [];
|
||||
|
||||
function normalize(o) {
|
||||
let size = `${o.size.replace(' Wohnfläche ', '').trim()}`;
|
||||
if (o.rooms != null) {
|
||||
size += ` / / ${o.rooms.trim()}`;
|
||||
}
|
||||
const link = `https://www.1a-immobilienmarkt.de/expose/${o.id}.html`;
|
||||
return Object.assign(o, { size, link });
|
||||
const id = buildHash(o.id, o.price);
|
||||
return Object.assign(o, { id, size, link });
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '.tabelle',
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import utils from '../utils.js';
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
let appliedBlackList = [];
|
||||
function shortenLink(link) {
|
||||
return link.substring(0, link.indexOf('?'));
|
||||
@@ -7,12 +7,12 @@ function parseId(shortenedLink) {
|
||||
return shortenedLink.substring(shortenedLink.lastIndexOf('/') + 1);
|
||||
}
|
||||
function normalize(o) {
|
||||
const id = parseId(shortenLink(o.link));
|
||||
const size = o.size || 'N/A m²';
|
||||
const price = o.price || 'N/A €';
|
||||
const title = o.title || 'No title available';
|
||||
const address = o.address || 'No address available';
|
||||
const link = shortenLink(o.link);
|
||||
const id = buildHash(parseId(shortenLink(o.link)), o.price);
|
||||
return Object.assign(o, { id, price, size, title, address, link });
|
||||
}
|
||||
function applyBlacklist(o) {
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import utils from '../utils.js';
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
let appliedBlackList = [];
|
||||
function normalize(o) {
|
||||
const id = o.id.substring(o.id.lastIndexOf('/') + 1, o.id.length);
|
||||
const size = o.size != null ? o.size.replace('Wohnfläche ', '') : 'N/A m²';
|
||||
const price = o.price.replace('Kaufpreis ', '');
|
||||
const address = o.address.split(' • ')[o.address.split(' • ').length - 1];
|
||||
const title = o.title || 'No title available';
|
||||
const link = o.id;
|
||||
const id = buildHash(o.id.substring(o.id.lastIndexOf('/') + 1, o.id.length), price);
|
||||
return Object.assign(o, { id, address, price, size, title, link });
|
||||
}
|
||||
function applyBlacklist(o) {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import utils from '../utils.js';
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
let appliedBlackList = [];
|
||||
function nullOrEmpty(val) {
|
||||
return val == null || val.length === 0;
|
||||
@@ -7,7 +7,8 @@ function normalize(o) {
|
||||
const title = nullOrEmpty(o.title) ? 'NO TITLE FOUND' : o.title.replace('NEU', '');
|
||||
const address = nullOrEmpty(o.address) ? 'NO ADDRESS FOUND' : (o.address || '').replace(/\(.*\),.*$/, '').trim();
|
||||
const link = nullOrEmpty(o.link) ? 'NO LINK' : `https://www.immobilienscout24.de${o.link.substring(o.link.indexOf('/expose'))}`;
|
||||
return Object.assign(o, { title, address, link });
|
||||
const id = buildHash(o.id, o.price);
|
||||
return Object.assign(o, { id, title, address, link });
|
||||
}
|
||||
function applyBlacklist(o) {
|
||||
return !utils.isOneOf(o.title, appliedBlackList);
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
import utils from '../utils.js';
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
function normalize(o) {
|
||||
const id = o.id.substring(o.id.indexOf('-') + 1, o.id.length);
|
||||
const size = o.size || 'N/A m²';
|
||||
const price = (o.price || '--- €').replace('Preis auf Anfrage', '--- €');
|
||||
const title = o.title || 'No title available';
|
||||
const link = `https://immo.swp.de/immobilien/${id}`;
|
||||
const immoId = o.id.substring(o.id.indexOf('-') + 1, o.id.length);
|
||||
const link = `https://immo.swp.de/immobilien/${immoId}`;
|
||||
const description = o.description;
|
||||
const id = buildHash(immoId, price);
|
||||
return Object.assign(o, {id, price, size, title, link, description});
|
||||
}
|
||||
|
||||
|
||||
@@ -1,37 +1,42 @@
|
||||
import utils from '../utils.js';
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
function normalize(o) {
|
||||
return o;
|
||||
const id = buildHash(o.id, o.price);
|
||||
return Object.assign(o, {id});
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
return titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: "div[class^='EstateItem-']",
|
||||
sortByDateParam: 'sd=DESC&sf=TIMESTAMP',
|
||||
crawlFields: {
|
||||
id: 'a@id',
|
||||
price: "div[class^='KeyFacts-'] [data-test='price'] | removeNewline | trim",
|
||||
size: "div[class^='KeyFacts-'] [data-test='area'] | removeNewline | trim",
|
||||
title: "div[class^='FactsMain-'] h2",
|
||||
link: 'a@href',
|
||||
address: "div[class^='estateFacts-'] span | removeNewline | trim",
|
||||
},
|
||||
paginate: '#pnlPaging #nlbPlus@href',
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
url: null,
|
||||
crawlContainer: 'div[data-testid="serp-card-testid"]',
|
||||
sortByDateParam: 'sd=DESC&sf=TIMESTAMP',
|
||||
crawlFields: {
|
||||
id: 'a@id',
|
||||
price: 'div[data-testid="cardmfe-price-testid"] | removeNewline | trim',
|
||||
size: 'div[data-testid="cardmfe-keyfacts-testid"] | removeNewline | trim',
|
||||
title: '.css-1cbj9xw',
|
||||
link: 'a@href',
|
||||
address: 'div[data-testid="cardmfe-description-box-address"] | removeNewline | trim',
|
||||
},
|
||||
paginate: '#pnlPaging #nlbPlus@href',
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlackList = blacklist || [];
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlackList = blacklist || [];
|
||||
};
|
||||
export const metaInformation = {
|
||||
name: 'Immowelt',
|
||||
baseUrl: 'https://www.immowelt.de/',
|
||||
id: 'immowelt',
|
||||
name: 'Immowelt',
|
||||
baseUrl: 'https://www.immowelt.de/',
|
||||
id: 'immowelt',
|
||||
};
|
||||
export { config };
|
||||
export {config};
|
||||
|
||||
@@ -1,44 +1,49 @@
|
||||
import utils from '../utils.js';
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
let appliedBlacklistedDistricts = [];
|
||||
|
||||
function normalize(o) {
|
||||
const size = o.size || '--- m²';
|
||||
return Object.assign(o, { size });
|
||||
const size = o.size || '--- m²';
|
||||
const id = buildHash(o.id, o.price);
|
||||
return Object.assign(o, {id, size});
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const isBlacklistedDistrict =
|
||||
appliedBlacklistedDistricts.length === 0 ? false : utils.isOneOf(o.description, appliedBlacklistedDistricts);
|
||||
return !isBlacklistedDistrict && titleNotBlacklisted && descNotBlacklisted;
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
const isBlacklistedDistrict =
|
||||
appliedBlacklistedDistricts.length === 0 ? false : utils.isOneOf(o.description, appliedBlacklistedDistricts);
|
||||
return !isBlacklistedDistrict && titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#srchrslt-adtable .ad-listitem ',
|
||||
//sort by date is standard oO
|
||||
sortByDateParam: null,
|
||||
crawlFields: {
|
||||
id: '.aditem@data-adid | int',
|
||||
price: '.aditem-main--middle--price-shipping--price | removeNewline | trim',
|
||||
size: '.aditem-main .text-module-end span:nth-child(2) | removeNewline | trim',
|
||||
title: '.aditem-main .text-module-begin a | removeNewline | trim',
|
||||
link: '.aditem-main .text-module-begin a@href | removeNewline | trim',
|
||||
description: '.aditem-main p:not(.text-module-end) | removeNewline | trim',
|
||||
address: '.aditem-main--top--left | trim | removeNewline',
|
||||
},
|
||||
paginate: '#srchrslt-pagination .pagination-next@href',
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
url: null,
|
||||
crawlContainer: '#srchrslt-adtable .ad-listitem ',
|
||||
//sort by date is standard oO
|
||||
sortByDateParam: null,
|
||||
crawlFields: {
|
||||
id: '.aditem@data-adid | int',
|
||||
price: '.aditem-main--middle--price-shipping--price | removeNewline | trim',
|
||||
size: '.aditem-main .text-module-end span:nth-child(2) | removeNewline | trim',
|
||||
title: '.aditem-main .text-module-begin a | removeNewline | trim',
|
||||
link: '.aditem-main .text-module-begin a@href | removeNewline | trim',
|
||||
description: '.aditem-main p:not(.text-module-end) | removeNewline | trim',
|
||||
address: '.aditem-main--top--left | trim | removeNewline',
|
||||
},
|
||||
paginate: '#srchrslt-pagination .pagination-next@href',
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
};
|
||||
export const metaInformation = {
|
||||
name: 'Ebay Kleinanzeigen',
|
||||
baseUrl: 'https://www.kleinanzeigen.de/',
|
||||
id: 'kleinanzeigen',
|
||||
name: 'Ebay Kleinanzeigen',
|
||||
baseUrl: 'https://www.kleinanzeigen.de/',
|
||||
id: 'kleinanzeigen',
|
||||
};
|
||||
export const init = (sourceConfig, blacklist, blacklistedDistricts) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlacklistedDistricts = blacklistedDistricts || [];
|
||||
appliedBlackList = blacklist || [];
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlacklistedDistricts = blacklistedDistricts || [];
|
||||
appliedBlackList = blacklist || [];
|
||||
};
|
||||
export { config };
|
||||
export {config};
|
||||
|
||||
@@ -1,38 +1,44 @@
|
||||
import utils from '../utils.js';
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
function nullOrEmpty(val) {
|
||||
return val == null || val.length === 0;
|
||||
}
|
||||
|
||||
function normalize(o) {
|
||||
const link = nullOrEmpty(o.link) ? 'NO LINK' : `https://www.neubaukompass.de${o.link.substring(o.link.indexOf('/neubau'))}`;
|
||||
return {...o, link};
|
||||
const id = buildHash(o.id, o.price);
|
||||
return Object.assign(o, {id, link});
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
return !utils.isOneOf(o.title, appliedBlackList);
|
||||
return !utils.isOneOf(o.title, appliedBlackList);
|
||||
}
|
||||
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '.nbk-container >div article',
|
||||
sortByDateParam: 'Sortierung=Id&Richtung=DESC',
|
||||
crawlFields: {
|
||||
id: '@id',
|
||||
title: 'a.nbk-truncate@title | removeNewline | trim',
|
||||
link: 'a.nbk-truncate@href',
|
||||
address: 'p.nbk-truncate | removeNewline | trim',
|
||||
price: 'p.nbk-mb-0 | removeNewline | trim',
|
||||
},
|
||||
paginate: '.numbered-pager__bottom .numbered-pager--info li:nth-child(2) a@href',
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
url: null,
|
||||
crawlContainer: '.nbk-container >div article',
|
||||
sortByDateParam: 'Sortierung=Id&Richtung=DESC',
|
||||
crawlFields: {
|
||||
id: '@id',
|
||||
title: 'a.nbk-truncate@title | removeNewline | trim',
|
||||
link: 'a.nbk-truncate@href',
|
||||
address: 'p.nbk-truncate | removeNewline | trim',
|
||||
price: 'p.nbk-mb-0 | removeNewline | trim',
|
||||
},
|
||||
paginate: '.numbered-pager__bottom .numbered-pager--info li:nth-child(2) a@href',
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlackList = blacklist || [];
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlackList = blacklist || [];
|
||||
};
|
||||
export const metaInformation = {
|
||||
name: 'Neubau Kompass',
|
||||
baseUrl: 'https://www.neubaukompass.de/',
|
||||
id: 'neubauKompass',
|
||||
name: 'Neubau Kompass',
|
||||
baseUrl: 'https://www.neubaukompass.de/',
|
||||
id: 'neubauKompass',
|
||||
};
|
||||
export { config };
|
||||
export {config};
|
||||
|
||||
@@ -1,36 +1,41 @@
|
||||
import utils from '../utils.js';
|
||||
import utils, {buildHash} from '../utils.js';
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
function normalize(o) {
|
||||
return o;
|
||||
const id = buildHash(o.id, o.price);
|
||||
return Object.assign(o, {id});
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
return o.id != null && titleNotBlacklisted && descNotBlacklisted;
|
||||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList);
|
||||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList);
|
||||
return o.id != null && titleNotBlacklisted && descNotBlacklisted;
|
||||
}
|
||||
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#main_column .wgg_card',
|
||||
sortByDateParam: 'sort_column=0&sort_order=0',
|
||||
crawlFields: {
|
||||
id: '@data-id',
|
||||
details: '.row .noprint .col-xs-11 |removeNewline |trim',
|
||||
price: '.middle .col-xs-3 |removeNewline |trim',
|
||||
size: '.middle .text-right |removeNewline |trim',
|
||||
title: '.truncate_title a |removeNewline |trim',
|
||||
link: '.truncate_title a@href',
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
url: null,
|
||||
crawlContainer: '#main_column .wgg_card',
|
||||
sortByDateParam: 'sort_column=0&sort_order=0',
|
||||
crawlFields: {
|
||||
id: '@data-id',
|
||||
details: '.row .noprint .col-xs-11 |removeNewline |trim',
|
||||
price: '.middle .col-xs-3 |removeNewline |trim',
|
||||
size: '.middle .text-right |removeNewline |trim',
|
||||
title: '.truncate_title a |removeNewline |trim',
|
||||
link: '.truncate_title a@href',
|
||||
},
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
};
|
||||
export const init = (sourceConfig, blacklist) => {
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlackList = blacklist || [];
|
||||
config.enabled = sourceConfig.enabled;
|
||||
config.url = sourceConfig.url;
|
||||
appliedBlackList = blacklist || [];
|
||||
};
|
||||
export const metaInformation = {
|
||||
name: 'Wg gesucht',
|
||||
baseUrl: 'https://www.wg-gesucht.de/',
|
||||
id: 'wgGesucht',
|
||||
name: 'Wg gesucht',
|
||||
baseUrl: 'https://www.wg-gesucht.de/',
|
||||
id: 'wgGesucht',
|
||||
};
|
||||
export { config };
|
||||
export {config};
|
||||
|
||||
Reference in New Issue
Block a user