mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
next release version
This commit is contained in:
@@ -261,7 +261,7 @@ class FredyPipelineExecutioner {
|
|||||||
* @returns {ParsedListing[]} Filtered listings that pass validation and provider filter.
|
* @returns {ParsedListing[]} Filtered listings that pass validation and provider filter.
|
||||||
*/
|
*/
|
||||||
_filter(listings) {
|
_filter(listings) {
|
||||||
const requiredKeys = this._providerConfig.fieldNames;
|
const requiredKeys = this._providerConfig.requiredFieldNames;
|
||||||
const requireValues = ['id', 'link', 'title'];
|
const requireValues = ['id', 'link', 'title'];
|
||||||
|
|
||||||
const filteredListings = listings
|
const filteredListings = listings
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ function applyBlacklist(o) {
|
|||||||
|
|
||||||
/** @type {ProviderConfig} */
|
/** @type {ProviderConfig} */
|
||||||
const config = {
|
const config = {
|
||||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: '.tabelle',
|
crawlContainer: '.tabelle',
|
||||||
sortByDateParam: 'sort_type=newest',
|
sortByDateParam: 'sort_type=newest',
|
||||||
|
|||||||
@@ -105,7 +105,7 @@ function applyBlacklist(o) {
|
|||||||
|
|
||||||
/** @type {ProviderConfig} */
|
/** @type {ProviderConfig} */
|
||||||
const config = {
|
const config = {
|
||||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: 'a.lr-card',
|
crawlContainer: 'a.lr-card',
|
||||||
sortByDateParam: 'sort_col=*created_ts&sort_dir=desc',
|
sortByDateParam: 'sort_col=*created_ts&sort_dir=desc',
|
||||||
|
|||||||
@@ -202,7 +202,7 @@ function applyBlacklist(o) {
|
|||||||
}
|
}
|
||||||
/** @type {ProviderConfig} */
|
/** @type {ProviderConfig} */
|
||||||
const config = {
|
const config = {
|
||||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||||
url: null,
|
url: null,
|
||||||
crawlFields: {
|
crawlFields: {
|
||||||
id: 'id',
|
id: 'id',
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ function applyBlacklist(o) {
|
|||||||
|
|
||||||
/** @type {ProviderConfig} */
|
/** @type {ProviderConfig} */
|
||||||
const config = {
|
const config = {
|
||||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: '.js-serp-item',
|
crawlContainer: '.js-serp-item',
|
||||||
sortByDateParam: 's=most_recently_updated_first',
|
sortByDateParam: 's=most_recently_updated_first',
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ function applyBlacklist(o) {
|
|||||||
|
|
||||||
/** @type {ProviderConfig} */
|
/** @type {ProviderConfig} */
|
||||||
const config = {
|
const config = {
|
||||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer:
|
crawlContainer:
|
||||||
'div[data-testid="serp-core-scrollablelistview-testid"]:not(div[data-testid="serp-enlargementlist-testid"] div[data-testid="serp-card-testid"]) div[data-testid="serp-core-classified-card-testid"]',
|
'div[data-testid="serp-core-scrollablelistview-testid"]:not(div[data-testid="serp-enlargementlist-testid"] div[data-testid="serp-card-testid"]) div[data-testid="serp-core-classified-card-testid"]',
|
||||||
|
|||||||
@@ -186,7 +186,7 @@ function applyBlacklist(o) {
|
|||||||
|
|
||||||
/** @type {ProviderConfig} */
|
/** @type {ProviderConfig} */
|
||||||
const config = {
|
const config = {
|
||||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: '#srchrslt-adtable .ad-listitem ',
|
crawlContainer: '#srchrslt-adtable .ad-listitem ',
|
||||||
//sort by date is standard oO
|
//sort by date is standard oO
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ function applyBlacklist(o) {
|
|||||||
}
|
}
|
||||||
/** @type {ProviderConfig} */
|
/** @type {ProviderConfig} */
|
||||||
const config = {
|
const config = {
|
||||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: 'article[data-testid="propertyCard"]',
|
crawlContainer: 'article[data-testid="propertyCard"]',
|
||||||
sortByDateParam: 'sortBy=DATE&sortOn=DESC',
|
sortByDateParam: 'sortBy=DATE&sortOn=DESC',
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ function applyBlacklist(o) {
|
|||||||
|
|
||||||
/** @type {ProviderConfig} */
|
/** @type {ProviderConfig} */
|
||||||
const config = {
|
const config = {
|
||||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: '.col-12.mb-4',
|
crawlContainer: '.col-12.mb-4',
|
||||||
sortByDateParam: 'Sortierung=Id&Richtung=DESC',
|
sortByDateParam: 'Sortierung=Id&Richtung=DESC',
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ function applyBlacklist(o) {
|
|||||||
}
|
}
|
||||||
/** @type {ProviderConfig} */
|
/** @type {ProviderConfig} */
|
||||||
const config = {
|
const config = {
|
||||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: 'div[data-livecomponent-id*="search/property_list"] .grid > div',
|
crawlContainer: 'div[data-livecomponent-id*="search/property_list"] .grid > div',
|
||||||
sortByDateParam: null,
|
sortByDateParam: null,
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ function applyBlacklist(o) {
|
|||||||
}
|
}
|
||||||
/** @type {ProviderConfig} */
|
/** @type {ProviderConfig} */
|
||||||
const config = {
|
const config = {
|
||||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: '.listentry-content',
|
crawlContainer: '.listentry-content',
|
||||||
sortByDateParam: null, // sort by date is standard
|
sortByDateParam: null, // sort by date is standard
|
||||||
|
|||||||
@@ -90,7 +90,7 @@ function applyBlacklist(o) {
|
|||||||
}
|
}
|
||||||
/** @type {ProviderConfig} */
|
/** @type {ProviderConfig} */
|
||||||
const config = {
|
const config = {
|
||||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||||
url: null,
|
url: null,
|
||||||
crawlContainer: 'div[data-testid="estate-link"]',
|
crawlContainer: 'div[data-testid="estate-link"]',
|
||||||
sortByDateParam: 'sortBy=date_desc',
|
sortByDateParam: 'sortBy=date_desc',
|
||||||
|
|||||||
@@ -84,7 +84,7 @@ const config = {
|
|||||||
image: '.img-responsive@src',
|
image: '.img-responsive@src',
|
||||||
description: '.row .noprint .col-xs-11 |removeNewline |trim',
|
description: '.row .noprint .col-xs-11 |removeNewline |trim',
|
||||||
},
|
},
|
||||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||||
normalize: normalize,
|
normalize: normalize,
|
||||||
filter: applyBlacklist,
|
filter: applyBlacklist,
|
||||||
fetchDetails,
|
fetchDetails,
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ function applyBlacklist(o) {
|
|||||||
|
|
||||||
/** @type {ProviderConfig} */
|
/** @type {ProviderConfig} */
|
||||||
const config = {
|
const config = {
|
||||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||||
url: null,
|
url: null,
|
||||||
sortByDateParam: null,
|
sortByDateParam: null,
|
||||||
waitForSelector: 'body',
|
waitForSelector: 'body',
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
* @property {string} [sortByDateParam] Query parameter used to enforce sorting by date.
|
* @property {string} [sortByDateParam] Query parameter used to enforce sorting by date.
|
||||||
* @property {string} [waitForSelector] CSS selector to wait for before parsing content.
|
* @property {string} [waitForSelector] CSS selector to wait for before parsing content.
|
||||||
* @property {Object.<string, string>} crawlFields Mapping of field names to selectors/paths.
|
* @property {Object.<string, string>} crawlFields Mapping of field names to selectors/paths.
|
||||||
* @property {string[]} fieldNames List of field names that this provider supports.
|
* @property {string[]} requiredFieldNames List of field names required on every listing from this provider (read as the required keys by the pipeline's `_filter` step).
|
||||||
* @property {string} [crawlContainer] CSS selector for the container holding listing items.
|
* @property {string} [crawlContainer] CSS selector for the container holding listing items.
|
||||||
* @property {(raw: any) => ParsedListing} normalize Function to convert raw scraped data into a ParsedListing shape.
|
* @property {(raw: any) => ParsedListing} normalize Function to convert raw scraped data into a ParsedListing shape.
|
||||||
* @property {(listing: ParsedListing) => boolean} filter Function to filter out unwanted listings.
|
* @property {(listing: ParsedListing) => boolean} filter Function to filter out unwanted listings.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "fredy",
|
"name": "fredy",
|
||||||
"version": "20.2.1",
|
"version": "20.3.0",
|
||||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"prepare": "husky",
|
"prepare": "husky",
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ describe('Issue reproduction: listings filtered by similarity or area should be
|
|||||||
normalize: (l) => l,
|
normalize: (l) => l,
|
||||||
filter: () => true,
|
filter: () => true,
|
||||||
crawlFields: { id: 'id', title: 'title', address: 'address', price: 'price' },
|
crawlFields: { id: 'id', title: 'title', address: 'address', price: 'price' },
|
||||||
fieldNames: ['id', 'title', 'address', 'price'],
|
requiredFieldNames: ['id', 'title', 'address', 'price'],
|
||||||
};
|
};
|
||||||
|
|
||||||
const mockedJob = {
|
const mockedJob = {
|
||||||
@@ -97,7 +97,7 @@ describe('Issue reproduction: listings filtered by similarity or area should be
|
|||||||
normalize: (l) => l,
|
normalize: (l) => l,
|
||||||
filter: () => true,
|
filter: () => true,
|
||||||
crawlFields: { id: 'id', title: 'title', address: 'address', price: 'price' },
|
crawlFields: { id: 'id', title: 'title', address: 'address', price: 'price' },
|
||||||
fieldNames: ['id', 'title', 'address', 'price'],
|
requiredFieldNames: ['id', 'title', 'address', 'price'],
|
||||||
};
|
};
|
||||||
|
|
||||||
const fredy = new Fredy(providerConfig, mockedJob, 'test-provider', mockSimilarityCache, undefined);
|
const fredy = new Fredy(providerConfig, mockedJob, 'test-provider', mockSimilarityCache, undefined);
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ export default mergeConfig(
|
|||||||
'test/provider/immonet.test.js',
|
'test/provider/immonet.test.js',
|
||||||
'test/provider/immobilienDe.test.js',
|
'test/provider/immobilienDe.test.js',
|
||||||
'test/provider/immowelt.test.js',
|
'test/provider/immowelt.test.js',
|
||||||
|
'test/provider/sparkasse.test.js',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
|
|||||||
Reference in New Issue
Block a user