mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
next release version
This commit is contained in:
@@ -261,7 +261,7 @@ class FredyPipelineExecutioner {
|
||||
* @returns {ParsedListing[]} Filtered listings that pass validation and provider filter.
|
||||
*/
|
||||
_filter(listings) {
|
||||
const requiredKeys = this._providerConfig.fieldNames;
|
||||
const requiredKeys = this._providerConfig.requiredFieldNames;
|
||||
const requireValues = ['id', 'link', 'title'];
|
||||
|
||||
const filteredListings = listings
|
||||
|
||||
@@ -64,7 +64,7 @@ function applyBlacklist(o) {
|
||||
|
||||
/** @type {ProviderConfig} */
|
||||
const config = {
|
||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
url: null,
|
||||
crawlContainer: '.tabelle',
|
||||
sortByDateParam: 'sort_type=newest',
|
||||
|
||||
@@ -105,7 +105,7 @@ function applyBlacklist(o) {
|
||||
|
||||
/** @type {ProviderConfig} */
|
||||
const config = {
|
||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
url: null,
|
||||
crawlContainer: 'a.lr-card',
|
||||
sortByDateParam: 'sort_col=*created_ts&sort_dir=desc',
|
||||
|
||||
@@ -202,7 +202,7 @@ function applyBlacklist(o) {
|
||||
}
|
||||
/** @type {ProviderConfig} */
|
||||
const config = {
|
||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
url: null,
|
||||
crawlFields: {
|
||||
id: 'id',
|
||||
|
||||
@@ -44,7 +44,7 @@ function applyBlacklist(o) {
|
||||
|
||||
/** @type {ProviderConfig} */
|
||||
const config = {
|
||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
url: null,
|
||||
crawlContainer: '.js-serp-item',
|
||||
sortByDateParam: 's=most_recently_updated_first',
|
||||
|
||||
@@ -82,7 +82,7 @@ function applyBlacklist(o) {
|
||||
|
||||
/** @type {ProviderConfig} */
|
||||
const config = {
|
||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
url: null,
|
||||
crawlContainer:
|
||||
'div[data-testid="serp-core-scrollablelistview-testid"]:not(div[data-testid="serp-enlargementlist-testid"] div[data-testid="serp-card-testid"]) div[data-testid="serp-core-classified-card-testid"]',
|
||||
|
||||
@@ -186,7 +186,7 @@ function applyBlacklist(o) {
|
||||
|
||||
/** @type {ProviderConfig} */
|
||||
const config = {
|
||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
url: null,
|
||||
crawlContainer: '#srchrslt-adtable .ad-listitem ',
|
||||
//sort by date is standard oO
|
||||
|
||||
@@ -44,7 +44,7 @@ function applyBlacklist(o) {
|
||||
}
|
||||
/** @type {ProviderConfig} */
|
||||
const config = {
|
||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
url: null,
|
||||
crawlContainer: 'article[data-testid="propertyCard"]',
|
||||
sortByDateParam: 'sortBy=DATE&sortOn=DESC',
|
||||
|
||||
@@ -47,7 +47,7 @@ function applyBlacklist(o) {
|
||||
|
||||
/** @type {ProviderConfig} */
|
||||
const config = {
|
||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
url: null,
|
||||
crawlContainer: '.col-12.mb-4',
|
||||
sortByDateParam: 'Sortierung=Id&Richtung=DESC',
|
||||
|
||||
@@ -41,7 +41,7 @@ function applyBlacklist(o) {
|
||||
}
|
||||
/** @type {ProviderConfig} */
|
||||
const config = {
|
||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
url: null,
|
||||
crawlContainer: 'div[data-livecomponent-id*="search/property_list"] .grid > div',
|
||||
sortByDateParam: null,
|
||||
|
||||
@@ -45,7 +45,7 @@ function applyBlacklist(o) {
|
||||
}
|
||||
/** @type {ProviderConfig} */
|
||||
const config = {
|
||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
url: null,
|
||||
crawlContainer: '.listentry-content',
|
||||
sortByDateParam: null, // sort by date is standard
|
||||
|
||||
@@ -90,7 +90,7 @@ function applyBlacklist(o) {
|
||||
}
|
||||
/** @type {ProviderConfig} */
|
||||
const config = {
|
||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
url: null,
|
||||
crawlContainer: 'div[data-testid="estate-link"]',
|
||||
sortByDateParam: 'sortBy=date_desc',
|
||||
|
||||
@@ -84,7 +84,7 @@ const config = {
|
||||
image: '.img-responsive@src',
|
||||
description: '.row .noprint .col-xs-11 |removeNewline |trim',
|
||||
},
|
||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
normalize: normalize,
|
||||
filter: applyBlacklist,
|
||||
fetchDetails,
|
||||
|
||||
@@ -43,7 +43,7 @@ function applyBlacklist(o) {
|
||||
|
||||
/** @type {ProviderConfig} */
|
||||
const config = {
|
||||
fieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
requiredFieldNames: ['id', 'link', 'title', 'price', 'size', 'rooms', 'address', 'image', 'description'],
|
||||
url: null,
|
||||
sortByDateParam: null,
|
||||
waitForSelector: 'body',
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
* @property {string} [sortByDateParam] Query parameter used to enforce sorting by date.
|
||||
* @property {string} [waitForSelector] CSS selector to wait for before parsing content.
|
||||
* @property {Object.<string, string>} crawlFields Mapping of field names to selectors/paths.
|
||||
* @property {string[]} fieldNames List of field names that this provider supports.
|
||||
* @property {string[]} requiredFieldNames List of field names a listing is required to provide; used as the required keys when filtering listings.
|
||||
* @property {string} [crawlContainer] CSS selector for the container holding listing items.
|
||||
* @property {(raw: any) => ParsedListing} normalize Function to convert raw scraped data into a ParsedListing shape.
|
||||
* @property {(listing: ParsedListing) => boolean} filter Function to filter out unwanted listings.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "fredy",
|
||||
"version": "20.2.1",
|
||||
"version": "20.3.0",
|
||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||
"scripts": {
|
||||
"prepare": "husky",
|
||||
|
||||
@@ -22,7 +22,7 @@ describe('Issue reproduction: listings filtered by similarity or area should be
|
||||
normalize: (l) => l,
|
||||
filter: () => true,
|
||||
crawlFields: { id: 'id', title: 'title', address: 'address', price: 'price' },
|
||||
fieldNames: ['id', 'title', 'address', 'price'],
|
||||
requiredFieldNames: ['id', 'title', 'address', 'price'],
|
||||
};
|
||||
|
||||
const mockedJob = {
|
||||
@@ -97,7 +97,7 @@ describe('Issue reproduction: listings filtered by similarity or area should be
|
||||
normalize: (l) => l,
|
||||
filter: () => true,
|
||||
crawlFields: { id: 'id', title: 'title', address: 'address', price: 'price' },
|
||||
fieldNames: ['id', 'title', 'address', 'price'],
|
||||
requiredFieldNames: ['id', 'title', 'address', 'price'],
|
||||
};
|
||||
|
||||
const fredy = new Fredy(providerConfig, mockedJob, 'test-provider', mockSimilarityCache, undefined);
|
||||
|
||||
@@ -15,6 +15,7 @@ export default mergeConfig(
|
||||
'test/provider/immonet.test.js',
|
||||
'test/provider/immobilienDe.test.js',
|
||||
'test/provider/immowelt.test.js',
|
||||
'test/provider/sparkasse.test.js',
|
||||
],
|
||||
},
|
||||
}),
|
||||
|
||||
Reference in New Issue
Block a user