mirror of
https://github.com/orangecoding/fredy.git
synced 2026-06-16 12:31:07 +00:00
Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c831057fba | ||
|
|
355f3bfc76 | ||
|
|
949abcaf09 | ||
|
|
24e925ae0d | ||
|
|
2764b2b776 | ||
|
|
0de6d3df04 | ||
|
|
45a18529ba | ||
|
|
84c8aa6d72 | ||
|
|
0f7a07e7fb | ||
|
|
7a65b788b9 | ||
|
|
ec7689f73f | ||
|
|
5a13e6a0a0 | ||
|
|
7bd36e554c | ||
|
|
d7e3dfc05e | ||
|
|
c1c4d55ede | ||
|
|
aad0884976 | ||
|
|
c0ae72424b | ||
|
|
a3aa512db3 | ||
|
|
8361d9c8ff | ||
|
|
ad7415f4f5 | ||
|
|
c97b323b35 | ||
|
|
ec986e4b18 |
@@ -1,3 +1,7 @@
|
||||
node_modules
|
||||
node_modules/
|
||||
npm-debug.log
|
||||
test
|
||||
test/
|
||||
conf/
|
||||
db/
|
||||
.git/
|
||||
.github/
|
||||
|
||||
24
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
24
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
---
|
||||
name: Bug report
|
||||
about: Create a report to help us improve
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
|
||||
**To Reproduce**
|
||||
Steps to reproduce the behavior:
|
||||
1. Go to '...'
|
||||
2. Click on '....'
|
||||
3. Scroll down to '....'
|
||||
4. See error
|
||||
|
||||
**Expected behavior**
|
||||
A clear and concise description of what you expected to happen.
|
||||
|
||||
**Screenshots**
|
||||
If applicable, add screenshots to help explain your problem.
|
||||
20
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
20
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
---
|
||||
name: Feature request
|
||||
about: Suggest an idea for this project
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Is your feature request related to a problem? Please describe.**
|
||||
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
|
||||
|
||||
**Describe the solution you'd like**
|
||||
A clear and concise description of what you want to happen.
|
||||
|
||||
**Describe alternatives you've considered**
|
||||
A clear and concise description of any alternative solutions or features you've considered.
|
||||
|
||||
**Additional context**
|
||||
Add any other context or screenshots about the feature request here.
|
||||
46
.github/workflows/docker.yml
vendored
Normal file
46
.github/workflows/docker.yml
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
name: Create and publish Docker image
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- 'master'
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
env:
|
||||
REGISTRY: ghcr.io
|
||||
IMAGE_NAME: ${{ github.repository }}
|
||||
|
||||
jobs:
|
||||
build-and-push-image:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v1
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Log in to the Container registry
|
||||
uses: docker/login-action@v1
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Extract metadata (tags, labels) for Docker
|
||||
id: meta
|
||||
uses: docker/metadata-action@v3
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
|
||||
- name: Build and push Docker image
|
||||
uses: docker/build-push-action@v2
|
||||
with:
|
||||
context: .
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
21
.github/workflows/test.yml
vendored
Normal file
21
.github/workflows/test.yml
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
name: Test
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
jobs:
|
||||
test:
|
||||
name: Test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v2.5.1
|
||||
with:
|
||||
node-version: 16
|
||||
cache: 'yarn'
|
||||
- run: yarn install
|
||||
- run: yarn run test
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -3,3 +3,4 @@ ui/public/
|
||||
db/
|
||||
npm-debug.log
|
||||
.DS_Store
|
||||
.idea
|
||||
|
||||
15
CHANGELOG.md
15
CHANGELOG.md
@@ -1,3 +1,18 @@
|
||||
###### [V5.4.0]
|
||||
- Upgrading dependencies
|
||||
- Provider urls are now automagically changed to include the correct sort order for search results
|
||||
|
||||
```
|
||||
Note: It has been a point of confusion since the very beginning of Fredy that people simply copied the url, but
|
||||
did not take care of sorting the search results by date. If this is not done, Fredy will most likely not see the latest
|
||||
results, thus cannot report them. This release fixes it by adding the necessary params (or replaces them).
|
||||
```
|
||||
|
||||
###### [V5.3.0]
|
||||
- Upgrading dependencies
|
||||
- It's now possible to send mails to multiple receivers using comma separation for MailJet & Sendgrid
|
||||
- Fixing Immowelt scraping
|
||||
|
||||
###### [V5.2.0]
|
||||
- Upgrading dependencies
|
||||
- Adding new similarity check layer (Duplicates are being removed now)
|
||||
|
||||
18
Dockerfile
Normal file
18
Dockerfile
Normal file
@@ -0,0 +1,18 @@
|
||||
# syntax=docker/dockerfile:1.3
|
||||
FROM node:16-alpine AS builder
|
||||
COPY --chown=1000:1000 . /fredy
|
||||
WORKDIR /fredy
|
||||
USER 1000
|
||||
RUN yarn install
|
||||
RUN yarn run prod
|
||||
|
||||
FROM node:16-alpine
|
||||
COPY --from=builder --chown=1000:1000 /fredy /fredy
|
||||
RUN mkdir /db /conf && \
|
||||
chown 1000:1000 /db /conf && \
|
||||
ln -s /db /fredy/db && ln -s /conf /fredy/conf
|
||||
EXPOSE 9998
|
||||
USER 1000
|
||||
VOLUME [ "/conf", "/db" ]
|
||||
WORKDIR /fredy
|
||||
CMD node index.js --no-daemon
|
||||
74
README.md
74
README.md
@@ -1,23 +1,23 @@
|
||||
<img src="https://github.com/orangecoding/fredy/blob/master/doc/logo.png" width="400">
|
||||
|
||||
[](https://travis-ci.org/orangecoding/fredy)
|
||||

|
||||
|
||||
Searching an apartment in Germany can be quite frustrating. Not any longer as Fredy will take over and only notifies you once new listings have been found that matches your requirements.
|
||||
Searching an apartment in Germany can be a frustrating task. Not any longer though, as _Fredy_ will take over and will only notify you once new listings have been found that match your requirements.
|
||||
|
||||
_Fredy_ scrapes multiple services (Immonet, Immowelt etc.) and send new listings to you once they appear. The list of available services can easily be extended. For your convenience, a ui helps you to configure your search jobs.
|
||||
_Fredy_ scrapes multiple services (Immonet, Immowelt etc.) and sends new listings to you once they become available. The list of available services can easily be extended. For your convenience, _Fredy_ has a UI to help you configure your search jobs.
|
||||
|
||||
If _Fredy_ found matching results, it will send them to you via Slack, Email, Telegram etc. (More adapter possible.) As _Fredy_ will store the listings it has found, new results will not be sent twice (and as a side-effect, _Fredy_ can show some statistics..). Furthermore, _Fredy_ checks duplicates per scraping so that the same listings are not being sent when posted on various platforms. (Happens more often than one might think)
|
||||
If _Fredy_ finds matching results, it will send them to you via Slack, Email, Telegram etc. (More adapters can be configured.) As _Fredy_ stores the listings it has found, new results will not be sent to you twice (and as a side-effect, _Fredy_ can show some statistics). Furthermore, _Fredy_ checks duplicates per scraping so that the same listings are not being sent twice or more when posted on various platforms (which happens more often than one might think).
|
||||
|
||||
## Usage
|
||||
|
||||
- Make sure to use NodeJs 12 and above
|
||||
- Run the following commands
|
||||
- Make sure to use Node.js 12 or above
|
||||
- Run the following commands:
|
||||
```ssh
|
||||
yarn (or npm install)
|
||||
yarn run prod
|
||||
yarn run start
|
||||
```
|
||||
_Fredy_ will start with the default port, set to `9998`. You can access _Fredy_ by opening a browser `http://localhost:9998`. The default login is `admin` both for username and password. (You should change the password asap when you plan to run Fredy on your server.)
|
||||
_Fredy_ will start with the default port, set to `9998`. You can access _Fredy_ by opening your browser at `http://localhost:9998`. The default login is `admin`, both for username and password. You should change the password as soon as possible when you plan to run Fredy on a server.
|
||||
|
||||
<p align="center">
|
||||
<img alt="Job Configuration" src="https://github.com/orangecoding/fredy/blob/master/doc/screenshot__1.png" width="30%">
|
||||
@@ -30,48 +30,38 @@ _Fredy_ will start with the default port, set to `9998`. You can access _Fredy_
|
||||
|
||||
</p>
|
||||
|
||||
## Immoscout
|
||||
I have added **experimental** support for Immoscout. Immoscout is somewhat special, coz they have decided to secure their service from bots using Re-Capture. Finding a way around this is barely possible. For _Fredy_ to be able to bypass the check, I'm using a service called [ScrapingAnt](https://scrapingant.com/). The trick is to use a headless browser, rotating proxies and (once successful validated) re-send the cookies each time.
|
||||
|
||||
To be able to use Immoscout, you need to create an account at ScrapingAnt. Configure the ApiKey in the "General Settings" tab (visible when logged in as administrator).
|
||||
The rest should be done by _Fredy_. Keep in mind, the support is experimental. There might be bugs and you might not always get pass the re-capture check, but most of the time it works pretty good :)
|
||||
|
||||
If you need more that the 1000 api calls you can do per month, I'd suggest opting for a paid account... ScrapingAnt loves OpenSource, therefor they've decided to give all _Fredy_ users a 10% discount by using the code **FREDY10** (No I don't get any money for recommending good services...)
|
||||
|
||||
|
||||
## Understanding the fundamentals
|
||||
There are 3 important parts in Fredy, that you need to understand leveraging the full power of _Fredy_.
|
||||
There are 3 important parts in Fredy, that you need to understand to leverage the full power of _Fredy_.
|
||||
|
||||
#### Adapter
|
||||
_Fredy_ supports multiple services. Immonet, Immowelt and Ebay are just a few. Those services are called adapter within _Fredy_. When creating a new job, you can choose 1 or many adapter.
|
||||
An adapter holds the url that points to the search results for the service. If you go to immonet.de and search for something, the shown url in the browser is what the adapter needs to do it's magic.
|
||||
**It is important that you order the search results by date, so that _Fredy_ always picks the latest ones first**
|
||||
_Fredy_ supports multiple services. Immonet, Immowelt and Ebay are just a few examples. Those services are called adapters within _Fredy_. When creating a new job, you can choose one or more adapters.
|
||||
An adapter contains the URL that points to the search results for the respective service. If you go to immonet.de and search for something, the displayed URL in the browser is what the adapter needs to do its magic.
|
||||
**It is important that you order the search results by date, so that _Fredy_ always picks the latest results first!**
|
||||
|
||||
#### Provider
|
||||
_Fredy_ supports multiple provider. Slack, SendGrid, Telegram etc. A search job can have as many provider as supported by _Fredy_. Each provider needs different configuration values, which you have to provide when using it. A provider itself dictactes how the frontend renders by telling the frontend what information it needs in order to send listings to the user.
|
||||
_Fredy_ supports multiple providers, such as Slack, SendGrid, Telegram etc. A search job can have as many providers as supported by _Fredy_. Each provider needs different configuration values, which you have to provide when using them. A provider dictates how the frontend renders by telling the frontend what information it needs in order to send listings to the user.
|
||||
|
||||
#### Jobs
|
||||
A Job wraps adapter and provider. _Fredy_ runs the configured jobs in a specific interval (can be configured in `/conf/config.json`).
|
||||
A Job wraps adapters and providers. _Fredy_ runs the configured jobs in a specific interval (can be configured in `/conf/config.json`).
|
||||
|
||||
## Creating your first job
|
||||
To create your first job, click on the button "Create New Job" on the job table. The job creation dialog should be self explanatory, however there's one important thing.
|
||||
When configuring adapter, before copying the url from your browser make sure that you have sorted the results by date to make sure _Fredy_ always picks the latest results first.
|
||||
To create your first job, click on the button "Create New Job" on the job table. The job creation dialog should be self-explanatory, however there is one important thing.
|
||||
When configuring adapters, before copying the URL from your browser, make sure that you have sorted the results by date to make sure _Fredy_ always picks the latest results first.
|
||||
|
||||
## User management
|
||||
As an administrator, you can create/edit and remove user from _Fredy_. Be careful, each job is connected to the user that has created the job. If you remove the user, the jobs will also be removed.
|
||||
|
||||
As an administrator, you can create, edit and remove users from _Fredy_. Be careful, each job is connected to the user that has created the job. If you remove the user, their jobs will also be removed.
|
||||
|
||||
# Development
|
||||
|
||||
### Running Fredy in dev mode
|
||||
To run _Fredy_ in dev mode, you need to run the backend & frontend separately. Run the backend in your favorite IDE, the frontend can be started from the terminal.
|
||||
### Running Fredy in development mode
|
||||
To run _Fredy_ in development mode, you need to run the backend & frontend separately. Run the backend in your favorite IDE, the frontend can be started from the terminal.
|
||||
```shell
|
||||
yarn run dev
|
||||
```
|
||||
You should now be able to access _Fredy_ with your browser. Go to `http://localhost:9000`
|
||||
You should now be able to access _Fredy_ from your browser. Go to `http://localhost:9000`.
|
||||
|
||||
### Running Tests
|
||||
To run the tests, simply run
|
||||
To run the tests, run
|
||||
```shell
|
||||
yarn run test
|
||||
```
|
||||
@@ -79,22 +69,20 @@ yarn run test
|
||||
# Architecture
|
||||

|
||||
|
||||
## Immoscout
|
||||
I have added EXPERIMENTAL support for Immoscout. Immoscout is somewhat special, coz they have decided to secure their service from bots using Re-Capture. Finding a way
|
||||
around this is barely possible. For _Fredy_ to be able to bypass the check, I'm using a service called [ScrapingAnt](https://scrapingant.com/).
|
||||
## Immoscout
|
||||
I have added **experimental** support for Immoscout. Immoscout is somewhat special, because they have decided to secure their service from bots using Re-Capture. Finding a way around this is barely possible. For _Fredy_ to be able to bypass this check, I'm using a service called [ScrapingAnt](https://scrapingant.com/). The trick is to use a headless browser, rotating proxies and (once successfully validated) to re-send the cookies each time.
|
||||
|
||||
To be able to use Immoscout, you need to create an account and copy the apiKey into the config file under /conf/config.json.
|
||||
The rest should be done by _Fredy_. Keep in mind, the support is experimental. There might be bugs and you might not always get pass the re-capture check, but most of the time
|
||||
it works pretty good :)
|
||||
To be able to use Immoscout, you need to create an account at ScrapingAnt. Configure the API key in the "General Settings" tab (visible when logged in as administrator).
|
||||
The rest will be handled by _Fredy_. Keep in mind, the support is experimental. There might be bugs and you might not always pass the re-capture check, but most of the time it works rather well :)
|
||||
|
||||
If you need more that the 1000 api calls you can do per month, I'd suggest opting for a paid account... (No I don't get any money for recommending good service)
|
||||
If you need more than the 1000 API calls allowed per month, I'd suggest opting for a paid account... ScrapingAnt loves OpenSource, therefore they have decided to give all _Fredy_ users a 10% discount by using the code **FREDY10** (Disclaimer: I do not earn any money for recommending their service).
|
||||
|
||||
#### Contribution guidelines
|
||||
|
||||
See [Contribution](https://github.com/orangecoding/fredy/blob/master/CONTRIBUTION.md)
|
||||
See [Contributing](https://github.com/orangecoding/fredy/blob/master/CONTRIBUTING.md)
|
||||
|
||||
# Docker
|
||||
Use the Dockerfile in this Repository to build an image.
|
||||
Use the Dockerfile in this repository to build an image.
|
||||
|
||||
Example: `docker build -t fredy/fredy /path/to/your/Dockerfile`
|
||||
|
||||
@@ -102,12 +90,16 @@ Or use docker-compose:
|
||||
|
||||
Example `docker-compose build`
|
||||
|
||||
Or use the container that will be built automatically.
|
||||
|
||||
`docker pull ghcr.io/orangecoding/fredy:master`
|
||||
|
||||
## Create & run a container
|
||||
|
||||
Put your config.json to `/path/to/your/conf/`
|
||||
Put your config.json into a path of your choice, such as `/path/to/your/conf/`.
|
||||
|
||||
Example: `docker create --name fredy -v /path/to/your/conf/:/conf -p 9998:9998 fredy/fredy`
|
||||
|
||||
## Logs
|
||||
|
||||
You can browse the logs with `docker logs fredy -f`
|
||||
You can browse the logs with `docker logs fredy -f`.
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
FROM alpine:latest AS build
|
||||
# use given repository, default below:
|
||||
ARG repo=https://github.com/orangecoding/fredy.git
|
||||
|
||||
RUN mkdir -p /usr/src/
|
||||
#Install Software
|
||||
RUN apk add --update nodejs npm git
|
||||
|
||||
# Output used repository
|
||||
RUN echo "Cloning from $repo"
|
||||
|
||||
RUN cd /usr/src && git clone $repo
|
||||
|
||||
RUN ln -s /usr/src/fredy/conf/ /conf
|
||||
|
||||
# create db folder
|
||||
RUN mkdir /usr/src/fredy/db/
|
||||
|
||||
RUN ln -s /usr/src/fredy/db/ /db
|
||||
|
||||
RUN npm i -g yarn
|
||||
|
||||
RUN cd /usr/src/fredy/ && yarn
|
||||
|
||||
WORKDIR /usr/src/fredy
|
||||
|
||||
RUN yarn run prod
|
||||
|
||||
EXPOSE 9998
|
||||
|
||||
VOLUME [ "/conf", "/db" ]
|
||||
# --no-daemon is required for keeping Container alive
|
||||
CMD node index.js --no-daemon
|
||||
@@ -4,10 +4,8 @@ services:
|
||||
container_name: fredy
|
||||
# build from empty build folder to reduce size of image
|
||||
build:
|
||||
context: ./build
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
repo: https://github.com/orangecoding/fredy.git
|
||||
image: fredy/fredy
|
||||
# map existing config and database
|
||||
volumes:
|
||||
|
||||
@@ -4,6 +4,7 @@ const { setKnownListings, getKnownListings } = require('./services/storage/listi
|
||||
const notify = require('./notification/notify');
|
||||
const xray = require('./services/scraper');
|
||||
const scrapingAnt = require('./services/scrapingAnt');
|
||||
const urlModifier = require('./services/queryStringMutator');
|
||||
|
||||
class FredyRuntime {
|
||||
/**
|
||||
@@ -24,7 +25,8 @@ class FredyRuntime {
|
||||
|
||||
execute() {
|
||||
return (
|
||||
Promise.resolve(this._providerConfig.url)
|
||||
//modify the url to make sure search order is correctly set
|
||||
Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam))
|
||||
//scraping the site and try finding new listings
|
||||
.then(this._getListings.bind(this))
|
||||
//bring them in a proper form (dictated by the provider)
|
||||
|
||||
@@ -21,6 +21,13 @@ exports.send = ({ serviceName, newListings, notificationConfig, jobKey }) => {
|
||||
(adapter) => adapter.id === 'mailJet'
|
||||
).fields;
|
||||
|
||||
const to = receiver
|
||||
.trim()
|
||||
.split(',')
|
||||
.map((r) => ({
|
||||
Email: r.trim(),
|
||||
}));
|
||||
|
||||
return mailjet
|
||||
.connect(apiPublicKey, apiPrivateKey)
|
||||
.post('send', { version: 'v3.1' })
|
||||
@@ -31,11 +38,7 @@ exports.send = ({ serviceName, newListings, notificationConfig, jobKey }) => {
|
||||
Email: from,
|
||||
Name: 'Fredy',
|
||||
},
|
||||
To: [
|
||||
{
|
||||
Email: receiver,
|
||||
},
|
||||
],
|
||||
To: to,
|
||||
Subject: `Fredy found ${newListings.length} new listings for ${serviceName}`,
|
||||
HTMLPart: emailTemplate({
|
||||
serviceName: `Job: (${jobKey}) | Service: ${serviceName}`,
|
||||
|
||||
@@ -4,3 +4,5 @@ To use [MailJet](https://mailjet.com), you need to create an account. You'll nee
|
||||
|
||||
E.g. if you use yourGmailAccount@gmail.com, you have to add this to MailJet and verify it as well.
|
||||
The given public/private api keys are needed in order to use MailJet with Fredy. Fredy will use the same template it uses for SendGrid.
|
||||
|
||||
If this email should be sent to multiple receivers, use a comma separator (some@email.com, someOther@email.com).
|
||||
|
||||
@@ -14,7 +14,10 @@ exports.send = ({ serviceName, newListings, notificationConfig, jobKey }) => {
|
||||
sgMail.setApiKey(apiKey);
|
||||
const msg = {
|
||||
templateId,
|
||||
to: receiver,
|
||||
to: receiver
|
||||
.trim()
|
||||
.split(',')
|
||||
.map((r) => r.trim()),
|
||||
from,
|
||||
subject: `Job ${jobKey} | Service ${serviceName} found ${newListings.length} new listing(s)`,
|
||||
dynamic_template_data: {
|
||||
|
||||
@@ -6,3 +6,5 @@ SendGrid is a free email service (free as in "you cannot send more than 100(Send
|
||||
To use [SendGrid](https://sendgrid.com/), you need to create an account. You'll need to decide which email address you want Fredy to send from. E.g. if you use yourGmailAccount@gmail.com, you have to add this to sendgrid and verify it as well.
|
||||
|
||||
Lastly you have to create an api-key and feed it into Fredy's config, as well as creating a new dynamic template. For this new template, I recommend copying and pasting the code from the one I have provided under `/lib/notification/emailTemplate/template.hbs`.
|
||||
|
||||
If this email should be sent to multiple receivers, use a comma separator (some@email.com, someOther@email.com).
|
||||
|
||||
@@ -1,6 +1,19 @@
|
||||
const { markdown2Html } = require('../../services/markdown');
|
||||
const axios = require('axios');
|
||||
|
||||
/**
|
||||
* splitting an array into chunks because Telegram only allows for messages up to
|
||||
* 4096 chars, thus we have to split messages into chunks
|
||||
* @param inputArray
|
||||
* @param perChunk
|
||||
*/
|
||||
const arrayChunks = (inputArray, perChunk) =>
|
||||
inputArray.reduce((all, one, i) => {
|
||||
const ch = Math.floor(i / perChunk);
|
||||
all[ch] = [].concat(all[ch] || [], one);
|
||||
return all;
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* sends new listings to telegram
|
||||
* @param serviceName e.g immowelt
|
||||
@@ -12,22 +25,28 @@ const axios = require('axios');
|
||||
exports.send = ({ serviceName, newListings, notificationConfig, jobKey }) => {
|
||||
const { token, chatId } = notificationConfig.find((adapter) => adapter.id === 'telegram').fields;
|
||||
|
||||
let message = `Job: ${jobKey} | Service <b>${serviceName}</b> found <b>${newListings.length}</b> new listings:\n\n`;
|
||||
//we have to split messages into chunks, because otherwise messages are going to become too big and will fail
|
||||
const chunks = arrayChunks(newListings, 3);
|
||||
|
||||
message += newListings.map(
|
||||
(o) =>
|
||||
`<b>${shorten(o.title.replace(/\*/g, ''), 45)}</b>\n` +
|
||||
[o.address, o.price, o.size].join(' | ') +
|
||||
'\n' +
|
||||
`<a href="${o.link}">${o.link}</a>\n\n`
|
||||
);
|
||||
const promises = chunks.map((chunk) => {
|
||||
let message = `Job: ${jobKey} | Service <b>${serviceName}</b> found <b>${newListings.length}</b> new listings:\n\n`;
|
||||
message += chunk.map(
|
||||
(o) =>
|
||||
`<b>${shorten(o.title.replace(/\*/g, ''), 45)}</b>\n` +
|
||||
[o.address, o.price, o.size].join(' | ') +
|
||||
'\n' +
|
||||
`<a href="${o.link}">${o.link}</a>\n\n`
|
||||
);
|
||||
|
||||
return axios.post(`https://api.telegram.org/bot${token}/sendMessage`, {
|
||||
chat_id: chatId,
|
||||
text: message,
|
||||
parse_mode: 'HTML',
|
||||
disable_web_page_preview: true,
|
||||
return axios.post(`https://api.telegram.org/bot${token}/sendMessage`, {
|
||||
chat_id: chatId,
|
||||
text: message,
|
||||
parse_mode: 'HTML',
|
||||
disable_web_page_preview: true,
|
||||
});
|
||||
});
|
||||
|
||||
return Promise.all(promises);
|
||||
};
|
||||
|
||||
function shorten(str, len = 30) {
|
||||
|
||||
@@ -22,6 +22,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '.tabelle',
|
||||
sortByDateParam: 'sort_type=newest',
|
||||
crawlFields: {
|
||||
id: '.inner_object_data input[name="marker_objekt_id"]@value | int',
|
||||
price: '.tabelle .inner_object_data .single_data_price | removeNewline | trim',
|
||||
|
||||
@@ -24,6 +24,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#result-list-stage .item',
|
||||
sortByDateParam: 'sortby=19',
|
||||
crawlFields: {
|
||||
id: '@id',
|
||||
price: 'div[id*="selPrice_"] | trim',
|
||||
|
||||
@@ -2,9 +2,13 @@ const utils = require('../utils');
|
||||
|
||||
let appliedBlackList = [];
|
||||
|
||||
function nullOrEmpty(val) {
|
||||
return val == null || val.length === 0;
|
||||
}
|
||||
|
||||
function normalize(o) {
|
||||
const title = o.title.replace('NEU', '');
|
||||
const address = (o.address || '').replace(/\(.*\),.*$/, '').trim();
|
||||
const title = nullOrEmpty(o.title) ? 'NO TITLE FOUND' : o.title.replace('NEU', '');
|
||||
const address = nullOrEmpty(o.address) ? 'NO ADDRESS FOUND' : (o.address || '').replace(/\(.*\),.*$/, '').trim();
|
||||
const link = `https://www.immobilienscout24.de${o.link.substring(o.link.indexOf('/expose'))}`;
|
||||
return Object.assign(o, { title, address, link });
|
||||
}
|
||||
@@ -16,6 +20,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#resultListItems li.result-list__listing',
|
||||
sortByDateParam: 'sorting=2',
|
||||
crawlFields: {
|
||||
id: '.result-list-entry@data-obid | int',
|
||||
price: '.result-list-entry .result-list-entry__criteria .grid-item:first-child dd | removeNewline | trim',
|
||||
|
||||
@@ -3,10 +3,7 @@ const utils = require('../utils');
|
||||
let appliedBlackList = [];
|
||||
|
||||
function normalize(o) {
|
||||
const size = o.size == null ? '--- m²' : o.size.split('Wohnfläche')[1].replace(' (ca.) ', '');
|
||||
const address = o.address;
|
||||
|
||||
return Object.assign(o, { size, address });
|
||||
return o;
|
||||
}
|
||||
|
||||
function applyBlacklist(o) {
|
||||
@@ -18,14 +15,15 @@ function applyBlacklist(o) {
|
||||
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '.immoliste .js-object.listitem_wrap ',
|
||||
crawlContainer: "div[class^='EstateItem-']",
|
||||
sortByDateParam: 'sd=DESC&sf=TIMESTAMP',
|
||||
crawlFields: {
|
||||
id: '@data-estateid | int',
|
||||
price: '.hardfacts_3 strong | removeNewline | trim',
|
||||
size: '.js-object.listitem_wrap .hardfacts_3 div:nth-child(2)| removeNewline | trim',
|
||||
title: '.listcontent.clear h2',
|
||||
id: 'a@id',
|
||||
price: "div[class^='KeyFacts-'] [data-test='price'] | removeNewline | trim",
|
||||
size: "div[class^='KeyFacts-'] [data-test='area'] | removeNewline | trim",
|
||||
title: "div[class^='FactsMain-'] h2",
|
||||
link: 'a@href',
|
||||
address: '.listcontent .details .listlocation| removeNewline | trim',
|
||||
address: "div[class^='estateFacts-'] span | removeNewline | trim",
|
||||
},
|
||||
paginate: '#pnlPaging #nlbPlus@href',
|
||||
normalize: normalize,
|
||||
|
||||
@@ -21,6 +21,8 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#srchrslt-adtable .ad-listitem ',
|
||||
//sort by date is standard oO
|
||||
sortByDateParam: null,
|
||||
crawlFields: {
|
||||
id: '.aditem@data-adid | int',
|
||||
price: '.aditem-main--middle--price | removeNewline | trim',
|
||||
|
||||
@@ -13,6 +13,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '.nbk-container >div article',
|
||||
sortByDateParam: 'Sortierung=Id&Richtung=DESC',
|
||||
crawlFields: {
|
||||
id: '@id',
|
||||
title: 'a.nbk-truncate@title | removeNewline | trim',
|
||||
|
||||
@@ -16,6 +16,7 @@ function applyBlacklist(o) {
|
||||
const config = {
|
||||
url: null,
|
||||
crawlContainer: '#main_column .wgg_card',
|
||||
sortByDateParam: 'sort_column=0&sort_order=0',
|
||||
crawlFields: {
|
||||
id: '@data-id',
|
||||
details: '.row .noprint .col-xs-11 |removeNewline |trim',
|
||||
|
||||
22
lib/services/queryStringMutator.js
Normal file
22
lib/services/queryStringMutator.js
Normal file
@@ -0,0 +1,22 @@
|
||||
const queryString = require('query-string');
|
||||
|
||||
/**
|
||||
* for Fredy, it is important to sort search results by date, starting with the latest listing. if it is not sorted, we
|
||||
* might never actually find the newest results, no matter how many pages we crawl.
|
||||
* It has been written in the documentation, but obviously nobody reads the docs these days, which is why it's been done
|
||||
* automagically now.
|
||||
*
|
||||
* @param _url actual provider url containing the searchParams
|
||||
* @param sortByDateParam param(s) indicating the correct sort order
|
||||
* @returns {`${string}?${string}`} correctly formatted url
|
||||
*/
|
||||
module.exports = (_url, sortByDateParam) => {
|
||||
//if no mutation is necessary, just return the original url
|
||||
if (sortByDateParam == null) {
|
||||
return _url;
|
||||
}
|
||||
|
||||
const original = queryString.parseUrl(_url);
|
||||
const mutate = queryString.parse(sortByDateParam);
|
||||
return `${original.url}?${queryString.stringify({ ...original.query, ...mutate })}`;
|
||||
};
|
||||
@@ -1,4 +1,7 @@
|
||||
const axios = require('axios');
|
||||
const axiosRetry = require('axios-retry');
|
||||
|
||||
axiosRetry(axios, { retryDelay: axiosRetry.exponentialDelay, retries: 3 });
|
||||
|
||||
function makeDriver(headers = {}) {
|
||||
let cookies = '';
|
||||
@@ -15,7 +18,8 @@ function makeDriver(headers = {}) {
|
||||
},
|
||||
});
|
||||
} catch (exception) {
|
||||
callback(exception, null);
|
||||
console.error(`Error while trying to scrape data. Received error: ${exception.message}`);
|
||||
callback(null, []);
|
||||
}
|
||||
|
||||
if (typeof result.data === 'object' && url.toLowerCase().indexOf('scrapingant') !== -1) {
|
||||
|
||||
72
package.json
72
package.json
@@ -1,13 +1,14 @@
|
||||
{
|
||||
"name": "fredy",
|
||||
"version": "5.2.0",
|
||||
"version": "5.4.3",
|
||||
"description": "[F]ind [R]eal [E]states [d]amn eas[y].",
|
||||
"scripts": {
|
||||
"start": "node index.js",
|
||||
"dev": "yarn && export BUILD_DEV='true' && export NODE_ENV='development' && webpack-dev-server --progress --colors --watch --config ./webpack.dev.js",
|
||||
"prod": "export BUILD_DEV='false' && webpack --node-env=production --config ./webpack.prod.js",
|
||||
"format": "prettier --write lib/**/*.js ui/src/**/*.js test/**/*.js *.js --single-quote --print-width 120",
|
||||
"test": "mocha --timeout 20000 test/**/*.test.js"
|
||||
"test": "mocha --timeout 20000 test/**/*.test.js",
|
||||
"lint": "eslint ./index.js ./lib/**/*.js ./test/**/*.js"
|
||||
},
|
||||
"husky": {
|
||||
"hooks": {
|
||||
@@ -32,6 +33,7 @@
|
||||
"house",
|
||||
"rent",
|
||||
"immoscout",
|
||||
"scraper",
|
||||
"immonet",
|
||||
"immowelt",
|
||||
"immobilienscout24"
|
||||
@@ -51,61 +53,63 @@
|
||||
"Firefox ESR"
|
||||
],
|
||||
"dependencies": {
|
||||
"@rematch/core": "2.0.1",
|
||||
"@rematch/loading": "2.0.1",
|
||||
"@sendgrid/mail": "7.4.5",
|
||||
"axios": "0.21.1",
|
||||
"@rematch/core": "2.2.0",
|
||||
"@rematch/loading": "2.1.2",
|
||||
"@sendgrid/mail": "7.6.0",
|
||||
"axios": "0.24.0",
|
||||
"axios-retry": "^3.2.4",
|
||||
"body-parser": "1.19.0",
|
||||
"cookie-session": "1.4.0",
|
||||
"handlebars": "4.7.7",
|
||||
"highcharts": "9.1.2",
|
||||
"highcharts-react-official": "3.0.0",
|
||||
"highcharts": "9.3.1",
|
||||
"highcharts-react-official": "3.1.0",
|
||||
"lowdb": "1.0.0",
|
||||
"markdown": "^0.5.0",
|
||||
"nanoid": "3.1.23",
|
||||
"nanoid": "3.1.30",
|
||||
"node-mailjet": "3.3.4",
|
||||
"query-string": "^7.0.1",
|
||||
"react": "17.0.2",
|
||||
"react-dom": "17.0.2",
|
||||
"react-redux": "7.2.4",
|
||||
"react-router": "5.2.0",
|
||||
"react-router-dom": "5.2.0",
|
||||
"react-redux": "7.2.6",
|
||||
"react-router": "5.2.1",
|
||||
"react-router-dom": "5.3.0",
|
||||
"react-switch": "^6.0.0",
|
||||
"redux": "4.1.0",
|
||||
"redux-thunk": "2.3.0",
|
||||
"restana": "4.9.1",
|
||||
"semantic-ui-react": "2.0.3",
|
||||
"redux": "4.1.2",
|
||||
"redux-thunk": "2.4.0",
|
||||
"restana": "4.9.2",
|
||||
"semantic-ui-react": "2.0.4",
|
||||
"serve-static": "^1.14.1",
|
||||
"slack": "11.0.2",
|
||||
"string-similarity": "^4.0.4",
|
||||
"x-ray": "2.3.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/core": "7.14.6",
|
||||
"@babel/preset-env": "7.14.7",
|
||||
"@babel/preset-react": "7.14.5",
|
||||
"@babel/core": "7.16.0",
|
||||
"@babel/preset-env": "7.16.4",
|
||||
"@babel/preset-react": "7.16.0",
|
||||
"babel-eslint": "10.1.0",
|
||||
"babel-loader": "8.2.2",
|
||||
"babel-loader": "8.2.3",
|
||||
"chai": "4.3.4",
|
||||
"clean-webpack-plugin": "3.0.0",
|
||||
"copy-webpack-plugin": "9.0.1",
|
||||
"css-loader": "5.2.6",
|
||||
"eslint": "7.29.0",
|
||||
"clean-webpack-plugin": "4.0.0",
|
||||
"copy-webpack-plugin": "10.0.0",
|
||||
"css-loader": "6.5.1",
|
||||
"eslint": "7.32.0",
|
||||
"eslint-config-prettier": "8.3.0",
|
||||
"eslint-plugin-react": "7.24.0",
|
||||
"eslint-plugin-react": "7.27.1",
|
||||
"file-loader": "6.2.0",
|
||||
"history": "5.0.0",
|
||||
"history": "5.1.0",
|
||||
"husky": "4.3.8",
|
||||
"less": "4.1.1",
|
||||
"less-loader": "10.0.0",
|
||||
"lint-staged": "11.0.0",
|
||||
"mocha": "9.0.1",
|
||||
"prettier": "2.3.2",
|
||||
"less": "4.1.2",
|
||||
"less-loader": "10.2.0",
|
||||
"lint-staged": "12.1.2",
|
||||
"mocha": "9.1.3",
|
||||
"prettier": "2.5.0",
|
||||
"proxyquire": "2.1.3",
|
||||
"redux-logger": "3.0.6",
|
||||
"style-loader": "3.0.0",
|
||||
"style-loader": "3.3.1",
|
||||
"url-loader": "4.1.1",
|
||||
"webpack": "5.40.0",
|
||||
"webpack-cli": "3.3.12",
|
||||
"webpack": "5.64.4",
|
||||
"webpack-cli": "4.9.1",
|
||||
"webpack-dev-server": "3.11.2",
|
||||
"webpack-merge": "5.8.0"
|
||||
}
|
||||
|
||||
@@ -30,7 +30,7 @@ describe('#immowelt testsuite()', () => {
|
||||
|
||||
notificationObj.payload.forEach((notify) => {
|
||||
/** check the actual structure **/
|
||||
expect(notify.id).to.be.a('number');
|
||||
expect(notify.id).to.be.a('string');
|
||||
expect(notify.price).to.be.a('string');
|
||||
expect(notify.size).to.be.a('string');
|
||||
expect(notify.title).to.be.a('string');
|
||||
@@ -38,7 +38,6 @@ describe('#immowelt testsuite()', () => {
|
||||
expect(notify.address).to.be.a('string');
|
||||
|
||||
/** check the values if possible **/
|
||||
expect(notify.price).that.does.include('€');
|
||||
if (notify.size.trim().toLowerCase() !== 'k.a.') {
|
||||
expect(notify.size).that.does.include('m²');
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"enabled": true
|
||||
},
|
||||
"immowelt": {
|
||||
"url": "https://www.immowelt.de/liste/duesseldorf-benrath/wohnungen/kaufen?geoid=10805111000004%2C10805111000005%2C10805111000006%2C10805111000007%2C10805111000009%2C10805111000010%2C10805111000011%2C10805111000013%2C10805111000014%2C10805111000015%2C10805111000016%2C10805111000017%2C10805111000018%2C10805111000019%2C10805111000023%2C10805111000024%2C10805111000027%2C10805111000032%2C10805111000034%2C10805111000035%2C10805111000039%2C10805111000041%2C10805111000042%2C10805111000043%2C10805111000047%2C10805111000048%2C10805111000049%2C10805111000051%2C10805111000052%2C10805111000053&roomi=3&prima=420000&wflmi=90&sort=createdate%2Bdesc",
|
||||
"url": "https://www.immowelt.de/liste/duesseldorf/wohnungen/kaufen?d=true&rmi=3&sd=DESC&sf=TIMESTAMP&sp=1",
|
||||
"enabled": true
|
||||
},
|
||||
"immoscout": {
|
||||
|
||||
32
test/queryStringMutator/queryStringMutator.test.js
Normal file
32
test/queryStringMutator/queryStringMutator.test.js
Normal file
@@ -0,0 +1,32 @@
|
||||
const testData = require('./testData.json');
|
||||
const expect = require('chai').expect;
|
||||
const fs = require('fs');
|
||||
|
||||
const mutator = require('../../lib/services/queryStringMutator.js');
|
||||
const queryString = require('query-string');
|
||||
|
||||
/**
|
||||
* This test might look a bit weird at first, but listen stranger...
|
||||
* It's not wise to compare 2 urls, as this means all url params must be in the expected order. This is however not
|
||||
* guaranteed, as params (and their order) are totally variable.
|
||||
*/
|
||||
describe('queryStringMutator', () => {
  it('should fix all urls', () => {
    // Load every provider module so each test case can be matched via metaInformation.id.
    const providers = fs.readdirSync('./lib/provider/').map((integPath) => require(`../../lib/provider/${integPath}`));

    for (const test of testData) {
      const provider = providers.find((p) => p.metaInformation.id === test.id);
      if (provider == null) {
        throw new Error(`Cannot find provider for given id: ${test.id}`);
      }

      const fixedUrl = mutator(test.url, provider.config.sortByDateParam);
      const expected = queryString.parseUrl(test.shouldBecome);
      const actual = queryString.parseUrl(fixedUrl);

      // The part in front of the "?" must survive the mutation unchanged.
      expect(actual.url, `base url of ${test.id}`).to.equal(expected.url);

      // Compare the parsed params as key/value objects instead of raw url strings.
      // Deep equality ignores param order, but (unlike a one-directional
      // "include.members" check on keys and values separately) it fails when a
      // param is missing, when an extra param shows up, or when a value ends up
      // under the wrong key.
      expect(actual.query, `query params of ${test.id}`).to.deep.equal(expected.query);
    }
  });
});
|
||||
33
test/queryStringMutator/testData.json
Normal file
33
test/queryStringMutator/testData.json
Normal file
@@ -0,0 +1,33 @@
|
||||
[
|
||||
{
|
||||
"url": "https://www.immowelt.de/liste/40589/wohnungen/mieten?d=true&sd=DESC&sf=PRIMARY_PRICE_AMOUNT&sp=1",
|
||||
"shouldBecome": "https://www.immowelt.de/liste/40589/wohnungen/mieten?d=true&sd=DESC&sf=TIMESTAMP&sp=1",
|
||||
"id": "immowelt"
|
||||
},
|
||||
{
|
||||
"url": "https://www.1a-immobilienmarkt.de/suchen/duesseldorf/wohnung-mieten.html?search=yes",
|
||||
"shouldBecome": "https://www.1a-immobilienmarkt.de/suchen/duesseldorf/wohnung-mieten.html?search=yes&sort_type=newest",
|
||||
"id": "einsAImmobilien"
|
||||
},
|
||||
{
|
||||
"url": "https://www.wg-gesucht.de/1-zimmer-wohnungen-in-Dusseldorf.30.1.1.0.html?sort_column=1&sort_order=0",
|
||||
"shouldBecome": "https://www.wg-gesucht.de/1-zimmer-wohnungen-in-Dusseldorf.30.1.1.0.html?sort_column=0&sort_order=0",
|
||||
"id": "wgGesucht"
|
||||
},
|
||||
|
||||
{
|
||||
"url": "https://www.immonet.de/immobiliensuche/sel.do?sortby=0&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
|
||||
"shouldBecome": "https://www.immonet.de/immobiliensuche/sel.do?sortby=19&suchart=1&objecttype=1&marketingtype=2&parentcat=1&locationname=d%C3%BCsseldorf",
|
||||
"id": "immonet"
|
||||
},
|
||||
{
|
||||
"url": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten",
|
||||
"shouldBecome": "https://www.immobilienscout24.de/Suche/de/nordrhein-westfalen/duesseldorf/wohnung-mieten?sorting=2",
|
||||
"id": "immoscout"
|
||||
},
|
||||
{
|
||||
"url": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/",
|
||||
"shouldBecome": "https://www.neubaukompass.de/neubau-immobilien/berlin-region/?Sortierung=Id&Richtung=DESC",
|
||||
"id": "neubauKompass"
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user