mirror of
https://github.com/hashiromer/Upwork-Jobs-scraper-.git
synced 2025-12-29 16:16:01 +00:00
Output is one file
This commit is contained in:
183546
all_jobs.json
Normal file
183546
all_jobs.json
Normal file
File diff suppressed because it is too large
Load Diff
69
main.go
69
main.go
@@ -2,8 +2,10 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/Danny-Dasilva/CycleTLS/cycletls"
|
"github.com/Danny-Dasilva/CycleTLS/cycletls"
|
||||||
@@ -75,7 +77,7 @@ func main() {
|
|||||||
"x-requested-with": "XMLHttpRequest",
|
"x-requested-with": "XMLHttpRequest",
|
||||||
}
|
}
|
||||||
//Upwork limits pagination to 100 pages
|
//Upwork limits pagination to 100 pages
|
||||||
total_iterations := 100
|
total_iterations := 10
|
||||||
//Query to search for on Upwork, searching for jobs with shopify keyword
|
//Query to search for on Upwork, searching for jobs with shopify keyword
|
||||||
query := "shopify"
|
query := "shopify"
|
||||||
//Number of results per page
|
//Number of results per page
|
||||||
@@ -87,7 +89,6 @@ func main() {
|
|||||||
upwork_api_url_template := "https://www.upwork.com/search/jobs/url?q=%s&per_page=%d&sort=recency&page=%d"
|
upwork_api_url_template := "https://www.upwork.com/search/jobs/url?q=%s&per_page=%d&sort=recency&page=%d"
|
||||||
url := fmt.Sprintf(upwork_api_url_template, query, per_page, i)
|
url := fmt.Sprintf(upwork_api_url_template, query, per_page, i)
|
||||||
|
|
||||||
//Sleep for 2 seconds
|
|
||||||
time.Sleep(2 * time.Second)
|
time.Sleep(2 * time.Second)
|
||||||
|
|
||||||
data, err := get_data(url, headers)
|
data, err := get_data(url, headers)
|
||||||
@@ -107,5 +108,67 @@ func main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println("Done")
|
fmt.Println("Scraping done")
|
||||||
|
|
||||||
|
files, err := filepath.Glob("data/*.json")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var all_jobs []map[string]interface{}
|
||||||
|
for _, file := range files {
|
||||||
|
fmt.Println(file)
|
||||||
|
data, err := os.ReadFile(file)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
//Parse data as JSON into a generic map[string]interface{}
|
||||||
|
var result map[string]interface{}
|
||||||
|
err = json.Unmarshal(data, &result)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
//Get value from key
|
||||||
|
key := "searchResults"
|
||||||
|
value := result[key]
|
||||||
|
|
||||||
|
//Check for errors
|
||||||
|
is_error := value.(map[string]interface{})["jobSearchError"]
|
||||||
|
|
||||||
|
//Skip the file if is_error is True
|
||||||
|
|
||||||
|
if is_error == true {
|
||||||
|
fmt.Println("Error")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
//Get jobs from the json
|
||||||
|
jobs := value.(map[string]interface{})["jobs"]
|
||||||
|
|
||||||
|
//Add all jobs to the all_jobs slice
|
||||||
|
for _, job := range jobs.([]interface{}) {
|
||||||
|
all_jobs = append(all_jobs, job.(map[string]interface{}))
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
jobs := map[string]interface{}{
|
||||||
|
"jobs": all_jobs,
|
||||||
|
}
|
||||||
|
|
||||||
|
json_data, err := json.Marshal(jobs)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = save_to_file(string(json_data), "all_jobs.json")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
os.RemoveAll("data")
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user