mirror of
https://github.com/hashiromer/Upwork-Jobs-scraper-.git
synced 2025-12-29 16:16:01 +00:00
Initial commit
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
data/
|
||||
5
.vscode/extensions.json
vendored
Normal file
5
.vscode/extensions.json
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"golang.go"
|
||||
]
|
||||
}
|
||||
16
go.mod
Normal file
16
go.mod
Normal file
@@ -0,0 +1,16 @@
|
||||
module github.com/USERNAME/simple-go-service
|
||||
|
||||
go 1.18
|
||||
|
||||
require (
|
||||
github.com/Danny-Dasilva/CycleTLS/cycletls v0.0.0-20220620102923-c84d740b4757
|
||||
github.com/Danny-Dasilva/fhttp v0.0.0-20220524230104-f801520157d6 // indirect
|
||||
github.com/Danny-Dasilva/utls v0.0.0-20220604023528-30cb107b834e // indirect
|
||||
github.com/andybalholm/brotli v1.0.4 // indirect
|
||||
github.com/dsnet/compress v0.0.1 // indirect
|
||||
github.com/gorilla/websocket v1.5.0 // indirect
|
||||
golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e // indirect
|
||||
golang.org/x/net v0.0.0-20220615171555-694bf12d69de // indirect
|
||||
golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c // indirect
|
||||
golang.org/x/text v0.3.7 // indirect
|
||||
)
|
||||
53
go.sum
Normal file
53
go.sum
Normal file
@@ -0,0 +1,53 @@
|
||||
github.com/Danny-Dasilva/CycleTLS/cycletls v0.0.0-20220620102923-c84d740b4757 h1:QH22vGS2DO07khPzKe3/CgFevznQkip5WNGEsQX7mFI=
|
||||
github.com/Danny-Dasilva/CycleTLS/cycletls v0.0.0-20220620102923-c84d740b4757/go.mod h1:R4Hj85bdRH8zqymQ/oZUCmEsODgP3NpUvTEJtaVai7Y=
|
||||
github.com/Danny-Dasilva/fhttp v0.0.0-20220418170016-5ea1c560e6a8/go.mod h1:t534vrahRNn9ax1tRiYSUvwJSa9jWaYYgETlfodBPm4=
|
||||
github.com/Danny-Dasilva/fhttp v0.0.0-20220524230104-f801520157d6 h1:Wzbitazy0HugGNRACX7ZB1En21LT/TiVF6YbxoTTqN8=
|
||||
github.com/Danny-Dasilva/fhttp v0.0.0-20220524230104-f801520157d6/go.mod h1:2IT2IFG+d+zzFuj3+ksGtVytcCBsF402zMNWHsWhD2U=
|
||||
github.com/Danny-Dasilva/utls v0.0.0-20220418055514-7c61e0dbb504/go.mod h1:A2g8gPTJWDD3Y4iCTNon2vG3VcjdTBcgWBlZtopfNxU=
|
||||
github.com/Danny-Dasilva/utls v0.0.0-20220418175931-f38e470e04f2/go.mod h1:A2g8gPTJWDD3Y4iCTNon2vG3VcjdTBcgWBlZtopfNxU=
|
||||
github.com/Danny-Dasilva/utls v0.0.0-20220604023528-30cb107b834e h1:tqiguW0yAcIwQBQtD+d2rjBnboqB7CwG1OZ12F8avX8=
|
||||
github.com/Danny-Dasilva/utls v0.0.0-20220604023528-30cb107b834e/go.mod h1:ssfbVNUfWJVRfW41RTpedOUlGXSq3J6aLmirUVkDgJk=
|
||||
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
|
||||
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
|
||||
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
||||
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
|
||||
github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
|
||||
github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
|
||||
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
|
||||
github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc=
|
||||
github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
||||
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
|
||||
gitlab.com/yawning/bsaes.git v0.0.0-20190805113838-0a714cd429ec/go.mod h1:BZ1RAoRPbCxum9Grlv5aeksu2H8BiKehBYooU2LFiOQ=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
|
||||
golang.org/x/crypto v0.0.0-20220518034528-6f7dac969898/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
|
||||
golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e h1:T8NU3HyQ8ClP4SEE+KbFlg6n0NhuTsN4MyznaarGsZM=
|
||||
golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
|
||||
golang.org/x/net v0.0.0-20190328230028-74de082e2cca/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20210610132358-84b48f89b13b/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/net v0.0.0-20220412020605-290c469a71a5/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
|
||||
golang.org/x/net v0.0.0-20220615171555-694bf12d69de h1:ogOG2+P6LjO2j55AkRScrkB2BFpd+Z8TY2wcM0Z3MGo=
|
||||
golang.org/x/net v0.0.0-20220615171555-694bf12d69de/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c h1:aFV+BgZ4svzjfabn8ERpuB4JI4N6/rdy1iusx77G3oU=
|
||||
golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
111
main.go
Normal file
111
main.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/Danny-Dasilva/CycleTLS/cycletls"
|
||||
)
|
||||
|
||||
func get_data(url string, headers map[string]string) (string, error) {
|
||||
client := cycletls.Init()
|
||||
response, err := client.Do(
|
||||
url,
|
||||
cycletls.Options{
|
||||
Body: "",
|
||||
Ja3: "771,4865-4867-4866-49195-49199-52393-52392-49196-49200-49162-49161-49171-49172-51-57-47-53-10,0-23-65281-10-11-35-16-5-51-43-13-45-28-21,29-23-24-25-256-257,0",
|
||||
UserAgent: "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:87.0) Gecko/20100101 Firefox/87.0",
|
||||
Headers: headers,
|
||||
},
|
||||
"GET",
|
||||
)
|
||||
|
||||
if err == nil {
|
||||
return string(response.Body), nil
|
||||
}
|
||||
return response.Body, nil
|
||||
|
||||
}
|
||||
|
||||
func save_to_file(data string, filename string) error {
|
||||
file, err := os.Create(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
writer := bufio.NewWriter(file)
|
||||
_, err = writer.WriteString(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
writer.Flush()
|
||||
return nil
|
||||
}
|
||||
|
||||
// func check_captcha(data map[string]string) bool {
|
||||
// if _, ok := data["blockScript"]; ok {
|
||||
// return true
|
||||
// }
|
||||
// return false
|
||||
|
||||
// }
|
||||
|
||||
func main() {
|
||||
|
||||
os.RemoveAll("data")
|
||||
|
||||
os.Mkdir("data", 0755)
|
||||
|
||||
headers := map[string]string{
|
||||
"authority": "www.upwork.com",
|
||||
"accept": "application/json, text/plain",
|
||||
"accept-language": "en",
|
||||
"cache-control": "no-cache",
|
||||
"pragma": "no-cache",
|
||||
"referer": "https://www.upwork.com/search/jobs/url?per_page=10&sort=recency",
|
||||
"sec-fetch-site": "same-origin",
|
||||
"sec-gpc": "1",
|
||||
"vnd-eo-parent-span-id": "2724011d-2430-47f5-b5b9-603f2e919685",
|
||||
"vnd-eo-span-id": "9d6e5b36-ace2-402e-a188-01da1d6b84ee",
|
||||
"x-odesk-user-agent": "oDesk LM",
|
||||
"x-requested-with": "XMLHttpRequest",
|
||||
}
|
||||
|
||||
total_iterations := 100
|
||||
//Query to serach for on Upwork
|
||||
query := "shopify"
|
||||
//Number of results per page
|
||||
per_page := 100
|
||||
|
||||
start := 1
|
||||
for i := start; i <= total_iterations; i++ {
|
||||
fmt.Print("Iteration: ", i, "\n")
|
||||
upwork_api_url_template := "https://www.upwork.com/search/jobs/url?q=%s&per_page=%d&sort=recency&page=%d"
|
||||
url := fmt.Sprintf(upwork_api_url_template, query, per_page, i)
|
||||
|
||||
//Sleep for 2 seconds
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
data, err := get_data(url, headers)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
fmt.Println(i)
|
||||
panic(err)
|
||||
|
||||
}
|
||||
|
||||
//Get body of the response
|
||||
filename := fmt.Sprintf("data/%d.json", i)
|
||||
err = save_to_file(data, filename)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println("Done")
|
||||
}
|
||||
1
transform.go
Normal file
1
transform.go
Normal file
@@ -0,0 +1 @@
|
||||
package transform
|
||||
Reference in New Issue
Block a user