Updated Upwork API

This commit is contained in:
Hashir Omer 2024-08-03 22:53:13 +05:00
commit c7c2998595
12 changed files with 1375 additions and 0 deletions

13
.gitignore vendored Normal file
View File

@ -0,0 +1,13 @@
data/
all_jobs.json
*.json
*.csv
*.7z
oryxBuildBinary
*.ipynb
*.txt
*.env
!.env.example

132
JobSearchQuery.gql Normal file
View File

@ -0,0 +1,132 @@
# UserJobSearch calls userJobSearchV1 with the caller-supplied $requestVariables
# and selects three things from the response: paging information, facet
# key/value pairs, and the per-job fields listed under results.
query UserJobSearch($requestVariables: UserJobSearchV1Request!) {
search {
universalSearchNuxt {
userJobSearchV1(request: $requestVariables) {
paging {
total
offset
count
}
facets {
jobType {
key
value
}
workload {
key
value
}
clientHires {
key
value
}
durationV3 {
key
value
}
amount {
key
value
}
contractorTier {
key
value
}
contractToHire {
key
value
}
# Aliased: the connectPrice facet is returned under the key "connects".
connects: connectPrice {
key
value
}
# Aliased: the payment facet is returned under the key "paymentVerified".
paymentVerified: payment {
key
value
}
proposals {
key
value
}
previousClients {
key
value
}
}
results {
id
title
description
relevanceEncoded
ontologySkills {
uid
parentSkillUid
prefLabel
# prefLabel is selected a second time under the alias "prettyName".
prettyName: prefLabel
freeText
highlighted
}
isSTSVectorSearchResult
connectPrice
applied
upworkHistoryData {
client {
paymentVerificationStatus
country
totalReviews
totalFeedback
hasFinancialPrivacy
totalSpent {
isoCurrencyCode
amount
}
}
freelancerClientRelation {
lastContractRid
companyName
lastContractTitle
}
}
jobTile {
job {
id
# Aliased: cipherText is returned under the all-lowercase key "ciphertext".
ciphertext: cipherText
jobType
weeklyRetainerBudget
hourlyBudgetMax
hourlyBudgetMin
hourlyEngagementType
contractorTier
sourcingTimestamp
createTime
publishTime
enterpriseJob
personsToHire
premium
totalApplicants
hourlyEngagementDuration {
rid
label
weeks
mtime
ctime
}
fixedPriceAmount {
isoCurrencyCode
amount
}
fixedPriceEngagementDuration {
id
rid
label
weeks
ctime
mtime
}
}
}
}
}
}
}
}

24
README.md Normal file
View File

@ -0,0 +1,24 @@
# Upwork-Jobs-scraper
This code uses Upwork's internal API to scrape new jobs posted on Upwork. I am not using a headless browser for two reasons.
1. Headless browsers are far more resource-intensive than calling an API.
2. I don't have to deal with HTML parsing; the API returns JSON, which can be passed directly to downstream systems.
# Note
The code uses Go instead of Python because Upwork filters bots by checking the TLS signatures of incoming requests. Unfortunately, I could not
find a way to do this in pure Python, because Python is compiled against OpenSSL and popular browsers do not use it: Chrome uses BoringSSL and Firefox uses NSS.
These SSL libraries use different extensions and cipher suites, which makes inspecting the TLS-level configuration a more robust method of detecting bot traffic.
Go is a lower-level language than Python, so it allows changing network-level configuration. I am using the `cycletls` package for Go, which makes spoofing TLS/JA3 fingerprints an easy task.
# How can you contribute?
These are some of the features I think could be useful.
- Better error handling with channels.
- Add support for automatic proxy rotation. It can be extremely effective when used in conjunction with goroutines.
- Add an API schema for the Upwork API.
- Add more scrapers; a lot of the logic is platform-agnostic and could be reused to build scrapers for other platforms.

17
go.mod Normal file
View File

@ -0,0 +1,17 @@
module scrapers
go 1.18
require github.com/Danny-Dasilva/CycleTLS/cycletls v0.0.0-20220620102923-c84d740b4757
require (
github.com/Danny-Dasilva/fhttp v0.0.0-20220524230104-f801520157d6 // indirect
github.com/Danny-Dasilva/utls v0.0.0-20220604023528-30cb107b834e // indirect
github.com/andybalholm/brotli v1.0.4 // indirect
github.com/dsnet/compress v0.0.1 // indirect
github.com/gorilla/websocket v1.5.0 // indirect
golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e // indirect
golang.org/x/net v0.0.0-20220615171555-694bf12d69de // indirect
golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c // indirect
golang.org/x/text v0.3.7 // indirect
)

53
go.sum Normal file
View File

@ -0,0 +1,53 @@
github.com/Danny-Dasilva/CycleTLS/cycletls v0.0.0-20220620102923-c84d740b4757 h1:QH22vGS2DO07khPzKe3/CgFevznQkip5WNGEsQX7mFI=
github.com/Danny-Dasilva/CycleTLS/cycletls v0.0.0-20220620102923-c84d740b4757/go.mod h1:R4Hj85bdRH8zqymQ/oZUCmEsODgP3NpUvTEJtaVai7Y=
github.com/Danny-Dasilva/fhttp v0.0.0-20220418170016-5ea1c560e6a8/go.mod h1:t534vrahRNn9ax1tRiYSUvwJSa9jWaYYgETlfodBPm4=
github.com/Danny-Dasilva/fhttp v0.0.0-20220524230104-f801520157d6 h1:Wzbitazy0HugGNRACX7ZB1En21LT/TiVF6YbxoTTqN8=
github.com/Danny-Dasilva/fhttp v0.0.0-20220524230104-f801520157d6/go.mod h1:2IT2IFG+d+zzFuj3+ksGtVytcCBsF402zMNWHsWhD2U=
github.com/Danny-Dasilva/utls v0.0.0-20220418055514-7c61e0dbb504/go.mod h1:A2g8gPTJWDD3Y4iCTNon2vG3VcjdTBcgWBlZtopfNxU=
github.com/Danny-Dasilva/utls v0.0.0-20220418175931-f38e470e04f2/go.mod h1:A2g8gPTJWDD3Y4iCTNon2vG3VcjdTBcgWBlZtopfNxU=
github.com/Danny-Dasilva/utls v0.0.0-20220604023528-30cb107b834e h1:tqiguW0yAcIwQBQtD+d2rjBnboqB7CwG1OZ12F8avX8=
github.com/Danny-Dasilva/utls v0.0.0-20220604023528-30cb107b834e/go.mod h1:ssfbVNUfWJVRfW41RTpedOUlGXSq3J6aLmirUVkDgJk=
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc=
github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
gitlab.com/yawning/bsaes.git v0.0.0-20190805113838-0a714cd429ec/go.mod h1:BZ1RAoRPbCxum9Grlv5aeksu2H8BiKehBYooU2LFiOQ=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.0.0-20220518034528-6f7dac969898/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e h1:T8NU3HyQ8ClP4SEE+KbFlg6n0NhuTsN4MyznaarGsZM=
golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/net v0.0.0-20190328230028-74de082e2cca/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20210610132358-84b48f89b13b/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220412020605-290c469a71a5/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.0.0-20220615171555-694bf12d69de h1:ogOG2+P6LjO2j55AkRScrkB2BFpd+Z8TY2wcM0Z3MGo=
golang.org/x/net v0.0.0-20220615171555-694bf12d69de/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c h1:aFV+BgZ4svzjfabn8ERpuB4JI4N6/rdy1iusx77G3oU=
golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

13
main.go Normal file
View File

@ -0,0 +1,13 @@
package main
import "scrapers/upwork"
// main builds the Upwork pipeline and runs it with an empty string argument,
// panicking if the run reports an error.
func main() {
	pipeline := upwork.InitPipeline()
	if err := pipeline.Run(""); err != nil {
		panic(err)
	}
}

35
network/httpClient.go Normal file
View File

@ -0,0 +1,35 @@
package network
import "github.com/Danny-Dasilva/CycleTLS/cycletls"
// Client pairs a CycleTLS instance with the request options that GetRequest
// and PostRequest pass along on every call.
type Client struct {
// httpClient is the CycleTLS instance used to perform requests.
httpClient cycletls.CycleTLS
// options holds the body, JA3 string, user agent and headers sent with a request.
options cycletls.Options
}
// InitClient returns a Client backed by a freshly initialised CycleTLS
// instance. Requests made through it carry the supplied headers together with
// a fixed JA3 string and a Firefox 87 user agent.
func InitClient(headers map[string]string) *Client {
	return &Client{
		httpClient: cycletls.Init(),
		options: cycletls.Options{
			Body:      "",
			Ja3:       "771,4865-4867-4866-49195-49199-52393-52392-49196-49200-49162-49161-49171-49172-51-57-47-53-10,0-23-65281-10-11-35-16-5-51-43-13-45-28-21,29-23-24-25-256-257,0",
			UserAgent: "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:87.0) Gecko/20100101 Firefox/87.0",
			Headers:   headers,
		},
	}
}
// GetRequest issues a GET to url using the client's stored options and hands
// back the CycleTLS response and error unchanged.
func (c Client) GetRequest(url string) (cycletls.Response, error) {
	const method = "GET"
	resp, err := c.httpClient.Do(url, c.options, method)
	return resp, err
}
// PostRequest sends body to url as a POST with Content-Type application/json,
// using the client's stored options for everything else, and returns the
// CycleTLS response and error unchanged.
//
// The header map is copied before Content-Type is added. The map held in
// c.options is the very map the caller handed to InitClient, so writing to it
// in place would (a) panic when the client was created with nil headers and
// (b) leak Content-Type into the caller's map and into later GET requests.
func (c Client) PostRequest(url string, body string) (cycletls.Response, error) {
	headers := make(map[string]string, len(c.options.Headers)+1)
	for name, value := range c.options.Headers {
		headers[name] = value
	}
	headers["Content-Type"] = "application/json"

	// c is a value receiver, so these assignments only touch this call's copy
	// of the options and never the client stored by the caller.
	c.options.Body = body
	c.options.Headers = headers
	return c.httpClient.Do(url, c.options, "POST")
}

BIN
scrapers Executable file

Binary file not shown.

2
upwork/.env.example Normal file
View File

@ -0,0 +1,2 @@
authorization
cookie

828
upwork/types.go Normal file
View File

@ -0,0 +1,828 @@
package upwork
import "time"
// LoggedOutError models a JSON payload that carries a numeric error code and a
// message inside a nested object.
// NOTE(review): judging by the name, this is presumably the body returned when
// the session is not authenticated — confirm against the code that decodes it.
type LoggedOutError struct {
// Error has no json tag; if decoded with encoding/json, the key is matched
// against the field name case-insensitively.
Error struct {
Code int `json:"code"`
Message string `json:"message"`
}
}
type UpworkApiResponse struct {
URL string `json:"url"`
SearchGUID string `json:"searchGuid"`
SearchResults struct {
Q string `json:"q"`
Paging struct {
Total int `json:"total"`
Offset int `json:"offset"`
Count int `json:"count"`
ResultSetTs int `json:"resultSetTs"`
} `json:"paging"`
Spellcheck struct {
CorrectedQueries []interface{} `json:"corrected_queries"`
} `json:"spellcheck"`
Jobs []struct {
Title string `json:"title"`
CreatedOn time.Time `json:"createdOn"`
Type int `json:"type"`
Ciphertext string `json:"ciphertext"`
Description string `json:"description"`
Category2 interface{} `json:"category2"`
Subcategory2 interface{} `json:"subcategory2"`
Skills []interface{} `json:"skills"`
Duration string `json:"duration"`
ShortDuration string `json:"shortDuration"`
DurationLabel string `json:"durationLabel"`
Engagement string `json:"engagement"`
ShortEngagement string `json:"shortEngagement"`
Amount struct {
CurrencyCode string `json:"currencyCode"`
Amount int `json:"amount"`
} `json:"amount"`
Recno int `json:"recno"`
UID string `json:"uid"`
Client struct {
PaymentVerificationStatus int `json:"paymentVerificationStatus"`
Location struct {
Country string `json:"country"`
} `json:"location"`
TotalSpent float64 `json:"totalSpent"`
TotalReviews int `json:"totalReviews"`
TotalFeedback float64 `json:"totalFeedback"`
CompanyRid int `json:"companyRid"`
CompanyName interface{} `json:"companyName"`
EdcUserID int `json:"edcUserId"`
LastContractPlatform interface{} `json:"lastContractPlatform"`
LastContractRid int `json:"lastContractRid"`
LastContractTitle interface{} `json:"lastContractTitle"`
FeedbackText string `json:"feedbackText"`
CompanyOrgUID string `json:"companyOrgUid"`
HasFinancialPrivacy bool `json:"hasFinancialPrivacy"`
} `json:"client"`
FreelancersToHire int `json:"freelancersToHire"`
RelevanceEncoded string `json:"relevanceEncoded"`
EnterpriseJob bool `json:"enterpriseJob"`
TierText string `json:"tierText"`
Tier string `json:"tier"`
TierLabel string `json:"tierLabel"`
IsSaved interface{} `json:"isSaved"`
Feedback string `json:"feedback"`
ProposalsTier string `json:"proposalsTier"`
IsApplied bool `json:"isApplied"`
Sticky bool `json:"sticky"`
StickyLabel string `json:"stickyLabel"`
JobTs string `json:"jobTs"`
PrefFreelancerLocationMandatory bool `json:"prefFreelancerLocationMandatory"`
PrefFreelancerLocation []interface{} `json:"prefFreelancerLocation"`
Premium bool `json:"premium"`
PlusBadge interface{} `json:"plusBadge"`
PublishedOn time.Time `json:"publishedOn"`
RenewedOn time.Time `json:"renewedOn"`
SandsService interface{} `json:"sandsService"`
SandsSpec interface{} `json:"sandsSpec"`
SandsAttrs interface{} `json:"sandsAttrs"`
Occupation interface{} `json:"occupation"`
Attrs []struct {
ParentSkillUID interface{} `json:"parentSkillUid"`
FreeText interface{} `json:"freeText"`
SkillType int `json:"skillType"`
UID string `json:"uid"`
Highlighted bool `json:"highlighted"`
PrettyName string `json:"prettyName"`
} `json:"attrs"`
IsLocal bool `json:"isLocal"`
WorkType interface{} `json:"workType"`
Locations []interface{} `json:"locations"`
Occupations struct {
Category struct {
UID string `json:"uid"`
PrefLabel string `json:"prefLabel"`
} `json:"category"`
Subcategories []struct {
UID string `json:"uid"`
PrefLabel string `json:"prefLabel"`
} `json:"subcategories"`
Oservice struct {
UID string `json:"uid"`
PrefLabel string `json:"prefLabel"`
} `json:"oservice"`
} `json:"occupations"`
WeeklyBudget interface{} `json:"weeklyBudget"`
HourlyBudgetText interface{} `json:"hourlyBudgetText"`
Tags []interface{} `json:"tags"`
ClientRelation interface{} `json:"clientRelation"`
TotalFreelancersToHire int `json:"totalFreelancersToHire"`
TeamUID interface{} `json:"teamUid"`
MultipleFreelancersToHirePredicted interface{} `json:"multipleFreelancersToHirePredicted"`
ConnectPrice int `json:"connectPrice"`
} `json:"jobs"`
Facets struct {
JobType struct {
Num0 int `json:"0"`
Num1 int `json:"1"`
WeeklyRetainer int `json:"weekly_retainer"`
} `json:"jobType"`
Workload struct {
None int `json:"none"`
FullTime int `json:"full_time"`
AsNeeded int `json:"as_needed"`
} `json:"workload"`
Duration struct {
Ongoing int `json:"ongoing"`
Week int `json:"week"`
Month int `json:"month"`
Semester int `json:"semester"`
Quarter int `json:"quarter"`
} `json:"duration"`
ClientHires struct {
Num0 int `json:"0"`
One9 int `json:"1-9"`
One0 int `json:"10-"`
} `json:"clientHires"`
Budget struct {
Num0 int `json:"0"`
Num50 int `json:"50"`
Num100 int `json:"100"`
Num250 int `json:"250"`
Num500 int `json:"500"`
Num1000 int `json:"1000"`
Num2000 int `json:"2000"`
Num5000 int `json:"5000"`
Num10000 int `json:"10000"`
Num20000 int `json:"20000"`
Num100000 int `json:"100000"`
} `json:"budget"`
ContractorTier struct {
Num1 int `json:"1"`
Num2 int `json:"2"`
Num3 int `json:"3"`
} `json:"contractorTier"`
Categories2 []struct {
Category struct {
Slug string `json:"slug"`
Name string `json:"name"`
ResultCount int `json:"resultCount"`
} `json:"category"`
Subcategories []struct {
Slug string `json:"slug"`
Name string `json:"name"`
ResultCount int `json:"resultCount"`
} `json:"subcategories"`
} `json:"categories2"`
PreviousClients struct {
All int `json:"all"`
} `json:"previousClients"`
Proposals struct {
Five9 int `json:"5-9"`
Two049 int `json:"20-49"`
Zero4 int `json:"0-4"`
One014 int `json:"10-14"`
NAMING_FAILED int `json:"*"`
One519 int `json:"15-19"`
} `json:"proposals"`
Amount struct {
FiveThousandPlus int `json:"5000-"`
ANY int `json:"*"`
Hundred_to_Five_hundred int `json:"100-499"`
Thousand_to_five_thousand int `json:"1000-4999"`
ZeroTo_Ninty_Nine int `json:"0-99"`
FiveHundred_to_thousand int `json:"500-999"`
} `json:"amount"`
UserLocationMatch struct {
Num1 int `json:"1"`
} `json:"userLocationMatch"`
Services interface{} `json:"services"`
Specs interface{} `json:"specs"`
Attrs interface{} `json:"attrs"`
DurationV2 struct {
Week int `json:"week"`
Ongoing int `json:"ongoing"`
Month int `json:"month"`
} `json:"durationV2"`
Enterprise struct {
True int `json:"true"`
False int `json:"false"`
} `json:"enterprise"`
UserDomesticJobsLastDays struct {
Num7 int `json:"7"`
} `json:"userDomesticJobsLastDays"`
DurationV3 struct {
Weeks int `json:"weeks"`
Months int `json:"months"`
Semester int `json:"semester"`
Ongoing int `json:"ongoing"`
} `json:"durationV3"`
Occupations []struct {
TaxonomyLevel string `json:"taxonomyLevel"`
UID string `json:"uid"`
Label string `json:"label"`
Count int `json:"count"`
Occupations []struct {
TaxonomyLevel string `json:"taxonomyLevel"`
UID string `json:"uid"`
Label string `json:"label"`
Count int `json:"count"`
Occupations []struct {
TaxonomyLevel string `json:"taxonomyLevel"`
UID string `json:"uid"`
Label string `json:"label"`
Count int `json:"count"`
Occupations interface{} `json:"occupations"`
} `json:"occupations"`
} `json:"occupations"`
} `json:"occupations"`
EngagementDuration interface{} `json:"engagementDuration"`
FreelancersNeeded struct {
Zero1 int `json:"0-1"`
Six int `json:"6-"`
Two5 int `json:"2-5"`
Two int `json:"2-"`
Zero int `json:"0-"`
} `json:"freelancersNeeded"`
Location struct {
SouthAmerica int `json:"South America"`
PapuaNewGuinea int `json:"Papua New Guinea"`
Cambodia int `json:"Cambodia"`
Paraguay int `json:"Paraguay"`
Kazakhstan int `json:"Kazakhstan"`
Bahamas int `json:"Bahamas"`
MarshallIslands int `json:"Marshall Islands"`
Mali int `json:"Mali"`
Panama int `json:"Panama"`
Guadeloupe int `json:"Guadeloupe"`
EasternAsia int `json:"Eastern Asia"`
Laos int `json:"Laos"`
Argentina int `json:"Argentina"`
Seychelles int `json:"Seychelles"`
Africa int `json:"Africa"`
Zambia int `json:"Zambia"`
Belize int `json:"Belize"`
Bahrain int `json:"Bahrain"`
Congo int `json:"Congo"`
GuineaBissau int `json:"Guinea-Bissau"`
SaintBarthelemy int `json:"Saint Barthelemy"`
Namibia int `json:"Namibia"`
PalestinianTerritories int `json:"Palestinian Territories"`
Finland int `json:"Finland"`
FaroeIslands int `json:"Faroe Islands"`
NetherlandsAntilles int `json:"Netherlands Antilles"`
Georgia int `json:"Georgia"`
SaintKittsAndNevis int `json:"Saint Kitts and Nevis"`
Yemen int `json:"Yemen"`
PuertoRico int `json:"Puerto Rico"`
Madagascar int `json:"Madagascar"`
Aruba int `json:"Aruba"`
Sweden int `json:"Sweden"`
Malawi int `json:"Malawi"`
Andorra int `json:"Andorra"`
Liechtenstein int `json:"Liechtenstein"`
Poland int `json:"Poland"`
UnitedStatesVirginIslands int `json:"United States Virgin Islands"`
Jordan int `json:"Jordan"`
Bulgaria int `json:"Bulgaria"`
Tunisia int `json:"Tunisia"`
UnitedArabEmirates int `json:"United Arab Emirates"`
Kenya int `json:"Kenya"`
FrenchPolynesia int `json:"French Polynesia"`
Lebanon int `json:"Lebanon"`
Djibouti int `json:"Djibouti"`
Azerbaijan int `json:"Azerbaijan"`
CzechRepublic int `json:"Czech Republic"`
SaintLucia int `json:"Saint Lucia"`
Mauritania int `json:"Mauritania"`
Guernsey int `json:"Guernsey"`
Mayotte int `json:"Mayotte"`
SanMarino int `json:"San Marino"`
Israel int `json:"Israel"`
Australia int `json:"Australia"`
Tajikistan int `json:"Tajikistan"`
Myanmar int `json:"Myanmar"`
CentralAmerica int `json:"Central America"`
EasternAfrica int `json:"Eastern Africa"`
Cameroon int `json:"Cameroon"`
Gibraltar int `json:"Gibraltar"`
Cyprus int `json:"Cyprus"`
Malaysia int `json:"Malaysia"`
Oman int `json:"Oman"`
Iceland int `json:"Iceland"`
Armenia int `json:"Armenia"`
Gabon int `json:"Gabon"`
WesternAsia int `json:"Western Asia"`
Polynesia int `json:"Polynesia"`
Luxembourg int `json:"Luxembourg"`
Brazil int `json:"Brazil"`
TurksAndCaicosIslands int `json:"Turks and Caicos Islands"`
Algeria int `json:"Algeria"`
Slovenia int `json:"Slovenia"`
Jersey int `json:"Jersey"`
AntiguaAndBarbuda int `json:"Antigua and Barbuda"`
Ecuador int `json:"Ecuador"`
Colombia int `json:"Colombia"`
Moldova int `json:"Moldova"`
Vanuatu int `json:"Vanuatu"`
UnitedStatesMinorOutlyingIslands int `json:"United States Minor Outlying Islands"`
Italy int `json:"Italy"`
Honduras int `json:"Honduras"`
Haiti int `json:"Haiti"`
Burundi int `json:"Burundi"`
Singapore int `json:"Singapore"`
FrenchGuiana int `json:"French Guiana"`
AmericanSamoa int `json:"American Samoa"`
Russia int `json:"Russia"`
Netherlands int `json:"Netherlands"`
China int `json:"China"`
Martinique int `json:"Martinique"`
SaintPierreAndMiquelon int `json:"Saint Pierre and Miquelon"`
Kyrgyzstan int `json:"Kyrgyzstan"`
Reunion int `json:"Reunion"`
Bhutan int `json:"Bhutan"`
Romania int `json:"Romania"`
MiddleAfrica int `json:"Middle Africa"`
Togo int `json:"Togo"`
SouthernAsia int `json:"Southern Asia"`
Philippines int `json:"Philippines"`
CoteDIvoire int `json:"Cote d\'Ivoire"`
Uzbekistan int `json:"Uzbekistan"`
Asia int `json:"Asia"`
BritishVirginIslands int `json:"British Virgin Islands"`
Zimbabwe int `json:"Zimbabwe"`
BritishIndianOceanTerritory int `json:"British Indian Ocean Territory"`
Montenegro int `json:"Montenegro"`
Indonesia int `json:"Indonesia"`
Dominica int `json:"Dominica"`
Benin int `json:"Benin"`
Angola int `json:"Angola"`
EasternEurope int `json:"Eastern Europe"`
Portugal int `json:"Portugal"`
BruneiDarussalam int `json:"Brunei Darussalam"`
NewCaledonia int `json:"New Caledonia"`
Grenada int `json:"Grenada"`
Greece int `json:"Greece"`
CaymanIslands int `json:"Cayman Islands"`
Mongolia int `json:"Mongolia"`
Latvia int `json:"Latvia"`
Morocco int `json:"Morocco"`
Guyana int `json:"Guyana"`
Guatemala int `json:"Guatemala"`
Chile int `json:"Chile"`
Nepal int `json:"Nepal"`
NorthernEurope int `json:"Northern Europe"`
IsleOfMan int `json:"Isle of Man"`
Ukraine int `json:"Ukraine"`
Tanzania int `json:"Tanzania"`
Ghana int `json:"Ghana"`
HolySee int `json:"Holy See"`
CentralAsia int `json:"Central Asia"`
Anguilla int `json:"Anguilla"`
SouthEasternAsia int `json:"South-Eastern Asia"`
India int `json:"India"`
Canada int `json:"Canada"`
Maldives int `json:"Maldives"`
Turkey int `json:"Turkey"`
Belgium int `json:"Belgium"`
Taiwan int `json:"Taiwan"`
TrinidadAndTobago int `json:"Trinidad and Tobago"`
SouthernEurope int `json:"Southern Europe"`
SouthAfrica int `json:"South Africa"`
Bermuda int `json:"Bermuda"`
AlandIslands int `json:"Aland Islands"`
Jamaica int `json:"Jamaica"`
Turkmenistan int `json:"Turkmenistan"`
Peru int `json:"Peru"`
Germany int `json:"Germany"`
Americas int `json:"Americas"`
Fiji int `json:"Fiji"`
HongKong int `json:"Hong Kong"`
UnitedStates int `json:"United States"`
Guinea int `json:"Guinea"`
MicronesiaFederatedStatesOf int `json:"Micronesia, Federated States of"`
Somalia int `json:"Somalia"`
Chad int `json:"Chad"`
Thailand int `json:"Thailand"`
Kiribati int `json:"Kiribati"`
EquatorialGuinea int `json:"Equatorial Guinea"`
CostaRica int `json:"Costa Rica"`
SaintMartinFrenchPart int `json:"Saint Martin (French part)"`
Vietnam int `json:"Vietnam"`
Nigeria int `json:"Nigeria"`
Kuwait int `json:"Kuwait"`
Croatia int `json:"Croatia"`
Uruguay int `json:"Uruguay"`
SriLanka int `json:"Sri Lanka"`
CookIslands int `json:"Cook Islands"`
TimorLeste int `json:"Timor-Leste"`
UnitedKingdom int `json:"United Kingdom"`
Switzerland int `json:"Switzerland"`
Samoa int `json:"Samoa"`
Spain int `json:"Spain"`
WesternAfrica int `json:"Western Africa"`
Venezuela int `json:"Venezuela"`
BurkinaFaso int `json:"Burkina Faso"`
Swaziland int `json:"Swaziland"`
Caribbean int `json:"Caribbean"`
Estonia int `json:"Estonia"`
Niue int `json:"Niue"`
SouthKorea int `json:"South Korea"`
Austria int `json:"Austria"`
Mozambique int `json:"Mozambique"`
ElSalvador int `json:"El Salvador"`
Monaco int `json:"Monaco"`
Guam int `json:"Guam"`
Lesotho int `json:"Lesotho"`
Tonga int `json:"Tonga"`
NorthernAfrica int `json:"Northern Africa"`
WesternSahara int `json:"Western Sahara"`
Hungary int `json:"Hungary"`
Japan int `json:"Japan"`
Europe int `json:"Europe"`
Curacao int `json:"Curacao"`
Belarus int `json:"Belarus"`
Mauritius int `json:"Mauritius"`
WesternEurope int `json:"Western Europe"`
Albania int `json:"Albania"`
NewZealand int `json:"New Zealand"`
NorthernAmerica int `json:"Northern America"`
SintMaartenDutchPart int `json:"Sint Maarten (Dutch part)"`
Senegal int `json:"Senegal"`
Macedonia int `json:"Macedonia"`
Ethiopia int `json:"Ethiopia"`
Egypt int `json:"Egypt"`
SierraLeone int `json:"Sierra Leone"`
Bolivia int `json:"Bolivia"`
Oceania int `json:"Oceania"`
Malta int `json:"Malta"`
SaudiArabia int `json:"Saudi Arabia"`
CapeVerde int `json:"Cape Verde"`
Pakistan int `json:"Pakistan"`
Gambia int `json:"Gambia"`
Qatar int `json:"Qatar"`
Ireland int `json:"Ireland"`
Slovakia int `json:"Slovakia"`
Serbia int `json:"Serbia"`
Lithuania int `json:"Lithuania"`
France int `json:"France"`
BosniaAndHerzegovina int `json:"Bosnia and Herzegovina"`
AustraliaAndNewZealand int `json:"Australia and New Zealand"`
Rwanda int `json:"Rwanda"`
Bangladesh int `json:"Bangladesh"`
Nicaragua int `json:"Nicaragua"`
Barbados int `json:"Barbados"`
Norway int `json:"Norway"`
SouthernAfrica int `json:"Southern Africa"`
SaintVincentAndTheGrenadines int `json:"Saint Vincent and the Grenadines"`
Botswana int `json:"Botswana"`
Melanesia int `json:"Melanesia"`
Macao int `json:"Macao"`
DominicanRepublic int `json:"Dominican Republic"`
Denmark int `json:"Denmark"`
Uganda int `json:"Uganda"`
Mexico int `json:"Mexico"`
Suriname int `json:"Suriname"`
Greenland int `json:"Greenland"`
} `json:"location"`
Timezone struct {
int `json:""`
AmericaSaoPaulo int `json:"America/Sao_Paulo"`
AsiaVladivostok int `json:"Asia/Vladivostok"`
EuropeBerlin int `json:"Europe/Berlin"`
AfricaCairo int `json:"Africa/Cairo"`
EuropeMoscow int `json:"Europe/Moscow"`
PacificHonolulu int `json:"Pacific/Honolulu"`
AustraliaHobart int `json:"Australia/Hobart"`
EuropeLondon int `json:"Europe/London"`
AsiaBaghdad int `json:"Asia/Baghdad"`
AsiaShanghai int `json:"Asia/Shanghai"`
AmericaTijuana int `json:"America/Tijuana"`
AmericaManagua int `json:"America/Managua"`
AsiaYerevan int `json:"Asia/Yerevan"`
AsiaKamchatka int `json:"Asia/Kamchatka"`
AfricaHarare int `json:"Africa/Harare"`
AmericaNome int `json:"America/Nome"`
AsiaYakutsk int `json:"Asia/Yakutsk"`
AmericaChicago int `json:"America/Chicago"`
AmericaHalifax int `json:"America/Halifax"`
AmericaIndianaIndianapolis int `json:"America/Indiana/Indianapolis"`
EuropeParis int `json:"Europe/Paris"`
PacificFiji int `json:"Pacific/Fiji"`
AsiaTehran int `json:"Asia/Tehran"`
AmericaLaPaz int `json:"America/La_Paz"`
AsiaTashkent int `json:"Asia/Tashkent"`
AsiaBangkok int `json:"Asia/Bangkok"`
PacificMidway int `json:"Pacific/Midway"`
AmericaRecife int `json:"America/Recife"`
AmericaBuenosAires int `json:"America/Buenos_Aires"`
AustraliaAdelaide int `json:"Australia/Adelaide"`
AsiaYangon int `json:"Asia/Yangon"`
AsiaKatmandu int `json:"Asia/Katmandu"`
AsiaAlmaty int `json:"Asia/Almaty"`
AmericaPhoenix int `json:"America/Phoenix"`
EuropePrague int `json:"Europe/Prague"`
AmericaMexicoCity int `json:"America/Mexico_City"`
AsiaTbilisi int `json:"Asia/Tbilisi"`
AsiaJerusalem int `json:"Asia/Jerusalem"`
EuropeLisbon int `json:"Europe/Lisbon"`
AtlanticSouthGeorgia int `json:"Atlantic/South_Georgia"`
AsiaKarachi int `json:"Asia/Karachi"`
AustraliaPerth int `json:"Australia/Perth"`
AustraliaDarwin int `json:"Australia/Darwin"`
AsiaCalcutta int `json:"Asia/Calcutta"`
AmericaBogota int `json:"America/Bogota"`
AsiaKabul int `json:"Asia/Kabul"`
AmericaNewYork int `json:"America/New_York"`
AtlanticAzores int `json:"Atlantic/Azores"`
AsiaKrasnoyarsk int `json:"Asia/Krasnoyarsk"`
EET int `json:"EET"`
PacificAuckland int `json:"Pacific/Auckland"`
EuropeMinsk int `json:"Europe/Minsk"`
AfricaCasablanca int `json:"Africa/Casablanca"`
AmericaCaracas int `json:"America/Caracas"`
EuropeKiev int `json:"Europe/Kiev"`
AsiaMagadan int `json:"Asia/Magadan"`
AmericaRegina int `json:"America/Regina"`
PacificGuam int `json:"Pacific/Guam"`
AsiaIrkutsk int `json:"Asia/Irkutsk"`
AfricaAlgiers int `json:"Africa/Algiers"`
AmericaStJohns int `json:"America/St_Johns"`
AmericaFortaleza int `json:"America/Fortaleza"`
AmericaDenver int `json:"America/Denver"`
AmericaIndianaKnox int `json:"America/Indiana/Knox"`
EtcUTC int `json:"Etc/UTC"`
AustraliaSydney int `json:"Australia/Sydney"`
PacificApia int `json:"Pacific/Apia"`
AsiaTokyo int `json:"Asia/Tokyo"`
AsiaIstanbul int `json:"Asia/Istanbul"`
AsiaOmsk int `json:"Asia/Omsk"`
AustraliaBrisbane int `json:"Australia/Brisbane"`
AmericaLosAngeles int `json:"America/Los_Angeles"`
AsiaYekaterinburg int `json:"Asia/Yekaterinburg"`
EuropeAthens int `json:"Europe/Athens"`
} `json:"timezone"`
ConnectPrice struct {
Num4 int `json:"4"`
Num6 int `json:"6"`
Zero2 int `json:"0-2"`
} `json:"connectPrice"`
ContractToHire struct {
True int `json:"true"`
False int `json:"false"`
} `json:"contractToHire"`
Categories []interface{} `json:"categories"`
Subcategories []interface{} `json:"subcategories"`
PaymentVerified struct {
Num1 int `json:"1"`
} `json:"paymentVerified"`
} `json:"facets"`
IsSearchWithEmptyParams bool `json:"isSearchWithEmptyParams"`
CurrentQuery struct {
Sort string `json:"sort"`
} `json:"currentQuery"`
QueryParsedParams struct {
Sort string `json:"sort"`
Paging string `json:"paging"`
} `json:"queryParsedParams"`
PageTitle string `json:"pageTitle"`
JobSearchError bool `json:"jobSearchError"`
RssLink string `json:"rssLink"`
AtomLink string `json:"atomLink"`
SmartSearch struct {
DownloadTeamApplication bool `json:"downloadTeamApplication"`
} `json:"smartSearch"`
} `json:"searchResults"`
Filters struct {
Q string `json:"q"`
Sort string `json:"sort"`
Skills struct {
Name string `json:"name"`
Label string `json:"label"`
Options []interface{} `json:"options"`
} `json:"skills"`
Categories []struct {
URLName string `json:"urlName"`
Value string `json:"value"`
Label string `json:"label"`
ActiveLabel string `json:"activeLabel"`
Checked bool `json:"checked"`
Count int `json:"count"`
Subcategories []struct {
URLName string `json:"urlName"`
Value string `json:"value"`
Label string `json:"label"`
ActiveLabel string `json:"activeLabel"`
Checked bool `json:"checked"`
Count int `json:"count"`
} `json:"subcategories"`
} `json:"categories"`
JobType struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
LoggingLabel string `json:"loggingLabel"`
Options []struct {
Value string `json:"value"`
Label string `json:"label"`
Checked bool `json:"checked"`
Count int `json:"count"`
} `json:"options"`
} `json:"jobType"`
ContractorTier struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
LoggingLabel string `json:"loggingLabel"`
Options []struct {
Value string `json:"value"`
Label string `json:"label"`
Checked bool `json:"checked"`
Count int `json:"count"`
} `json:"options"`
} `json:"contractorTier"`
ClientHires struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
LoggingLabel string `json:"loggingLabel"`
Options []struct {
Value string `json:"value"`
Label string `json:"label"`
Checked bool `json:"checked"`
Count int `json:"count"`
} `json:"options"`
} `json:"clientHires"`
Proposals struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
LoggingLabel string `json:"loggingLabel"`
Options []struct {
Value string `json:"value"`
Label string `json:"label"`
Checked bool `json:"checked"`
Count int `json:"count"`
ActiveLabel string `json:"activeLabel"`
} `json:"options"`
} `json:"proposals"`
Amount struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
DisabledJobTypeValue string `json:"disabledJobTypeValue"`
DisabledMessage string `json:"disabledMessage"`
Options []struct {
Value string `json:"value"`
Label string `json:"label"`
Checked bool `json:"checked"`
Count int `json:"count"`
} `json:"options"`
CustomAmount struct {
Value interface{} `json:"value"`
Label string `json:"label"`
Checked bool `json:"checked"`
CustomMin interface{} `json:"customMin"`
CustomMax interface{} `json:"customMax"`
} `json:"customAmount"`
} `json:"amount"`
Workload struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
LoggingLabel string `json:"loggingLabel"`
Options []struct {
Value string `json:"value"`
Label string `json:"label"`
Checked bool `json:"checked"`
Count int `json:"count"`
} `json:"options"`
DisabledJobTypeValue string `json:"disabledJobTypeValue"`
DisabledMessage string `json:"disabledMessage"`
} `json:"workload"`
DurationV3 struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
LoggingLabel string `json:"loggingLabel"`
Options []struct {
Value string `json:"value"`
Label string `json:"label"`
Checked bool `json:"checked"`
Count int `json:"count"`
} `json:"options"`
DisabledJobTypeValue string `json:"disabledJobTypeValue"`
DisabledMessage string `json:"disabledMessage"`
} `json:"duration_v3"`
PreviousClients struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
Value string `json:"value"`
Checked bool `json:"checked"`
Count int `json:"count"`
} `json:"previousClients"`
PaymentVerified struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
Value string `json:"value"`
Checked bool `json:"checked"`
Count int `json:"count"`
} `json:"paymentVerified"`
TalentClouds struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
Options []interface{} `json:"options"`
} `json:"talentClouds"`
UserLocationMatch struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
Value string `json:"value"`
Checked bool `json:"checked"`
Count int `json:"count"`
} `json:"userLocationMatch"`
Occupation struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
Options []interface{} `json:"options"`
} `json:"occupation"`
OntologySkills struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
Options []interface{} `json:"options"`
} `json:"ontologySkills"`
HourlyRate struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
Options []interface{} `json:"options"`
CustomHourlyRate struct {
Value interface{} `json:"value"`
Label string `json:"label"`
Checked bool `json:"checked"`
CustomMin interface{} `json:"customMin"`
CustomMax interface{} `json:"customMax"`
} `json:"customHourlyRate"`
} `json:"hourly_rate"`
FreelancersNeeded struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
LoggingLabel string `json:"loggingLabel"`
Options []struct {
Value string `json:"value"`
Label string `json:"label"`
Checked bool `json:"checked"`
Count int `json:"count"`
} `json:"options"`
} `json:"freelancers_needed"`
Location struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
Options []struct {
UID string `json:"uid"`
Label string `json:"label"`
SubRegionUID string `json:"subRegionUid,omitempty"`
RegionUID string `json:"regionUid,omitempty"`
Value string `json:"value"`
Type string `json:"type"`
Checked bool `json:"checked"`
Count int `json:"count"`
} `json:"options"`
} `json:"location"`
Timezones struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
Options []struct {
Value string `json:"value"`
Label string `json:"label"`
Checked bool `json:"checked"`
Count int `json:"count"`
} `json:"options"`
} `json:"timezones"`
Connects struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
LoggingLabel string `json:"loggingLabel"`
Options []struct {
Value string `json:"value"`
Label string `json:"label"`
Checked bool `json:"checked"`
Count int `json:"count"`
} `json:"options"`
} `json:"connects"`
ContractToHire struct {
Name string `json:"name"`
Label string `json:"label"`
LoggingSublocation string `json:"loggingSublocation"`
Value string `json:"value"`
Checked bool `json:"checked"`
Count int `json:"count"`
} `json:"contractToHire"`
} `json:"filters"`
}

104
upwork/upworkClient.go Normal file
View File

@ -0,0 +1,104 @@
package upwork
import (
"bufio"
"encoding/json"
"log"
"os"
"scrapers/network"
"strings"
)
// Upwork is a thin client for Upwork's GraphQL endpoint. All requests are
// posted through UpworkHttpClient (see SendRequest).
type Upwork struct {
// UpworkHttpClient is the HTTP client used to post requests; InitUpwork
// builds it with network.InitClient from the merged header set.
UpworkHttpClient *network.Client
}
// GraphQLRequest is the JSON body posted by SendRequest: the query text plus
// the variables referenced by that query.
type GraphQLRequest struct {
// Query is the GraphQL document, serialized under the "query" key.
Query string `json:"query"`
// Variables holds the query variables, serialized under the "variables" key.
Variables map[string]interface{} `json:"variables"`
}
// SendRequest serializes query and variables into a GraphQLRequest, posts it
// to Upwork's GraphQL endpoint through UpworkHttpClient and returns the raw
// response body.
//
// An error is returned when the request cannot be encoded as JSON or when the
// HTTP client reports a failure; in both cases the returned string is empty.
func (u Upwork) SendRequest(query string, variables map[string]interface{}) (string, error) {
	const endpoint = "https://www.upwork.com/api/graphql/v1"

	payload, err := json.Marshal(GraphQLRequest{Query: query, Variables: variables})
	if err != nil {
		return "", err
	}

	response, err := u.UpworkHttpClient.PostRequest(endpoint, string(payload))
	if err != nil {
		return "", err
	}
	return response.Body, nil
}
// readGraphQLQuery loads the whole file at filename and returns its contents
// as a string. On a read failure it returns the empty string together with
// the error reported by os.ReadFile.
func readGraphQLQuery(filename string) (string, error) {
	raw, readErr := os.ReadFile(filename)
	if readErr != nil {
		return "", readErr
	}

	queryText := string(raw)
	return queryText, nil
}
// mergeMaps copies every entry of m2 into m1 and returns m1. Keys present in
// both maps end up with the value from m2.
//
// m1 is modified in place. If m1 is nil a fresh map is allocated first, since
// assigning into a nil map would panic; in that case the returned map is the
// only way to observe the result.
func mergeMaps(m1, m2 map[string]string) map[string]string {
	if m1 == nil {
		m1 = make(map[string]string, len(m2))
	}
	for k, v := range m2 {
		m1[k] = v
	}
	return m1
}
// readEnv parses a simple KEY=VALUE file into a map.
//
// Each line is split at the first "=", and both key and value are trimmed of
// surrounding whitespace, so values may themselves contain "=". Blank lines,
// lines starting with "#", lines without "=" and lines with an empty key are
// skipped. Values are not unquoted.
//
// It returns an error if the file cannot be opened or if scanning fails part
// way through (for example on a read error or a line longer than the
// bufio.Scanner token limit); a partially filled map is never returned.
func readEnv(filename string) (map[string]string, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	env := make(map[string]string)
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		parts := strings.SplitN(line, "=", 2)
		if len(parts) != 2 {
			continue
		}
		key := strings.TrimSpace(parts[0])
		if key == "" {
			continue
		}
		env[key] = strings.TrimSpace(parts[1])
	}
	// Scan returns false on both EOF and failure; only Err tells them apart.
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return env, nil
}
// InitUpwork builds an Upwork client whose HTTP client sends a fixed set of
// browser-like default headers plus whatever key/value pairs are found in
// upwork/.env.
//
// The process is terminated via log.Fatalf if upwork/.env cannot be read; the
// underlying error is included in the message.
func InitUpwork() *Upwork {
	headers := map[string]string{
		"authority":        "www.upwork.com",
		"accept":           "application/json, text/plain",
		"accept-language":  "en",
		"content-type":     "application/json",
		"cache-control":    "no-cache",
		"pragma":           "no-cache",
		"referer":          "https://www.upwork.com/search/jobs/",
		"sec-fetch-site":   "same-origin",
		"sec-gpc":          "1",
		"x-requested-with": "XMLHttpRequest",
	}

	authHeaders, err := readEnv("upwork/.env")
	if err != nil {
		log.Fatalf("Could not read .env file, please add .env file in upwork folder: %v", err)
	}

	// The defaults are merged into the .env entries, so a default header
	// overwrites a .env entry that uses the same key.
	headers = mergeMaps(authHeaders, headers)

	return &Upwork{
		UpworkHttpClient: network.InitClient(headers),
	}
}

154
upwork/upworkPipeline.go Normal file
View File

@ -0,0 +1,154 @@
package upwork
import (
"encoding/json"
"fmt"
"log"
"os"
"time"
)
// UpworkPipeLine fetches job search results page by page through an Upwork
// client and appends them to a JSON Lines file (see Run).
type UpworkPipeLine struct {
// upworkClient is the client used for every GraphQL request.
upworkClient *Upwork
}
// InitPipeline returns a pipeline wired to a freshly initialised Upwork
// client obtained from InitUpwork.
func InitPipeline() *UpworkPipeLine {
	pipeline := new(UpworkPipeLine)
	pipeline.upworkClient = InitUpwork()
	return pipeline
}
// appendToJSONL appends every element of data to filename as one JSON
// document per line, creating the file (mode 0644) if it does not exist.
//
// The whole batch is encoded before the file is touched, so an element that
// cannot be marshalled leaves the file unchanged instead of leaving a partial
// batch behind. The batch is then written with a single Write call, and the
// error from Close is returned because a failed close can mean lost data.
func (u *UpworkPipeLine) appendToJSONL(data []interface{}, filename string) error {
	var batch []byte
	for _, item := range data {
		jsonLine, err := json.Marshal(item)
		if err != nil {
			return err
		}
		batch = append(batch, jsonLine...)
		batch = append(batch, '\n')
	}

	file, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return err
	}
	if _, err := file.Write(batch); err != nil {
		// The write error is the one worth reporting; close is best effort.
		file.Close()
		return err
	}
	return file.Close()
}
// isResponseValid reports whether data decodes into a JSON object (or null)
// that has no top-level "errors" key. Anything that fails to decode into a
// map, and any object carrying an "errors" key regardless of its value, is
// treated as invalid.
func (u *UpworkPipeLine) isResponseValid(data string) bool {
	var decoded map[string]interface{}
	if json.Unmarshal([]byte(data), &decoded) != nil {
		return false
	}
	if _, found := decoded["errors"]; found {
		return false
	}
	return true
}
// jobSearchNode decodes a GraphQL response body and returns the object found
// at data.search.universalSearchNuxt.userJobSearchV1.
//
// Every step of the walk is a checked type assertion, so a missing, null or
// differently typed level produces an error rather than a panic.
func jobSearchNode(resp string) (map[string]interface{}, error) {
	var node map[string]interface{}
	if err := json.Unmarshal([]byte(resp), &node); err != nil {
		return nil, err
	}
	for _, key := range []string{"data", "search", "universalSearchNuxt", "userJobSearchV1"} {
		// Indexing a nil map is safe and yields nil, which fails the assertion.
		next, ok := node[key].(map[string]interface{})
		if !ok {
			return nil, fmt.Errorf("unexpected response structure: %q is missing or not an object", key)
		}
		node = next
	}
	return node, nil
}

// getTotalDocuments sends the search request and returns the value of
// paging.total from the response, i.e. the number of jobs matching the query.
//
// It returns an error if the request fails, if isResponseValid rejects the
// body, or if the response does not have the expected shape.
func (u *UpworkPipeLine) getTotalDocuments(query string, variables map[string]interface{}) (int, error) {
	resp, err := u.upworkClient.SendRequest(query, variables)
	if err != nil {
		return 0, err
	}
	if !u.isResponseValid(resp) {
		return 0, fmt.Errorf("invalid response")
	}

	search, err := jobSearchNode(resp)
	if err != nil {
		return 0, err
	}
	paging, ok := search["paging"].(map[string]interface{})
	if !ok {
		return 0, fmt.Errorf("unexpected response structure")
	}
	// encoding/json decodes JSON numbers into float64 when the target is interface{}.
	total, ok := paging["total"].(float64)
	if !ok {
		return 0, fmt.Errorf("total is not a number")
	}
	return int(total), nil
}

// handleRequest sends the search request and appends the "results" array of
// the response to filename, one JSON document per line (see appendToJSONL).
//
// A body rejected by isResponseValid is printed to standard output before the
// error is returned, so the server's complaint is visible. An error is also
// returned when the request fails or the response lacks a results array.
func (u *UpworkPipeLine) handleRequest(query string, variables map[string]interface{}, filename string) error {
	resp, err := u.upworkClient.SendRequest(query, variables)
	if err != nil {
		return err
	}
	if !u.isResponseValid(resp) {
		// Print the response so the reason for the rejection can be inspected.
		fmt.Println(resp)
		return fmt.Errorf("invalid response returned")
	}

	search, err := jobSearchNode(resp)
	if err != nil {
		return err
	}
	results, ok := search["results"].([]interface{})
	if !ok {
		return fmt.Errorf("unexpected response structure")
	}
	return u.appendToJSONL(results, filename)
}
// Run downloads every job matching userQuery, sorted by recency, and appends
// the results to upwork_jobs_<YYYY-MM-DD>.jsonl in the working directory.
//
// The GraphQL query text is read from JobSearchQuery.gql. A first request
// determines the total number of matches; the results are then fetched in
// pages of 50. The page count is rounded up so the final, partially filled
// page is fetched too.
//
// Run returns an error if the query file cannot be read or the total cannot
// be determined. A failure on an individual page is logged and the remaining
// pages are still attempted, so Run can return nil with some pages missing.
func (u *UpworkPipeLine) Run(userQuery string) error {
	const pageSize = 50

	query, err := readGraphQLQuery("JobSearchQuery.gql")
	if err != nil {
		return err
	}

	// paging is shared with variables, so updating the offset here changes
	// what the next request sends.
	paging := map[string]interface{}{
		"offset": 0,
		"count":  pageSize,
	}
	variables := map[string]interface{}{
		"requestVariables": map[string]interface{}{
			"userQuery": userQuery,
			"sort":      "recency",
			"highlight": true,
			"paging":    paging,
		},
	}

	totalDocs, err := u.getTotalDocuments(query, variables)
	if err != nil {
		return err
	}
	fmt.Printf("%s has a total of %d jobs\n", userQuery, totalDocs)

	// Ceiling division: plain totalDocs / pageSize would skip the last
	// partial page and fetch nothing at all for fewer than pageSize jobs.
	iterations := (totalDocs + pageSize - 1) / pageSize
	// if iterations > 100 {
	// 	iterations = 100
	// }
	fmt.Printf("A total of %d iterations will be performed\n", iterations)

	filename := fmt.Sprintf("upwork_jobs_%s.jsonl", time.Now().Format("2006-01-02"))
	for i := 0; i < iterations; i++ {
		fmt.Printf("Processing iteration %d of %d\n", i+1, iterations)
		paging["offset"] = i * pageSize
		if err := u.handleRequest(query, variables, filename); err != nil {
			// Best effort: keep going so one bad page does not lose the rest.
			log.Printf("Error in iteration %d: %v", i+1, err)
		}
	}
	fmt.Printf("Job data has been written to %s\n", filename)
	return nil
}