From 04c25ef47603f6a45c8078fe590ae79da6e6a9b8 Mon Sep 17 00:00:00 2001 From: Hashir Omer Date: Fri, 6 Jan 2023 10:13:33 +0000 Subject: [PATCH] Getting cookies from .env file --- main.go | 6 +- upwork/types.go | 11 +++- upwork/upworkClient.go | 53 +++++++++++++++- upwork/upworkPipeline.go | 131 ++++++++++++++++++++++----------------- 4 files changed, 135 insertions(+), 66 deletions(-) diff --git a/main.go b/main.go index 7f70ff0..71fda9b 100644 --- a/main.go +++ b/main.go @@ -1,13 +1,11 @@ package main -import ( - "scrapers/upwork" -) +import "scrapers/upwork" func main() { p := upwork.InitPipeline() - err := p.Run("pdf") + err := p.Run("") if err != nil { panic(err) } diff --git a/upwork/types.go b/upwork/types.go index 2bce7d9..847af59 100644 --- a/upwork/types.go +++ b/upwork/types.go @@ -2,6 +2,13 @@ package upwork import "time" +type LoggedOutError struct { + Error struct { + Code int `json:"code"` + Message string `json:"message"` + } +} + type UpworkApiResponse struct { URL string `json:"url"` SearchGUID string `json:"searchGuid"` @@ -276,7 +283,6 @@ type UpworkApiResponse struct { Jordan int `json:"Jordan"` Bulgaria int `json:"Bulgaria"` Tunisia int `json:"Tunisia"` - CongoTheDemocraticRepublicOfThe int `json:"Congo, the Democratic Republic of the"` UnitedArabEmirates int `json:"United Arab Emirates"` Kenya int `json:"Kenya"` FrenchPolynesia int `json:"French Polynesia"` @@ -337,7 +343,7 @@ type UpworkApiResponse struct { Togo int `json:"Togo"` SouthernAsia int `json:"Southern Asia"` Philippines int `json:"Philippines"` - CoteDIvoire int `json:"Cote d'Ivoire"` + CoteDIvoire int `json:"Cote d\'Ivoire"` Uzbekistan int `json:"Uzbekistan"` Asia int `json:"Asia"` BritishVirginIslands int `json:"British Virgin Islands"` @@ -474,7 +480,6 @@ type UpworkApiResponse struct { Uganda int `json:"Uganda"` Mexico int `json:"Mexico"` Suriname int `json:"Suriname"` - Micronesia int `json:"Micronesia"` Greenland int `json:"Greenland"` } `json:"location"` Timezone struct { diff --git a/upwork/upworkClient.go b/upwork/upworkClient.go index 6dd8b88..2eb13fe 100644 --- a/upwork/upworkClient.go +++ b/upwork/upworkClient.go @@ -1,8 +1,12 @@ package upwork import ( + "bufio" "fmt" + "log" + "os" "scrapers/network" + "strings" ) type Upwork struct { @@ -16,8 +20,8 @@ type UrlArgs struct { } func (u Upwork) ConstructUrl(args UrlArgs) string { - url := "https://www.upwork.com/ab/jobs/search/url?q=%s&per_page=%d&sort=recency&payment_verified=1&page=%d" - return fmt.Sprintf(url, args.Query, args.Per_Page, args.Page) + url := "https://www.upwork.com/ab/jobs/search/url?per_page=%d&sort=recency&payment_verified=1&page=%d&q=%s" + return fmt.Sprintf(url, args.Per_Page, args.Page, args.Query) } func (u Upwork) SendRequest(url string) (string, error) { @@ -31,6 +35,45 @@ func (u Upwork) SendRequest(url string) (string, error) { } +func mergeMaps(m1, m2 map[string]string) map[string]string { + // Iterate over m2 and add its key-value pairs to m1 + for k, v := range m2 { + m1[k] = v + } + return m1 +} + +func readEnv(filename string) (map[string]string, error) { + + // Open the .env file + file, err := os.Open(filename) + if err != nil { + fmt.Print() + return nil, err + } + defer file.Close() + + // Create a map to store the key-value pairs + m := make(map[string]string) + + // Read the file line by line + scanner := bufio.NewScanner(file) + for scanner.Scan() { + // Split the line on the "=" character + parts := strings.SplitN(scanner.Text(), "=", 2) + if len(parts) == 2 { + // Trim leading and trailing whitespace from the key and value + key := strings.TrimSpace(parts[0]) + value := strings.TrimSpace(parts[1]) + + // Add the key-value pair to the map + m[key] = value + } + } + + return m, nil +} + func InitUpwork() *Upwork { headers := map[string]string{ "authority": "www.upwork.com", @@ -47,6 +90,12 @@ func InitUpwork() *Upwork { "x-odesk-user-agent": "oDesk LM", "x-requested-with": "XMLHttpRequest", } + auth_headers, err := readEnv("upwork/.env") + if err != nil { + log.Fatal("Could not read .env.auth file") + } + headers = mergeMaps(auth_headers, headers) + client := network.InitClient(headers) upwork := Upwork{ UpworkHttpClient: client, diff --git a/upwork/upworkPipeline.go b/upwork/upworkPipeline.go index fae6bd9..d662e3a 100644 --- a/upwork/upworkPipeline.go +++ b/upwork/upworkPipeline.go @@ -24,10 +24,6 @@ func InitPipeline() *UpworkPipeLine { func (u *UpworkPipeLine) CombineFiles() error { var all_jobs []interface{} - // all_filenames, err := filepath.Glob("data/*.json") - // if err != nil { - // panic(err) - // } for _, file := range u.filepaths { data, err := os.ReadFile(file) @@ -76,41 +72,60 @@ func (u *UpworkPipeLine) saveToFile(data []byte, filename string) error { } -func isJSON(s string) bool { +func isValidJSON(s string) bool { var js map[string]interface{} return json.Unmarshal([]byte(s), &js) == nil } -func isApiError(data string) bool { +// func print_json(p string) { +// b, err := json.MarshalIndent(p, "", " ") +// if err != nil { +// fmt.Println(err) +// return +// } +// fmt.Println(string(b)) +// } - var result map[string]interface{} +func isValidApiResponse(data string) bool { + + // Deserialize the JSON string into the Person struct + var api_error LoggedOutError + var api_response UpworkApiResponse + + // Deserialize the JSON string into the Person struct + is_api_error := json.Unmarshal([]byte(data), &api_error) + api_resp := json.Unmarshal([]byte(data), &api_response) + + //A known error occured + if is_api_error == nil && api_resp != nil { + + log.Print(api_error.Error.Message) + return false + //Unknown response format + } else if is_api_error != nil && api_resp != nil { + + fmt.Print("Unknown response format") + fmt.Print(data) + return false + + } else { + + return !api_response.SearchResults.JobSearchError - //Parse data as json - err := json.Unmarshal([]byte(data), &result) - if err != nil { - log.Print("The Api did not return expected response") - log.Print("The following was the response from API") - log.Print(data) - panic(err) } - //Get value from key - key := "searchResults" - value := result[key] - - //Check for errors - is_error := value.(map[string]interface{})["jobSearchError"] - - return is_error == true } -func (u *UpworkPipeLine) validateResponse(data string) bool { - return isJSON(data) && !isApiError(data) +func (u *UpworkPipeLine) isResponseValid(data string) bool { + + return isValidJSON(data) && isValidApiResponse(data) } func (u *UpworkPipeLine) getTotalDocuments(urlArgs UrlArgs) (int, error) { + + var API_Response UpworkApiResponse client := u.upworkClient url := client.ConstructUrl(urlArgs) resp, err := u.upworkClient.SendRequest(url) @@ -122,12 +137,10 @@ func (u *UpworkPipeLine) getTotalDocuments(urlArgs UrlArgs) (int, error) { } // check if response is valid - if !u.validateResponse(resp) { + if !u.isResponseValid(resp) { return 0, fmt.Errorf("invalid response") } - var API_Response UpworkApiResponse - json.Unmarshal([]byte(resp), &API_Response) total_docs := API_Response.SearchResults.Paging.Total @@ -144,7 +157,7 @@ func (u *UpworkPipeLine) handleRequest(urlArgs UrlArgs, iteration int) { log.Fatal(err) //check if response is valid - } else if u.validateResponse(resp) { + } else if u.isResponseValid(resp) { filename := fmt.Sprintf("data/%d.json", iteration) // Convert resp to array of bytes err = u.saveToFile([]byte(resp), filename) @@ -178,7 +191,13 @@ func (u *UpworkPipeLine) Run(query string) error { var perPage int var total_docs int - info_message := fmt.Sprintf("Finding Total Jobs for %s", query) + qum := query + + if qum == "" { + qum = "Empty String" + } + + info_message := fmt.Sprintf("Finding Total Jobs for %s", qum) fmt.Println(info_message) urlArgs := UrlArgs{ @@ -191,36 +210,34 @@ func (u *UpworkPipeLine) Run(query string) error { perPage = 50 total_docs, err = u.getTotalDocuments(urlArgs) - log.Print(total_docs) - if err == nil { - info_message := fmt.Sprintf("%s has a total of %d jobs", query, total_docs) - fmt.Println(info_message) - iteration = total_docs / perPage - - if iteration >= 100 { - iteration = 100 - } - - info_message = fmt.Sprintf("A total of %d iterations will be performed", iteration) - fmt.Println(info_message) - - //Found total iterations - u.handledataIteration(perPage, iteration, query) - err = u.CombineFiles() - os.RemoveAll("data") - - if err != nil { - panic(err) - - } - - return nil - - } else { - log.Fatal("Could not retrive total number of jobs") - panic(err) + if err != nil { + log.Print("Could not retrive total number of jobs") + log.Fatal(err) } + info_message = fmt.Sprintf("%s has a total of %d jobs", query, total_docs) + fmt.Println(info_message) + iteration = total_docs / perPage + + if iteration >= 100 { + iteration = 100 + } + + info_message = fmt.Sprintf("A total of %d iterations will be performed", iteration) + fmt.Println(info_message) + + //Found total iterations + u.handledataIteration(perPage, iteration, query) + err = u.CombineFiles() + os.RemoveAll("data") + + if err != nil { + panic(err) + + } + + return nil + } func (u *UpworkPipeLine) handledataIteration(p_per int, iters int, query string) {