mirror of
https://github.com/hashiromer/Upwork-Jobs-scraper-.git
synced 2025-12-29 16:16:01 +00:00
Getting cookies from .env file
This commit is contained in:
@@ -24,10 +24,6 @@ func InitPipeline() *UpworkPipeLine {
|
||||
func (u *UpworkPipeLine) CombineFiles() error {
|
||||
|
||||
var all_jobs []interface{}
|
||||
// all_filenames, err := filepath.Glob("data/*.json")
|
||||
// if err != nil {
|
||||
// panic(err)
|
||||
// }
|
||||
|
||||
for _, file := range u.filepaths {
|
||||
data, err := os.ReadFile(file)
|
||||
@@ -76,41 +72,60 @@ func (u *UpworkPipeLine) saveToFile(data []byte, filename string) error {
|
||||
|
||||
}
|
||||
|
||||
func isJSON(s string) bool {
|
||||
func isValidJSON(s string) bool {
|
||||
var js map[string]interface{}
|
||||
return json.Unmarshal([]byte(s), &js) == nil
|
||||
|
||||
}
|
||||
|
||||
func isApiError(data string) bool {
|
||||
// func print_json(p string) {
|
||||
// b, err := json.MarshalIndent(p, "", " ")
|
||||
// if err != nil {
|
||||
// fmt.Println(err)
|
||||
// return
|
||||
// }
|
||||
// fmt.Println(string(b))
|
||||
// }
|
||||
|
||||
var result map[string]interface{}
|
||||
func isValidApiResponse(data string) bool {
|
||||
|
||||
// Deserialize the JSON string into the Person struct
|
||||
var api_error LoggedOutError
|
||||
var api_response UpworkApiResponse
|
||||
|
||||
// Try to deserialize the JSON string as both a logged-out error and a regular API response
|
||||
is_api_error := json.Unmarshal([]byte(data), &api_error)
|
||||
api_resp := json.Unmarshal([]byte(data), &api_response)
|
||||
|
||||
//A known error occurred
|
||||
if is_api_error == nil && api_resp != nil {
|
||||
|
||||
log.Print(api_error.Error.Message)
|
||||
return false
|
||||
//Unknown response format
|
||||
} else if is_api_error != nil && api_resp != nil {
|
||||
|
||||
fmt.Print("Unknown response format")
|
||||
fmt.Print(data)
|
||||
return false
|
||||
|
||||
} else {
|
||||
|
||||
return !api_response.SearchResults.JobSearchError
|
||||
|
||||
//Parse data as json
|
||||
err := json.Unmarshal([]byte(data), &result)
|
||||
if err != nil {
|
||||
log.Print("The Api did not return expected response")
|
||||
log.Print("The following was the response from API")
|
||||
log.Print(data)
|
||||
panic(err)
|
||||
}
|
||||
|
||||
//Get value from key
|
||||
key := "searchResults"
|
||||
value := result[key]
|
||||
|
||||
//Check for errors
|
||||
is_error := value.(map[string]interface{})["jobSearchError"]
|
||||
|
||||
return is_error == true
|
||||
}
|
||||
|
||||
func (u *UpworkPipeLine) validateResponse(data string) bool {
|
||||
return isJSON(data) && !isApiError(data)
|
||||
func (u *UpworkPipeLine) isResponseValid(data string) bool {
|
||||
|
||||
return isValidJSON(data) && isValidApiResponse(data)
|
||||
|
||||
}
|
||||
|
||||
func (u *UpworkPipeLine) getTotalDocuments(urlArgs UrlArgs) (int, error) {
|
||||
|
||||
var API_Response UpworkApiResponse
|
||||
client := u.upworkClient
|
||||
url := client.ConstructUrl(urlArgs)
|
||||
resp, err := u.upworkClient.SendRequest(url)
|
||||
@@ -122,12 +137,10 @@ func (u *UpworkPipeLine) getTotalDocuments(urlArgs UrlArgs) (int, error) {
|
||||
}
|
||||
|
||||
// check if response is valid
|
||||
if !u.validateResponse(resp) {
|
||||
if !u.isResponseValid(resp) {
|
||||
return 0, fmt.Errorf("invalid response")
|
||||
}
|
||||
|
||||
var API_Response UpworkApiResponse
|
||||
|
||||
json.Unmarshal([]byte(resp), &API_Response)
|
||||
|
||||
total_docs := API_Response.SearchResults.Paging.Total
|
||||
@@ -144,7 +157,7 @@ func (u *UpworkPipeLine) handleRequest(urlArgs UrlArgs, iteration int) {
|
||||
log.Fatal(err)
|
||||
|
||||
//check if response is valid
|
||||
} else if u.validateResponse(resp) {
|
||||
} else if u.isResponseValid(resp) {
|
||||
filename := fmt.Sprintf("data/%d.json", iteration)
|
||||
// Convert resp to array of bytes
|
||||
err = u.saveToFile([]byte(resp), filename)
|
||||
@@ -178,7 +191,13 @@ func (u *UpworkPipeLine) Run(query string) error {
|
||||
var perPage int
|
||||
var total_docs int
|
||||
|
||||
info_message := fmt.Sprintf("Finding Total Jobs for %s", query)
|
||||
qum := query
|
||||
|
||||
if qum == "" {
|
||||
qum = "Empty String"
|
||||
}
|
||||
|
||||
info_message := fmt.Sprintf("Finding Total Jobs for %s", qum)
|
||||
fmt.Println(info_message)
|
||||
|
||||
urlArgs := UrlArgs{
|
||||
@@ -191,36 +210,34 @@ func (u *UpworkPipeLine) Run(query string) error {
|
||||
perPage = 50
|
||||
total_docs, err = u.getTotalDocuments(urlArgs)
|
||||
|
||||
log.Print(total_docs)
|
||||
if err == nil {
|
||||
info_message := fmt.Sprintf("%s has a total of %d jobs", query, total_docs)
|
||||
fmt.Println(info_message)
|
||||
iteration = total_docs / perPage
|
||||
|
||||
if iteration >= 100 {
|
||||
iteration = 100
|
||||
}
|
||||
|
||||
info_message = fmt.Sprintf("A total of %d iterations will be performed", iteration)
|
||||
fmt.Println(info_message)
|
||||
|
||||
//Found total iterations
|
||||
u.handledataIteration(perPage, iteration, query)
|
||||
err = u.CombineFiles()
|
||||
os.RemoveAll("data")
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
} else {
|
||||
log.Fatal("Could not retrive total number of jobs")
|
||||
panic(err)
|
||||
if err != nil {
|
||||
log.Print("Could not retrive total number of jobs")
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
info_message = fmt.Sprintf("%s has a total of %d jobs", query, total_docs)
|
||||
fmt.Println(info_message)
|
||||
iteration = total_docs / perPage
|
||||
|
||||
if iteration >= 100 {
|
||||
iteration = 100
|
||||
}
|
||||
|
||||
info_message = fmt.Sprintf("A total of %d iterations will be performed", iteration)
|
||||
fmt.Println(info_message)
|
||||
|
||||
//Found total iterations
|
||||
u.handledataIteration(perPage, iteration, query)
|
||||
err = u.CombineFiles()
|
||||
os.RemoveAll("data")
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
func (u *UpworkPipeLine) handledataIteration(p_per int, iters int, query string) {
|
||||
|
||||
Reference in New Issue
Block a user