From 5d699cdde056fce24ec2da89538a307a2491d1d8 Mon Sep 17 00:00:00 2001 From: mzack Date: Wed, 29 Dec 2021 09:48:46 +0100 Subject: [PATCH 1/4] Adding support for full navigation history to headless matchers --- v2/pkg/protocols/headless/engine/page.go | 16 +++++++++++++ v2/pkg/protocols/headless/engine/rules.go | 29 ++++++++++++++++++++++- v2/pkg/protocols/headless/operators.go | 5 +++- v2/pkg/protocols/headless/request.go | 2 +- 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/v2/pkg/protocols/headless/engine/page.go b/v2/pkg/protocols/headless/engine/page.go index afd59fd29..ba83d3db4 100644 --- a/v2/pkg/protocols/headless/engine/page.go +++ b/v2/pkg/protocols/headless/engine/page.go @@ -2,6 +2,7 @@ package engine import ( "net/url" + "strings" "time" "github.com/go-rod/rod" @@ -14,6 +15,12 @@ type Page struct { rules []requestRule instance *Instance router *rod.HijackRouter + History []HistoryData +} + +type HistoryData struct { + RawRequest string + RawResponse string } // Run runs a list of actions by creating a new page in the browser. 
@@ -81,3 +88,12 @@ func (p *Page) URL() string { } return info.URL } + +func (p *Page) DumpHistory() string { + var historyDump strings.Builder + for _, historyData := range p.History { + historyDump.WriteString(historyData.RawRequest) + historyDump.WriteString(historyData.RawResponse) + } + return historyDump.String() +} diff --git a/v2/pkg/protocols/headless/engine/rules.go b/v2/pkg/protocols/headless/engine/rules.go index 8dc206876..17b66ee42 100644 --- a/v2/pkg/protocols/headless/engine/rules.go +++ b/v2/pkg/protocols/headless/engine/rules.go @@ -2,6 +2,8 @@ package engine import ( "fmt" + "net/http/httputil" + "strings" "github.com/go-rod/rod" ) @@ -10,7 +12,6 @@ import ( func (p *Page) routingRuleHandler(ctx *rod.Hijack) { // usually browsers don't use chunked transfer encoding, so we set the content-length nevertheless ctx.Request.Req().ContentLength = int64(len(ctx.Request.Body())) - for _, rule := range p.rules { if rule.Part != "request" { continue @@ -51,4 +52,30 @@ func (p *Page) routingRuleHandler(ctx *rod.Hijack) { ctx.Response.SetBody(rule.Args["body"]) } } + + // store history + req := ctx.Request.Req() + var rawReq string + if raw, err := httputil.DumpRequestOut(req, true); err == nil { + rawReq = string(raw) + } + + // attempts to rebuild the response + var rawResp strings.Builder + respPayloads := ctx.Response.Payload() + if respPayloads != nil { + rawResp.WriteString(fmt.Sprintf("HTTP/1.1 %d %s\n", respPayloads.ResponseCode, respPayloads.ResponsePhrase)) + for _, header := range respPayloads.ResponseHeaders { + rawResp.WriteString(fmt.Sprintf("%s: %s\n", header.Name, header.Value)) + } + rawResp.WriteString("\n") + rawResp.WriteString(ctx.Response.Body()) + } + + // dump request + historyData := HistoryData{ + RawRequest: rawReq, + RawResponse: rawResp.String(), + } + p.History = append(p.History, historyData) } diff --git a/v2/pkg/protocols/headless/operators.go b/v2/pkg/protocols/headless/operators.go index b9f9b4ccc..5d48371cf 100644 --- 
a/v2/pkg/protocols/headless/operators.go +++ b/v2/pkg/protocols/headless/operators.go @@ -54,6 +54,8 @@ func (request *Request) getMatchPart(part string, data output.InternalEvent) (st switch part { case "body", "resp", "": part = "data" + case "history": + part = "history" } item, ok := data[part] @@ -66,12 +68,13 @@ func (request *Request) getMatchPart(part string, data output.InternalEvent) (st } // responseToDSLMap converts a headless response to a map for use in DSL matching -func (request *Request) responseToDSLMap(resp, req, host, matched string) output.InternalEvent { +func (request *Request) responseToDSLMap(resp, req, host, matched string, history string) output.InternalEvent { return output.InternalEvent{ "host": host, "matched": matched, "req": req, "data": resp, + "history": history, "type": request.Type().String(), "template-id": request.options.TemplateID, "template-info": request.options.TemplateInfo, diff --git a/v2/pkg/protocols/headless/request.go b/v2/pkg/protocols/headless/request.go index 639909c4e..41c268843 100644 --- a/v2/pkg/protocols/headless/request.go +++ b/v2/pkg/protocols/headless/request.go @@ -66,7 +66,7 @@ func (request *Request) ExecuteWithResults(inputURL string, metadata, previous o if err == nil { responseBody, _ = html.HTML() } - outputEvent := request.responseToDSLMap(responseBody, reqBuilder.String(), inputURL, inputURL) + outputEvent := request.responseToDSLMap(responseBody, reqBuilder.String(), inputURL, inputURL, page.DumpHistory()) for k, v := range out { outputEvent[k] = v } From 0a6b84639be2c86440dba7750303f8003f52f411 Mon Sep 17 00:00:00 2001 From: mzack Date: Wed, 29 Dec 2021 09:51:50 +0100 Subject: [PATCH 2/4] adding missing comments --- v2/pkg/protocols/headless/engine/page.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/v2/pkg/protocols/headless/engine/page.go b/v2/pkg/protocols/headless/engine/page.go index ba83d3db4..7c7ad1cc4 100644 --- a/v2/pkg/protocols/headless/engine/page.go +++ 
b/v2/pkg/protocols/headless/engine/page.go @@ -18,6 +18,7 @@ type Page struct { History []HistoryData } +// HistoryData contains the page request/response pairs type HistoryData struct { RawRequest string RawResponse string @@ -89,6 +90,7 @@ func (p *Page) URL() string { return info.URL } +// DumpHistory returns the full page navigation history func (p *Page) DumpHistory() string { var historyDump strings.Builder for _, historyData := range p.History { From 7251a2ef602b12bbe87c80be2df5a5120832f837 Mon Sep 17 00:00:00 2001 From: mzack Date: Thu, 30 Dec 2021 12:59:42 +0100 Subject: [PATCH 3/4] implementing requested changes --- v2/pkg/protocols/headless/engine/page.go | 23 ++++++++++++++++++----- v2/pkg/protocols/headless/engine/rules.go | 8 +++++--- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/v2/pkg/protocols/headless/engine/page.go b/v2/pkg/protocols/headless/engine/page.go index 7c7ad1cc4..dbe9935fe 100644 --- a/v2/pkg/protocols/headless/engine/page.go +++ b/v2/pkg/protocols/headless/engine/page.go @@ -3,6 +3,7 @@ package engine import ( "net/url" "strings" + "sync" "time" "github.com/go-rod/rod" @@ -11,11 +12,12 @@ import ( // Page is a single page in an isolated browser instance type Page struct { - page *rod.Page - rules []requestRule - instance *Instance - router *rod.HijackRouter - History []HistoryData + page *rod.Page + rules []requestRule + instance *Instance + router *rod.HijackRouter + historyMutex sync.RWMutex + History []HistoryData } // HistoryData contains the page request/response pairs @@ -92,6 +94,9 @@ func (p *Page) URL() string { // DumpHistory returns the full page navigation history func (p *Page) DumpHistory() string { + p.historyMutex.RLock() + defer p.historyMutex.RUnlock() + var historyDump strings.Builder for _, historyData := range p.History { historyDump.WriteString(historyData.RawRequest) @@ -99,3 +104,11 @@ func (p *Page) DumpHistory() string { } return historyDump.String() } + +// addToHistory adds a 
request/response pair to the page history +func (p *Page) addToHistory(historyData HistoryData) { + p.historyMutex.Lock() + defer p.historyMutex.Unlock() + + p.History = append(p.History, historyData) +} diff --git a/v2/pkg/protocols/headless/engine/rules.go b/v2/pkg/protocols/headless/engine/rules.go index 17b66ee42..5aa0c91dd 100644 --- a/v2/pkg/protocols/headless/engine/rules.go +++ b/v2/pkg/protocols/headless/engine/rules.go @@ -64,9 +64,11 @@ func (p *Page) routingRuleHandler(ctx *rod.Hijack) { var rawResp strings.Builder respPayloads := ctx.Response.Payload() if respPayloads != nil { - rawResp.WriteString(fmt.Sprintf("HTTP/1.1 %d %s\n", respPayloads.ResponseCode, respPayloads.ResponsePhrase)) + rawResp.WriteString("HTTP/1.1 ") + rawResp.WriteString(fmt.Sprint(respPayloads.ResponseCode)) + rawResp.WriteString(" " + respPayloads.ResponsePhrase + "\n") for _, header := range respPayloads.ResponseHeaders { - rawResp.WriteString(fmt.Sprintf("%s: %s\n", header.Name, header.Value)) + rawResp.WriteString(header.Name + ": " + header.Value + "\n") } rawResp.WriteString("\n") rawResp.WriteString(ctx.Response.Body()) @@ -77,5 +79,5 @@ func (p *Page) routingRuleHandler(ctx *rod.Hijack) { RawRequest: rawReq, RawResponse: rawResp.String(), } - p.History = append(p.History, historyData) + p.addToHistory(historyData) } From 1670bf874b390adf4f909b03a8a00631537075b8 Mon Sep 17 00:00:00 2001 From: mzack Date: Thu, 30 Dec 2021 13:04:08 +0100 Subject: [PATCH 4/4] fixing lint errors --- v2/pkg/protocols/headless/engine/page.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/v2/pkg/protocols/headless/engine/page.go b/v2/pkg/protocols/headless/engine/page.go index dbe9935fe..6543a8fb2 100644 --- a/v2/pkg/protocols/headless/engine/page.go +++ b/v2/pkg/protocols/headless/engine/page.go @@ -16,7 +16,7 @@ type Page struct { rules []requestRule instance *Instance router *rod.HijackRouter - historyMutex sync.RWMutex + historyMutex *sync.RWMutex History []HistoryData 
} @@ -40,7 +40,7 @@ func (i *Instance) Run(baseURL *url.URL, actions []*Action, timeout time.Duratio } } - createdPage := &Page{page: page, instance: i} + createdPage := &Page{page: page, instance: i, historyMutex: &sync.RWMutex{}} router := page.HijackRequests() if routerErr := router.Add("*", "", createdPage.routingRuleHandler); routerErr != nil { return nil, nil, routerErr