diff --git a/v2/pkg/protocols/http/build_request.go b/v2/pkg/protocols/http/build_request.go index b5e0e59a5..f7d961853 100644 --- a/v2/pkg/protocols/http/build_request.go +++ b/v2/pkg/protocols/http/build_request.go @@ -80,9 +80,8 @@ func (r *requestGenerator) Make(ctx context.Context, input *contextargs.Context, return nil, err } - data, parsed = baseURLWithTemplatePrefs(data, parsed) - isRawRequest := len(r.request.Raw) > 0 + data, parsed = baseURLWithTemplatePrefs(data, parsed, isRawRequest) // If the request is not a raw request, and the URL input path is suffixed with // a trailing slash, and our Input URL is also suffixed with a trailing slash, @@ -184,19 +183,61 @@ func (r *requestGenerator) Total() int { } // baseURLWithTemplatePrefs returns the url for BaseURL keeping -// the template port and path preference over the user provided one. -func baseURLWithTemplatePrefs(data string, parsed *url.URL) (string, *url.URL) { +// the template port along with any query parameters over the user provided one. +func baseURLWithTemplatePrefs(data string, parsed *url.URL, isRaw bool) (string, *url.URL) { // template port preference over input URL port if template has a port matches := urlWithPortRegex.FindAllStringSubmatch(data, -1) - if len(matches) == 0 { + if len(matches) > 0 { + port := matches[0][1] + parsed.Host = net.JoinHostPort(parsed.Hostname(), port) + data = strings.ReplaceAll(data, ":"+port, "") + if parsed.Path == "" { + parsed.Path = "/" + } + } + + if isRaw { + // do not swap parameters from parsedURL to base return data, parsed } - port := matches[0][1] - parsed.Host = net.JoinHostPort(parsed.Hostname(), port) - data = strings.ReplaceAll(data, ":"+port, "") - if parsed.Path == "" { - parsed.Path = "/" + + // transfer any parmas from URL to data( i.e {{BaseURL}} ) + params := parsed.Query() + if len(params) == 0 { + return data, parsed } + // remove any existing params from parsedInput (tracked using params) + // parsed.RawQuery = "" + + // ex: {{BaseURL}}/metrics?user=xxx + dataURLrelpath := strings.TrimLeft(data, "{{BaseURL}}") //nolint:all + + if dataURLrelpath == "" || dataURLrelpath == "/" { + // just attach raw query to data + dataURLrelpath += "?" + params.Encode() + } else { + // /?action=x or /metrics/ parse it + payloadpath, err := url.Parse(dataURLrelpath) + if err != nil { + // payload not possible to parse (edgecase) + dataURLrelpath += "?" + params.Encode() + } else { + payloadparams := payloadpath.Query() + if len(payloadparams) != 0 { + // ex: /?action=x + for k := range payloadparams { + params.Add(k, payloadparams.Get(k)) + } + } + //ex: /?admin=user&action=x + payloadpath.RawQuery = params.Encode() + dataURLrelpath = payloadpath.String() + } + + } + + data = "{{BaseURL}}" + dataURLrelpath + parsed.RawQuery = "" return data, parsed } diff --git a/v2/pkg/protocols/http/build_request_test.go b/v2/pkg/protocols/http/build_request_test.go index 7134a7168..40359233f 100644 --- a/v2/pkg/protocols/http/build_request_test.go +++ b/v2/pkg/protocols/http/build_request_test.go @@ -21,7 +21,7 @@ func TestBaseURLWithTemplatePrefs(t *testing.T) { parsed, _ := url.Parse(baseURL) data := "{{BaseURL}}:8000/newpath" - data, parsed = baseURLWithTemplatePrefs(data, parsed) + data, parsed = baseURLWithTemplatePrefs(data, parsed, false) require.Equal(t, "http://localhost:8000/test", parsed.String(), "could not get correct value") require.Equal(t, "{{BaseURL}}/newpath", data, "could not get correct data") } diff --git a/v2/pkg/protocols/http/raw/raw.go b/v2/pkg/protocols/http/raw/raw.go index 254dcf0db..154d212c5 100644 --- a/v2/pkg/protocols/http/raw/raw.go +++ b/v2/pkg/protocols/http/raw/raw.go @@ -7,9 +7,9 @@ import ( "fmt" "io" "net/url" - "path" "strings" + "github.com/projectdiscovery/nuclei/v2/pkg/protocols/http/utils" "github.com/projectdiscovery/rawhttp/client" stringsutil "github.com/projectdiscovery/utils/strings" ) @@ -27,18 +27,89 @@ type Request struct { // Parse parses the raw request as supplied by the user func Parse(request, baseURL string, unsafe bool) (*Request, error) { + // parse Input URL + inputURL, err := url.Parse(baseURL) + if err != nil { + return nil, fmt.Errorf("could not parse request URL: %w", err) + } + inputParams := inputURL.Query() + + // Joins input url and new url preserving query parameters + joinPath := func(relpath string) (string, error) { + newpath := "" + // Join path with input along with parameters + relUrl, relerr := url.Parse(relpath) + if relUrl == nil { + // special case when url.Parse fails + newpath = utils.JoinURLPath(inputURL.Path, relpath) + } else { + newpath = utils.JoinURLPath(inputURL.Path, relUrl.Path) + if len(relUrl.Query()) > 0 { + relParam := relUrl.Query() + for k := range relParam { + inputParams.Add(k, relParam.Get(k)) + } + } + } + if len(inputParams) > 0 { + newpath += "?" + inputParams.Encode() + } + return newpath, relerr + } + + rawrequest, err := readRawRequest(request, unsafe) + if err != nil { + return nil, err + } + + switch { + // If path is empty do not tamper input url (see doc) + // can be omitted but makes things clear + case rawrequest.Path == "": + rawrequest.Path, _ = joinPath("") + + // full url provided instead of rel path + case strings.HasPrefix(rawrequest.Path, "http") && !unsafe: + var parseErr error + rawrequest.Path, parseErr = joinPath(rawrequest.Path) + if parseErr != nil { + return nil, fmt.Errorf("could not parse url:%w", parseErr) + } + // If unsafe changes must be made in raw request string iteself + case unsafe: + prevPath := rawrequest.Path + unsafeRelativePath, _ := joinPath(rawrequest.Path) + // replace itself + rawrequest.UnsafeRawBytes = bytes.Replace(rawrequest.UnsafeRawBytes, []byte(prevPath), []byte(unsafeRelativePath), 1) + + default: + rawrequest.Path, _ = joinPath(rawrequest.Path) + + } + + if !unsafe { + if _, ok := rawrequest.Headers["Host"]; !ok { + rawrequest.Headers["Host"] = inputURL.Host + } + rawrequest.FullURL = fmt.Sprintf("%s://%s%s", inputURL.Scheme, strings.TrimSpace(inputURL.Host), rawrequest.Path) + } + + return rawrequest, nil + +} + +// reads raw request line by line following convention +func readRawRequest(request string, unsafe bool) (*Request, error) { rawRequest := &Request{ Headers: make(map[string]string), } - parsedURL, err := url.Parse(baseURL) - if err != nil { - return nil, fmt.Errorf("could not parse request URL: %w", err) - } - + // store body if it is unsafe request if unsafe { rawRequest.UnsafeRawBytes = []byte(request) } + + // parse raw request reader := bufio.NewReader(strings.NewReader(request)) read_line: s, err := reader.ReadString('\n') @@ -51,19 +122,24 @@ read_line: } parts := strings.Split(s, " ") - if len(parts) == 2 { - parts = []string{parts[0], "", parts[1]} + if len(parts) > 0 { + rawRequest.Method = parts[0] + if len(parts) == 2 && strings.Contains(parts[1], "HTTP") { + // When relative path is missing/ not specified it is considered that + // request is meant to be untampered at path + // Ex: GET HTTP/1.1 + parts = []string{parts[0], "", parts[1]} + } + if len(parts) < 3 && !unsafe { + // missing a field + return nil, fmt.Errorf("malformed request specified: %v", s) + } + + // relative path + rawRequest.Path = parts[1] + // Note: raw request does not URL Encode if needed `+` should be used + // this can be also be implemented } - if len(parts) < 3 && !unsafe { - return nil, fmt.Errorf("malformed request supplied") - } - // Check if we have also a path from the passed base URL and if yes, - // append that to the unsafe request as well. - if parsedURL.Path != "" && parts[1] != "" && parts[1] != parsedURL.Path { - rawRequest.UnsafeRawBytes = fixUnsafeRequestPath(parsedURL, parts[1], rawRequest.UnsafeRawBytes) - } - // Set the request Method - rawRequest.Method = parts[0] var multiPartRequest bool // Accepts all malformed headers @@ -104,46 +180,6 @@ read_line: } } - // Handle case with the full http url in path. In that case, - // ignore any host header that we encounter and use the path as request URL - if !unsafe && strings.HasPrefix(parts[1], "http") { - parsed, parseErr := url.Parse(parts[1]) - if parseErr != nil { - return nil, fmt.Errorf("could not parse request URL: %w", parseErr) - } - - rawRequest.Path = parsed.Path - if _, ok := rawRequest.Headers["Host"]; !ok { - rawRequest.Headers["Host"] = parsed.Host - } - } else if len(parts) > 1 { - rawRequest.Path = parts[1] - } - - hostURL := parsedURL.Host - if strings.HasSuffix(parsedURL.Path, "/") && strings.HasPrefix(rawRequest.Path, "/") { - parsedURL.Path = strings.TrimSuffix(parsedURL.Path, "/") - } - - if !unsafe { - if parsedURL.Path != rawRequest.Path { - rawRequest.Path = fmt.Sprintf("%s%s", parsedURL.Path, rawRequest.Path) - } - if strings.HasSuffix(rawRequest.Path, "//") { - rawRequest.Path = strings.TrimSuffix(rawRequest.Path, "/") - } - rawRequest.FullURL = fmt.Sprintf("%s://%s%s", parsedURL.Scheme, strings.TrimSpace(hostURL), rawRequest.Path) - if parsedURL.RawQuery != "" { - rawRequest.FullURL = fmt.Sprintf("%s?%s", rawRequest.FullURL, parsedURL.RawQuery) - } - - // If raw request doesn't have a Host header and isn't marked unsafe, - // this will generate the Host header from the parsed baseURL - if rawRequest.Headers["Host"] == "" { - rawRequest.Headers["Host"] = hostURL - } - } - // Set the request body b, err := io.ReadAll(reader) if err != nil { @@ -154,17 +190,7 @@ read_line: rawRequest.Data = strings.TrimSuffix(rawRequest.Data, "\r\n") } return rawRequest, nil -} -func fixUnsafeRequestPath(baseURL *url.URL, requestPath string, request []byte) []byte { - var fixedPath string - if stringsutil.HasPrefixAny(requestPath, "/") { - fixedPath = path.Join(baseURL.Path, requestPath) - } else { - fixedPath = fmt.Sprintf("%s%s", baseURL.Path, requestPath) - } - - return bytes.Replace(request, []byte(requestPath), []byte(fixedPath), 1) } // TryFillCustomHeaders after the Host header diff --git a/v2/pkg/protocols/http/raw/raw_test.go b/v2/pkg/protocols/http/raw/raw_test.go index 6978f367b..1ff9f4be6 100644 --- a/v2/pkg/protocols/http/raw/raw_test.go +++ b/v2/pkg/protocols/http/raw/raw_test.go @@ -29,17 +29,18 @@ Host: {{Hostname}}`, "https://example.com:8080/test", false) request, err := Parse(`GET ?username=test&password=test HTTP/1.1 Host: {{Hostname}}:123`, "https://example.com:8080/test", false) require.Nil(t, err, "could not parse GET request") - require.Equal(t, "https://example.com:8080/test?username=test&password=test", request.FullURL, "Could not parse request url correctly") + // url.values are sorted to avoid randomness of using maps + require.Equal(t, "https://example.com:8080/test?password=test&username=test", request.FullURL, "Could not parse request url correctly") request, err = Parse(`GET ?username=test&password=test HTTP/1.1 Host: {{Hostname}}:123`, "https://example.com:8080/test/", false) require.Nil(t, err, "could not parse GET request") - require.Equal(t, "https://example.com:8080/test/?username=test&password=test", request.FullURL, "Could not parse request url correctly") + require.Equal(t, "https://example.com:8080/test/?password=test&username=test", request.FullURL, "Could not parse request url correctly") request, err = Parse(`GET /?username=test&password=test HTTP/1.1 Host: {{Hostname}}:123`, "https://example.com:8080/test/", false) require.Nil(t, err, "could not parse GET request") - require.Equal(t, "https://example.com:8080/test/?username=test&password=test", request.FullURL, "Could not parse request url correctly") + require.Equal(t, "https://example.com:8080/test/?password=test&username=test", request.FullURL, "Could not parse request url correctly") }) } diff --git a/v2/pkg/protocols/http/utils/url.go b/v2/pkg/protocols/http/utils/url.go new file mode 100644 index 000000000..b69c5f417 --- /dev/null +++ b/v2/pkg/protocols/http/utils/url.go @@ -0,0 +1,36 @@ +package utils + +import ( + "fmt" + "path" + "strings" +) + +// Joins two relative paths and handles trailing slash edgecase +func JoinURLPath(elem1 string, elem2 string) string { + /* + Trailing Slash EdgeCase + Path.Join converts /test/ to /test + this should be handled manually + */ + if elem2 == "" { + return elem1 + } + if elem2 == "/" || elem2 == "/?" { + // check for extra slash + if strings.HasSuffix(elem1, "/") && strings.HasPrefix(elem2, "/") { + elem1 = strings.TrimRight(elem1, "/") + } + // merge and return + return fmt.Sprintf("%v%v", elem1, elem2) + } else { + if strings.HasPrefix(elem2, "?") { + // path2 is parameter and not a url append and return + return fmt.Sprintf("%v%v", elem1, elem2) + } + // Note: + // path.Join implicitly calls path.Clean so any relative paths are filtered + // if not encoded properly + return path.Join(elem1, elem2) + } +} diff --git a/v2/pkg/protocols/http/utils/url_test.go b/v2/pkg/protocols/http/utils/url_test.go new file mode 100644 index 000000000..30b087b20 --- /dev/null +++ b/v2/pkg/protocols/http/utils/url_test.go @@ -0,0 +1,29 @@ +package utils_test + +import ( + "fmt" + "path" + "testing" + + "github.com/projectdiscovery/nuclei/v2/pkg/protocols/http/utils" +) + +func TestURLJoin(t *testing.T) { + fmt.Println(path.Join("/wp-content", "/wp-content/admin.php")) + testcases := []struct { + URL1 string + URL2 string + ExpectedJoin string + }{ + {"/test/", "", "/test/"}, + {"/test", "/", "/test/"}, + {"/test", "?param=true", "/test?param=true"}, + {"/test/", "/", "/test/"}, + } + for _, v := range testcases { + res := utils.JoinURLPath(v.URL1, v.URL2) + if res != v.ExpectedJoin { + t.Errorf("failed to join urls expected %v but got %v", v.ExpectedJoin, res) + } + } +}