package raw

import (
	"bufio"
	"bytes"
	"encoding/base64"
	"errors"
	"fmt"
	"io"
	"strings"
	"sync"

	"github.com/projectdiscovery/gologger"
	"github.com/projectdiscovery/nuclei/v3/pkg/authprovider/authx"
	"github.com/projectdiscovery/rawhttp/client"
	"github.com/projectdiscovery/utils/errkit"
	stringsutil "github.com/projectdiscovery/utils/strings"
	urlutil "github.com/projectdiscovery/utils/url"
)

// bufferPool reuses bytes.Buffer instances to avoid repeated allocations
// when rewriting raw request bytes and building cookie headers.
var bufferPool = sync.Pool{New: func() any { return new(bytes.Buffer) }}

// Request defines a basic HTTP raw request
type Request struct {
	FullURL        string            // fully qualified URL built for the request
	Method         string            // HTTP method from the request line
	Path           string            // relative path, possibly merged with the input URL
	Data           string            // request body
	Headers        map[string]string // parsed request headers
	UnsafeHeaders  client.Headers    // verbatim header lines kept for unsafe requests
	UnsafeRawBytes []byte            // original raw bytes, rewritten in place for unsafe requests
}

// Parse parses the raw request as supplied by the user.
// When unsafe is set, the original raw bytes are preserved and edited in
// place; disablePathAutomerge prevents merging the template path with the
// input URL path.
func Parse(request string, inputURL *urlutil.URL, unsafe, disablePathAutomerge bool) (*Request, error) {
	rawrequest, err := readRawRequest(request, unsafe)
	if err != nil {
		return nil, err
	}

	switch {
	// If the path is empty do not tamper with the input url (see doc)
	// can be omitted but makes things clear
	case rawrequest.Path == "":
		if !disablePathAutomerge {
			inputURL.Params.IncludeEquals = true
			rawrequest.Path = inputURL.GetRelativePath()
		}

	// full url provided instead of a relative path
	case strings.HasPrefix(rawrequest.Path, "http") && !unsafe:
		urlx, err := urlutil.ParseURL(rawrequest.Path, true)
		if err != nil {
			return nil, errkit.Wrapf(err, "failed to parse url %v from template", rawrequest.Path)
		}
		cloned := inputURL.Clone()
		cloned.Params.IncludeEquals = true
		if disablePathAutomerge {
			cloned.Path = ""
		}
		parseErr := cloned.MergePath(urlx.GetRelativePath(), true)
		if parseErr != nil {
			return nil, errkit.Wrapf(parseErr, "could not automerge path for template path %v", urlx.GetRelativePath())
		}
		rawrequest.Path = cloned.GetRelativePath()

	// If unsafe, changes must be made in the raw request string itself
	case unsafe:
		prevPath := rawrequest.Path
		cloned := inputURL.Clone()
		cloned.Params.IncludeEquals = true
		unsafeRelativePath := ""
		if (cloned.Path == "" || cloned.Path == "/") && !strings.HasPrefix(prevPath, "/") {
			// Edge case if the raw unsafe request is
			// GET 1337?with=param HTTP/1.1
			if tmpurl, err := urlutil.ParseRelativePath(prevPath, true); err == nil && !tmpurl.Params.IsEmpty() {
				// if the raw request contains parameters
				cloned.Params.Merge(tmpurl.Params.Encode())
				unsafeRelativePath = strings.TrimPrefix(tmpurl.Path, "/") + "?" + cloned.Params.Encode()
			} else {
				// if the raw request does not contain parameters
				if !cloned.Params.IsEmpty() {
					unsafeRelativePath = prevPath + "?" + cloned.Params.Encode()
				} else {
					unsafeRelativePath = prevPath
				}
			}
		} else {
			// Edge case if the raw request is
			// GET / HTTP/1.1
			// use case: https://github.com/projectdiscovery/nuclei/issues/4921
			if rawrequest.Path == "/" && cloned.Path != "" {
				rawrequest.Path = ""
			}

			if disablePathAutomerge {
				cloned.Path = ""
			}
			err = cloned.MergePath(rawrequest.Path, true)
			if err != nil {
				return nil, errkit.Wrapf(err, "failed to automerge %v from unsafe template", rawrequest.Path)
			}
			unsafeRelativePath = cloned.GetRelativePath()
		}
		rawrequest.Path = cloned.GetRelativePath()
		rawrequest.UnsafeRawBytes = bytes.Replace(rawrequest.UnsafeRawBytes, []byte(prevPath), []byte(unsafeRelativePath), 1)

	default:
		cloned := inputURL.Clone()
		cloned.Params.IncludeEquals = true
		// Edge case if the raw request is
		// GET / HTTP/1.1
		// use case: https://github.com/projectdiscovery/nuclei/issues/4921
		if rawrequest.Path == "/" {
			rawrequest.Path = ""
		}

		if disablePathAutomerge {
			cloned.Path = ""
		}
		parseErr := cloned.MergePath(rawrequest.Path, true)
		if parseErr != nil {
			return nil, errkit.Wrapf(parseErr, "could not automerge path for template path %v", rawrequest.Path)
		}
		rawrequest.Path = cloned.GetRelativePath()
	}

	if !unsafe {
		if _, ok := rawrequest.Headers["Host"]; !ok {
			rawrequest.Headers["Host"] = inputURL.Host
		}
		cloned := inputURL.Clone()
		cloned.Params.IncludeEquals = true
		cloned.Path = ""
		_ = cloned.MergePath(rawrequest.Path, true)
		rawrequest.FullURL = cloned.String()
	}

	return rawrequest, nil
}
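
// A minimal usage sketch for Parse (illustrative only; the target URL and
// template body below are hypothetical):
//
//	input, err := urlutil.Parse("https://example.com/app")
//	if err != nil {
//		// handle error
//	}
//	raw := "GET /login HTTP/1.1\nHost: example.com\n\n"
//	req, err := Parse(raw, input, false, false)
//	if err != nil {
//		// handle error
//	}
//	// req.Method == "GET"; with automerge enabled, req.Path carries the
//	// target path joined with the template path, and req.FullURL the
//	// absolute URL built from the input target.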

// ParseRawRequest parses the raw request as supplied by the user.
// This function should only be used for self-contained requests.
func ParseRawRequest(request string, unsafe bool) (*Request, error) {
	req, err := readRawRequest(request, unsafe)
	if err != nil {
		return nil, err
	}
	if strings.HasPrefix(req.Path, "http") {
		urlx, err := urlutil.Parse(req.Path)
		if err != nil {
			return nil, errkit.Wrapf(err, "failed to parse url %v", req.Path)
		}
		req.Path = urlx.GetRelativePath()
		req.FullURL = urlx.String()
	} else {
		if req.Path == "" {
			return nil, errkit.New("path cannot be empty in self contained request")
		}
		// the given url is relative; construct one using the Host header
		if _, ok := req.Headers["Host"]; !ok {
			return nil, errkit.New("host header is required for relative path")
		}
		// Review: the current default scheme in self-contained templates is
		// http when a relative path is provided
		req.FullURL = fmt.Sprintf("%s://%s%s", urlutil.HTTP, strings.TrimSpace(req.Headers["Host"]), req.Path)
	}
	return req, nil
}
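
// A minimal usage sketch for ParseRawRequest with a self-contained request
// (illustrative only; the host below is hypothetical):
//
//	raw := "GET /health HTTP/1.1\nHost: internal.example.com\n\n"
//	req, err := ParseRawRequest(raw, false)
//	if err != nil {
//		// handle error
//	}
//	// With a relative path the Host header is mandatory and the default
//	// scheme is http, so req.FullURL == "http://internal.example.com/health".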

// readRawRequest reads a raw request line by line following the convention
// shown below.
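// The expected wire format is, for example:
//
//	GET /path?key=value HTTP/1.1
//	Host: example.com
//	X-Custom: value
//
//	request body
//
// Lines before the request line starting with "@" are treated as
// annotations and skipped.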
func readRawRequest(request string, unsafe bool) (*Request, error) {
	rawRequest := &Request{
		Headers: make(map[string]string),
	}

	// store the original bytes if it is an unsafe request
	if unsafe {
		rawRequest.UnsafeRawBytes = []byte(request)
	}

	// parse the raw request
	reader := bufio.NewReader(strings.NewReader(request))
read_line:
	s, err := reader.ReadString('\n')
	if err != nil {
		return nil, fmt.Errorf("could not read request: %w", err)
	}
	// ignore all annotations
	if stringsutil.HasPrefixAny(s, "@") {
		goto read_line
	}

	parts := strings.Fields(s)
	if len(parts) > 0 {
		rawRequest.Method = parts[0]
		if len(parts) == 2 && strings.Contains(parts[1], "HTTP") {
			// When the relative path is missing or not specified, the
			// request path is meant to be left untampered.
			// Ex: GET HTTP/1.1
			parts = []string{parts[0], "", parts[1]}
		}
		if len(parts) < 3 && !unsafe {
			// missing a field
			return nil, fmt.Errorf("malformed request specified: %v", s)
		}

		// relative path
		rawRequest.Path = parts[1]
		// Note: the raw request is not URL-encoded; if encoding is needed,
		// `+` should be used. This could also be implemented here.
	}

	var multiPartRequest bool
	// Accepts all malformed headers
	for {
		line, readErr := reader.ReadString('\n')
		line = strings.TrimSpace(line)

		// stop on hard read errors and on the blank line separating headers
		// from the body; an empty line at EOF would otherwise register a
		// bogus "" header
		if (readErr != nil && readErr != io.EOF) || line == "" {
			break
		}

		p := strings.SplitN(line, ":", 2)
		key := p[0]
		value := ""
		if len(p) > 1 {
			value = p[1]
		}
		if strings.Contains(key, "Content-Type") && strings.Contains(value, "multipart/") {
			multiPartRequest = true
		}

		// in case of unsafe requests multiple headers should be accepted,
		// therefore use the full line as the key
		_, found := rawRequest.Headers[key]
		if unsafe {
			rawRequest.UnsafeHeaders = append(rawRequest.UnsafeHeaders, client.Header{Key: line})
		}

		if unsafe && found {
			rawRequest.Headers[line] = ""
		} else {
			rawRequest.Headers[key] = strings.TrimSpace(value)
		}
		if readErr == io.EOF {
			break
		}
	}

	// Set the request body
	b, err := io.ReadAll(reader)
	if err != nil {
		return nil, fmt.Errorf("could not read request body: %w", err)
	}
	rawRequest.Data = string(b)
	if !multiPartRequest {
		rawRequest.Data = strings.TrimSuffix(rawRequest.Data, "\r\n")
	}
	return rawRequest, nil
}

// TryFillCustomHeaders inserts the given header lines immediately after the
// Host header in the raw request bytes.
func (r *Request) TryFillCustomHeaders(headers []string) error {
	unsafeBytes := bytes.ToLower(r.UnsafeRawBytes)
	// locate the first host header
	hostHeaderIndex := bytes.Index(unsafeBytes, []byte("host:"))
	if hostHeaderIndex > 0 {
		// attempt to locate the next newline
		newLineIndex := bytes.Index(unsafeBytes[hostHeaderIndex:], []byte("\r\n"))
		if newLineIndex > 0 {
			newLineIndex += hostHeaderIndex + 2
			// insert the custom headers
			buf := bufferPool.Get().(*bytes.Buffer)
			buf.Reset()
			buf.Write(r.UnsafeRawBytes[:newLineIndex])
			for _, header := range headers {
				buf.WriteString(header)
				buf.WriteString("\r\n")
			}
			buf.Write(r.UnsafeRawBytes[newLineIndex:])
			// copy out of the pooled buffer before returning it to the pool
			r.UnsafeRawBytes = append([]byte(nil), buf.Bytes()...)
			buf.Reset()
			bufferPool.Put(buf)
			return nil
		}
		return errors.New("no new line found at the end of host header")
	}

	return errors.New("no host header found")
}
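
// A minimal usage sketch for TryFillCustomHeaders (illustrative only); it
// operates on UnsafeRawBytes, so the request must have been parsed in
// unsafe mode:
//
//	req, err := ParseRawRequest("GET / HTTP/1.1\r\nHost: example.com\r\n\r\n", true)
//	if err != nil {
//		// handle error
//	}
//	if err := req.TryFillCustomHeaders([]string{"X-Trace-Id: 1234"}); err != nil {
//		// no Host header (or no trailing CRLF after it) was found
//	}
//	// UnsafeRawBytes now contains "X-Trace-Id: 1234\r\n" right after the
//	// Host header line.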

// ApplyAuthStrategy applies the auth strategy to the request
func (r *Request) ApplyAuthStrategy(strategy authx.AuthStrategy) {
	if strategy == nil {
		return
	}
	switch s := strategy.(type) {
	case *authx.QueryAuthStrategy:
		parsed, err := urlutil.Parse(r.FullURL)
		if err != nil {
			gologger.Error().Msgf("auth strategy failed to parse url: %s got %v", r.FullURL, err)
			return
		}
		for _, p := range s.Data.Params {
			parsed.Params.Add(p.Key, p.Value)
		}
		// write the updated query parameters back to the request URL;
		// without this the added parameters would be discarded
		r.FullURL = parsed.String()
	case *authx.CookiesAuthStrategy:
		buff := bufferPool.Get().(*bytes.Buffer)
		buff.Reset()
		for _, cookie := range s.Data.Cookies {
			fmt.Fprintf(buff, "%s=%s; ", cookie.Key, cookie.Value)
		}
		if buff.Len() > 0 {
			if val, ok := r.Headers["Cookie"]; ok {
				r.Headers["Cookie"] = strings.TrimSuffix(strings.TrimSpace(val), ";") + "; " + buff.String()
			} else {
				r.Headers["Cookie"] = buff.String()
			}
		}
		bufferPool.Put(buff)
	case *authx.HeadersAuthStrategy:
		for _, header := range s.Data.Headers {
			r.Headers[header.Key] = header.Value
		}
	case *authx.BearerTokenAuthStrategy:
		r.Headers["Authorization"] = "Bearer " + s.Data.Token
	case *authx.BasicAuthStrategy:
		r.Headers["Authorization"] = "Basic " + base64.StdEncoding.EncodeToString([]byte(s.Data.Username+":"+s.Data.Password))
	default:
		gologger.Warning().Msgf("[raw-request] unknown auth strategy: %T", s)
	}
}
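
// A minimal usage sketch for ApplyAuthStrategy with a bearer token
// (illustrative only; the Secret construction is an assumption based on the
// s.Data.Token access above — see the authx package for the concrete fields):
//
//	req, err := ParseRawRequest("GET /api HTTP/1.1\nHost: example.com\n\n", false)
//	if err != nil {
//		// handle error
//	}
//	var strategy authx.AuthStrategy = &authx.BearerTokenAuthStrategy{
//		Data: &authx.Secret{Token: "s3cr3t"}, // hypothetical token value
//	}
//	req.ApplyAuthStrategy(strategy)
//	// req.Headers["Authorization"] == "Bearer s3cr3t"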