feat: add initial fuzz input URL deduplication implementation

This commit is contained in:
Ice3man 2024-09-03 19:16:44 +05:30
parent 042b33de3d
commit fa6cac181e
6 changed files with 257 additions and 0 deletions

View File

@ -328,6 +328,7 @@ on extensive configurability, massive extensibility and ease of use.`)
flagSet.BoolVar(&fuzzFlag, "fuzz", false, "enable loading fuzzing templates (Deprecated: use -dast instead)"),
flagSet.BoolVar(&options.DAST, "dast", false, "enable / run dast (fuzz) nuclei templates"),
flagSet.BoolVarP(&options.DisplayFuzzPoints, "display-fuzz-points", "dfp", false, "display fuzz points in the output for debugging"),
flagSet.BoolVarP(&options.FuzzingDedupe, "fuzzing-dedupe", "fd", false, "deduplicate fuzzing url inputs"),
flagSet.IntVar(&options.FuzzParamFrequency, "fuzz-param-frequency", 10, "frequency of uninteresting parameters for fuzzing before skipping"),
flagSet.StringVarP(&options.FuzzAggressionLevel, "fuzz-aggression", "fa", "low", "fuzzing aggression level controls payload count for fuzz (low, medium, high)"),
)

View File

@ -152,6 +152,9 @@ func ValidateOptions(options *types.Options) error {
if options.Verbose && options.Silent {
return errors.New("both verbose and silent mode specified")
}
if options.FuzzingDedupe && options.Stream {
return errors.New("both fuzzing dedupe and stream mode specified")
}
if (options.HeadlessOptionalArguments != nil || options.ShowBrowser || options.UseInstalledChrome) && !options.Headless {
return errors.New("headless mode (-headless) is required if -ho, -sb, -sc or -lha are set")

View File

@ -0,0 +1,102 @@
// Package dedupe implements a duplicate URL deduplication mechanism
// for Nuclei DAST or Fuzzing inputs.
//
// It is used to remove similar or non-relevant inputs from fuzzing
// or DAST scans to reduce the number of requests made.
package dedupe
import (
"fmt"
"net/url"
"regexp"
"slices"
"strings"
mapsutil "github.com/projectdiscovery/utils/maps"
)
// FuzzingDeduper is a deduper for fuzzing inputs.
//
// Each URL is reduced to a normalized pattern before being stored:
//
//   - Query parameters are templated with their own names ("id=1" becomes
//     "id={id}") and sorted, so differing values or parameter orders collapse
//     to the same pattern.
//     TODO: Doesn't handle different values, everything is stripped. Maybe make it more flexible?
//   - Purely numeric IDs in the path are replaced with {numeric_id}.
//
// This allows us to deduplicate URLs with different query parameter values
// or orders but the same structure or key names.
type FuzzingDeduper struct {
	// items is the concurrency-safe set of URL patterns seen so far
	// (pattern -> struct{}).
	items *mapsutil.SyncLockMap[string, struct{}]
}
// NewFuzzingDeduper returns a ready-to-use deduper backed by a
// concurrency-safe set of previously seen URL patterns.
func NewFuzzingDeduper() *FuzzingDeduper {
	deduper := &FuzzingDeduper{}
	deduper.items = mapsutil.NewSyncLockMap[string, struct{}]()
	return deduper
}
// Add records the pattern for URL and reports whether it was seen for the
// first time. It returns false both for URLs that cannot be parsed and for
// URLs whose normalized pattern has already been registered.
func (d *FuzzingDeduper) Add(URL string) bool {
	pattern, err := generatePattern(URL)
	if err != nil {
		// unparseable input: never admitted
		return false
	}
	if _, seen := d.items.Get(pattern); seen {
		return false
	}
	d.items.Set(pattern, struct{}{})
	return true
}
// generatePattern reduces a raw URL to its deduplication key:
// scheme://host + normalized path, plus "?" and the templated, sorted query
// when any parameters are present. It returns an error when the input cannot
// be parsed as a request URI.
func generatePattern(rawURL string) (string, error) {
	parsed, err := url.ParseRequestURI(rawURL)
	if err != nil {
		return "", err
	}
	pattern := parsed.Scheme + "://" + parsed.Host + normalizePath(parsed.Path)
	if query := extractQuery(parsed.Query()); query != "" {
		pattern = pattern + "?" + query
	}
	return pattern, nil
}
// numericIDPathRegex matches one purely-numeric path segment, capturing the
// separator that follows ("/" or end of string) so replacements can preserve it.
var numericIDPathRegex = regexp.MustCompile(`/\d+(/|$)`)

// normalizePath replaces purely-numeric path segments with the {numeric_id}
// placeholder so URLs that differ only in numeric IDs produce the same pattern.
//
// The previous implementation collected match[0] values (which include the
// trailing "/") and fed them to strings.ReplaceAll, which dropped the slash
// between segments ("/users/123/posts" -> "/users/{numeric_id}posts") and
// could corrupt paths where the matched text recurred elsewhere ("/1/1" ->
// "/{numeric_id}1"). Replacing via the regex with the captured separator, and
// looping to a fixed point, handles adjacent numeric segments ("/1/2/3")
// whose matches would otherwise overlap on the shared "/".
func normalizePath(path string) string {
	for {
		normalized := numericIDPathRegex.ReplaceAllString(path, "/{numeric_id}$1")
		if normalized == path {
			// fixed point reached: replacements introduce no new digits,
			// so this loop always terminates
			return normalized
		}
		path = normalized
	}
}
func extractQuery(query url.Values) string {
normalizedParams := make([]string, 0, len(query))
for k, v := range query {
if len(v) == 0 {
normalizedParams = append(normalizedParams, k)
} else {
normalizedParams = append(normalizedParams, fmt.Sprintf("%s={%s}", k, k))
}
}
slices.Sort(normalizedParams)
return strings.Join(normalizedParams, "&")
}

View File

@ -0,0 +1,137 @@
package dedupe
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
)
// TestFuzzingDeduper exercises FuzzingDeduper.Add across table-driven URL
// pairs, a large generated set, numeric path parameters, and a realistic
// testphp.vulnweb.com corpus.
func TestFuzzingDeduper(t *testing.T) {
	t.Run("Basic URL Deduplication", func(t *testing.T) {
		tests := []struct {
			name     string
			urls     []string // inputs fed to Add, in order
			expected []bool   // expected Add return value for each input
		}{
			{
				name:     "Simple unique URLs",
				urls:     []string{"http://example.com/page1", "http://example.com/page2"},
				expected: []bool{true, true},
			},
			{
				name:     "Duplicate URLs",
				urls:     []string{"http://example.com/page1", "http://example.com/page1"},
				expected: []bool{true, false},
			},
			{
				// values are templated away, so only the parameter name matters
				name:     "URLs with different query param values",
				urls:     []string{"http://example.com/page?id=1", "http://example.com/page?id=2"},
				expected: []bool{true, false},
			},
			{
				// parameters are sorted before building the pattern
				name:     "URLs with different query param orders",
				urls:     []string{"http://example.com/page?a=1&b=2", "http://example.com/page?b=2&a=1"},
				expected: []bool{true, true},
			},
			{
				// trailing slashes are NOT normalized; these are distinct inputs
				name:     "URLs with and without trailing slash",
				urls:     []string{"http://example.com/page/", "http://example.com/page"},
				expected: []bool{true, true},
			},
			{
				name:     "URLs with different schemes",
				urls:     []string{"http://example.com", "https://example.com"},
				expected: []bool{true, true},
			},
			{
				name:     "URLs with query params and without",
				urls:     []string{"http://example.com/page", "http://example.com/page?param=value"},
				expected: []bool{true, true},
			},
			{
				// unparseable input: Add reports false rather than erroring
				name:     "Invalid URLs",
				urls:     []string{"http://example.com/page", "not a valid url"},
				expected: []bool{true, false},
			},
			{
				// empty-valued params still dedupe on name, order-insensitively
				name:     "URLs with empty query params",
				urls:     []string{"http://example.com/page?param1=&param2=", "http://example.com/page?param2=&param1="},
				expected: []bool{true, false},
			},
		}

		for _, tt := range tests {
			t.Run(tt.name, func(t *testing.T) {
				deduper := NewFuzzingDeduper()
				for i, url := range tt.urls {
					result := deduper.Add(url)
					require.Equal(t, tt.expected[i], result, "Add(%q) = %v, want %v", url, result, tt.expected[i])
				}
			})
		}
	})

	t.Run("Large Set Deduplication", func(t *testing.T) {
		deduper := NewFuzzingDeduper()
		baseURL := "http://example.com/page?id=%d&param=%s"
		// 1000 URLs that differ only in the id value must collapse to one pattern
		for i := 0; i < 1000; i++ {
			url := fmt.Sprintf(baseURL, i, "value")
			result := deduper.Add(url)
			if i == 0 {
				require.True(t, result, "First URL should be added")
			} else {
				require.False(t, result, "Duplicate URL pattern should not be added: %s", url)
			}
		}
		allItems := deduper.items.GetAll()
		require.Len(t, allItems, 1, "Expected 1 unique URL pattern, got %d", len(allItems))
	})

	t.Run("Path Parameters", func(t *testing.T) {
		// numeric path segments are templated to {numeric_id}
		deduper := NewFuzzingDeduper()
		require.True(t, deduper.Add("https://example.com/page/1337"))
		require.False(t, deduper.Add("https://example.com/page/1332"))
	})

	t.Run("TestPHP Vulnweb URLs", func(t *testing.T) {
		urls := []string{
			"http://testphp.vulnweb.com/hpp/?pp=12",
			"http://testphp.vulnweb.com/hpp/params.php?p=valid&pp=12",
			"http://testphp.vulnweb.com/artists.php?artist=3",
			"http://testphp.vulnweb.com/artists.php?artist=1",
			"http://testphp.vulnweb.com/artists.php?artist=2",
			"http://testphp.vulnweb.com/listproducts.php?artist=3",
			"http://testphp.vulnweb.com/listproducts.php?cat=4",
			"http://testphp.vulnweb.com/listproducts.php?cat=3",
			"http://testphp.vulnweb.com/listproducts.php?cat=2",
			"http://testphp.vulnweb.com/listproducts.php?artist=2",
			"http://testphp.vulnweb.com/listproducts.php?artist=1",
			"http://testphp.vulnweb.com/listproducts.php?cat=1",
			"http://testphp.vulnweb.com/showimage.php?file=./pictures/6.jpg",
			"http://testphp.vulnweb.com/product.php?pic=6",
			"http://testphp.vulnweb.com/showimage.php?file=./pictures/6.jpg&size=160",
		}
		// unique (path, parameter-name-set) combinations in the corpus above
		expectedUnique := 8
		deduper := NewFuzzingDeduper()
		uniqueCount := 0
		for _, url := range urls {
			if deduper.Add(url) {
				uniqueCount++
			}
		}
		require.Equal(t, expectedUnique, uniqueCount, "Expected %d unique URLs, but got %d", expectedUnique, uniqueCount)

		// Test for duplicates
		for _, url := range urls {
			require.False(t, deduper.Add(url), "URL should have been identified as duplicate: %s", url)
		}
	})
}

View File

@ -19,6 +19,7 @@ import (
"github.com/projectdiscovery/hmap/filekv"
"github.com/projectdiscovery/hmap/store/hybrid"
"github.com/projectdiscovery/mapcidr/asn"
"github.com/projectdiscovery/nuclei/v3/pkg/input/provider/dedupe"
providerTypes "github.com/projectdiscovery/nuclei/v3/pkg/input/types"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/contextargs"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/protocolstate"
@ -48,6 +49,8 @@ type ListInputProvider struct {
hostMapStream *filekv.FileDB
hostMapStreamOnce sync.Once
sync.Once
fuzzDeduper *dedupe.FuzzingDeduper
}
// Options is a wrapper around types.Options structure
@ -78,6 +81,9 @@ func New(opts *Options) (*ListInputProvider, error) {
},
excludedHosts: make(map[string]struct{}),
}
if options.FuzzingDedupe {
input.fuzzDeduper = dedupe.NewFuzzingDeduper()
}
if options.Stream {
fkvOptions := filekv.DefaultOptions
fkvOptions.MaxItems = DefaultMaxDedupeItemsCount
@ -472,6 +478,12 @@ func (i *ListInputProvider) setItem(metaInput *contextargs.MetaInput) {
}
i.inputCount++ // tracks target count
if i.fuzzDeduper != nil {
if !i.fuzzDeduper.Add(metaInput.Target()) {
gologger.Verbose().Msgf("Ignoring duplicate fuzzing target: %s\n", metaInput.Target())
return
}
}
_ = i.hostMap.Set(key, nil)
if i.hostMapStream != nil {
i.setHostMapStream(key)

View File

@ -276,6 +276,8 @@ type Options struct {
StoreResponseDir string
// DisableRedirects disables following redirects for http request module
DisableRedirects bool
// FuzzingDedupe enables deduplication of input URLs for fuzzing
FuzzingDedupe bool
// SNI custom hostname
SNI string
// InputFileMode specifies the mode of input file (jsonl, burp, openapi, swagger, etc)