mirror of https://github.com/projectdiscovery/nuclei.git
synced 2025-12-17 20:05:27 +00:00

feat: added initial fuzz input url deduplication implementation

parent 042b33de3d
commit fa6cac181e
@@ -328,6 +328,7 @@ on extensive configurability, massive extensibility and ease of use.`)

 	flagSet.BoolVar(&fuzzFlag, "fuzz", false, "enable loading fuzzing templates (Deprecated: use -dast instead)"),
 	flagSet.BoolVar(&options.DAST, "dast", false, "enable / run dast (fuzz) nuclei templates"),
 	flagSet.BoolVarP(&options.DisplayFuzzPoints, "display-fuzz-points", "dfp", false, "display fuzz points in the output for debugging"),
+	flagSet.BoolVarP(&options.FuzzingDedupe, "fuzzing-dedupe", "fd", false, "deduplicate fuzzing url inputs"),
 	flagSet.IntVar(&options.FuzzParamFrequency, "fuzz-param-frequency", 10, "frequency of uninteresting parameters for fuzzing before skipping"),
 	flagSet.StringVarP(&options.FuzzAggressionLevel, "fuzz-aggression", "fa", "low", "fuzzing aggression level controls payload count for fuzz (low, medium, high)"),
 )
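In practice the new flag rides alongside the existing -dast switch: a run such as "nuclei -list urls.txt -dast -fuzzing-dedupe" (where -list is nuclei's standard input flag, not part of this diff) would collapse structurally identical URLs before fuzzing begins.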
@@ -152,6 +152,9 @@ func ValidateOptions(options *types.Options) error {
 	if options.Verbose && options.Silent {
 		return errors.New("both verbose and silent mode specified")
 	}
+	if options.FuzzingDedupe && options.Stream {
+		return errors.New("both fuzzing dedupe and stream mode specified")
+	}

 	if (options.HeadlessOptionalArguments != nil || options.ShowBrowser || options.UseInstalledChrome) && !options.Headless {
 		return errors.New("headless mode (-headless) is required if -ho, -sb, -sc or -lha are set")
102	pkg/input/provider/dedupe/dedupe.go	Normal file
@@ -0,0 +1,102 @@
// Package dedupe implements a duplicate URL deduplication mechanism
// for Nuclei DAST or Fuzzing inputs.
//
// It is used to remove similar or non-relevant inputs from fuzzing
// or DAST scans to reduce the number of requests made.
package dedupe

import (
	"fmt"
	"net/url"
	"regexp"
	"slices"
	"strings"

	mapsutil "github.com/projectdiscovery/utils/maps"
)

// FuzzingDeduper is a deduper for fuzzing inputs
//
// The normalization works as follows:
//
//   - The path is normalized to remove any trailing slashes
//   - The query is normalized by templating the query parameters with their names
//     TODO: Doesn't handle different values, everything is stripped. Maybe make it more flexible?
//   - Numeric IDs in the path are replaced with {numeric_id}
//
// This allows us to deduplicate URLs with different query parameters
// or orders but the same structure or key names.
type FuzzingDeduper struct {
	items *mapsutil.SyncLockMap[string, struct{}]
}

// NewFuzzingDeduper creates a new fuzzing deduper
func NewFuzzingDeduper() *FuzzingDeduper {
	return &FuzzingDeduper{
		items: mapsutil.NewSyncLockMap[string, struct{}](),
	}
}

// Add adds a new URL to the deduper
func (d *FuzzingDeduper) Add(URL string) bool {
	generatedPattern, err := generatePattern(URL)
	if err != nil {
		return false
	}

	_, found := d.items.Get(generatedPattern)
	if found {
		return false
	}
	d.items.Set(generatedPattern, struct{}{})
	return true
}

func generatePattern(urlStr string) (string, error) {
	parsedURL, err := url.ParseRequestURI(urlStr)
	if err != nil {
		return "", err
	}

	path := normalizePath(parsedURL.Path)
	query := extractQuery(parsedURL.Query())

	var builder strings.Builder
	builder.Grow(len(urlStr))
	builder.WriteString(parsedURL.Scheme)
	builder.WriteString("://")
	builder.WriteString(parsedURL.Host)
	builder.WriteString(path)
	if query != "" {
		builder.WriteString("?")
		builder.WriteString(query)
	}
	pattern := builder.String()
	return pattern, nil
}

var (
	numericIDPathRegex = regexp.MustCompile(`/(\d+)(?:/|$)`)
)

func normalizePath(path string) string {
	subMatches := numericIDPathRegex.FindAllStringSubmatch(path, -1)
	for _, match := range subMatches {
		path = strings.ReplaceAll(path, match[0], "/{numeric_id}")
	}
	return path
}

func extractQuery(query url.Values) string {
	normalizedParams := make([]string, 0, len(query))

	for k, v := range query {
		if len(v) == 0 {
			normalizedParams = append(normalizedParams, k)
		} else {
			normalizedParams = append(normalizedParams, fmt.Sprintf("%s={%s}", k, k))
		}
	}
	slices.Sort(normalizedParams)
	return strings.Join(normalizedParams, "&")
}
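To make the normalization concrete, here is a minimal usage sketch (not part of the commit; the import path matches the new package, and the expected results follow from the tests below):

package main

import (
	"fmt"

	"github.com/projectdiscovery/nuclei/v3/pkg/input/provider/dedupe"
)

func main() {
	deduper := dedupe.NewFuzzingDeduper()

	// Different query values normalize to the same pattern,
	// http://example.com/page?id={id}, so only the first Add succeeds.
	fmt.Println(deduper.Add("http://example.com/page?id=1")) // true
	fmt.Println(deduper.Add("http://example.com/page?id=2")) // false

	// Parameter order is irrelevant: keys are sorted before joining.
	fmt.Println(deduper.Add("http://example.com/page?a=1&b=2")) // true
	fmt.Println(deduper.Add("http://example.com/page?b=2&a=1")) // false

	// Numeric path segments are templated as {numeric_id}.
	fmt.Println(deduper.Add("https://example.com/page/1337")) // true
	fmt.Println(deduper.Add("https://example.com/page/1332")) // false
}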
137	pkg/input/provider/dedupe/dedupe_test.go	Normal file
@@ -0,0 +1,137 @@
package dedupe

import (
	"fmt"
	"testing"

	"github.com/stretchr/testify/require"
)

func TestFuzzingDeduper(t *testing.T) {
	t.Run("Basic URL Deduplication", func(t *testing.T) {
		tests := []struct {
			name     string
			urls     []string
			expected []bool
		}{
			{
				name:     "Simple unique URLs",
				urls:     []string{"http://example.com/page1", "http://example.com/page2"},
				expected: []bool{true, true},
			},
			{
				name:     "Duplicate URLs",
				urls:     []string{"http://example.com/page1", "http://example.com/page1"},
				expected: []bool{true, false},
			},
			{
				name:     "URLs with different query param values",
				urls:     []string{"http://example.com/page?id=1", "http://example.com/page?id=2"},
				expected: []bool{true, false},
			},
			{
				name:     "URLs with different query param orders",
				urls:     []string{"http://example.com/page?a=1&b=2", "http://example.com/page?b=2&a=1"},
				expected: []bool{true, false},
			},
			{
				name:     "URLs with and without trailing slash",
				urls:     []string{"http://example.com/page/", "http://example.com/page"},
				expected: []bool{true, true},
			},
			{
				name:     "URLs with different schemes",
				urls:     []string{"http://example.com", "https://example.com"},
				expected: []bool{true, true},
			},
			{
				name:     "URLs with query params and without",
				urls:     []string{"http://example.com/page", "http://example.com/page?param=value"},
				expected: []bool{true, true},
			},
			{
				name:     "Invalid URLs",
				urls:     []string{"http://example.com/page", "not a valid url"},
				expected: []bool{true, false},
			},
			{
				name:     "URLs with empty query params",
				urls:     []string{"http://example.com/page?param1=&param2=", "http://example.com/page?param2=&param1="},
				expected: []bool{true, false},
			},
		}

		for _, tt := range tests {
			t.Run(tt.name, func(t *testing.T) {
				deduper := NewFuzzingDeduper()
				for i, url := range tt.urls {
					result := deduper.Add(url)
					require.Equal(t, tt.expected[i], result, "Add(%q) = %v, want %v", url, result, tt.expected[i])
				}
			})
		}
	})

	t.Run("Large Set Deduplication", func(t *testing.T) {
		deduper := NewFuzzingDeduper()
		baseURL := "http://example.com/page?id=%d&param=%s"

		for i := 0; i < 1000; i++ {
			url := fmt.Sprintf(baseURL, i, "value")
			result := deduper.Add(url)
			if i == 0 {
				require.True(t, result, "First URL should be added")
			} else {
				require.False(t, result, "Duplicate URL pattern should not be added: %s", url)
			}
		}

		allItems := deduper.items.GetAll()
		require.Len(t, allItems, 1, "Expected 1 unique URL pattern, got %d", len(allItems))
	})

	t.Run("Path Parameters", func(t *testing.T) {
		deduper := NewFuzzingDeduper()

		require.True(t, deduper.Add("https://example.com/page/1337"))
		require.False(t, deduper.Add("https://example.com/page/1332"))
	})

	t.Run("TestPHP Vulnweb URLs", func(t *testing.T) {
		urls := []string{
			"http://testphp.vulnweb.com/hpp/?pp=12",
			"http://testphp.vulnweb.com/hpp/params.php?p=valid&pp=12",
			"http://testphp.vulnweb.com/artists.php?artist=3",
			"http://testphp.vulnweb.com/artists.php?artist=1",
			"http://testphp.vulnweb.com/artists.php?artist=2",
			"http://testphp.vulnweb.com/listproducts.php?artist=3",
			"http://testphp.vulnweb.com/listproducts.php?cat=4",
			"http://testphp.vulnweb.com/listproducts.php?cat=3",
			"http://testphp.vulnweb.com/listproducts.php?cat=2",
			"http://testphp.vulnweb.com/listproducts.php?artist=2",
			"http://testphp.vulnweb.com/listproducts.php?artist=1",
			"http://testphp.vulnweb.com/listproducts.php?cat=1",
			"http://testphp.vulnweb.com/showimage.php?file=./pictures/6.jpg",
			"http://testphp.vulnweb.com/product.php?pic=6",
			"http://testphp.vulnweb.com/showimage.php?file=./pictures/6.jpg&size=160",
		}

		expectedUnique := 8

		deduper := NewFuzzingDeduper()
		uniqueCount := 0

		for _, url := range urls {
			if deduper.Add(url) {
				uniqueCount++
			}
		}

		require.Equal(t, expectedUnique, uniqueCount, "Expected %d unique URLs, but got %d", expectedUnique, uniqueCount)

		// Test for duplicates
		for _, url := range urls {
			require.False(t, deduper.Add(url), "URL should have been identified as duplicate: %s", url)
		}
	})
}
@@ -19,6 +19,7 @@ import (
 	"github.com/projectdiscovery/hmap/filekv"
 	"github.com/projectdiscovery/hmap/store/hybrid"
 	"github.com/projectdiscovery/mapcidr/asn"
+	"github.com/projectdiscovery/nuclei/v3/pkg/input/provider/dedupe"
 	providerTypes "github.com/projectdiscovery/nuclei/v3/pkg/input/types"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/contextargs"
 	"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/protocolstate"
@@ -48,6 +49,8 @@ type ListInputProvider struct {
 	hostMapStream     *filekv.FileDB
 	hostMapStreamOnce sync.Once
 	sync.Once
+
+	fuzzDeduper *dedupe.FuzzingDeduper
 }

 // Options is a wrapper around types.Options structure
@@ -78,6 +81,9 @@ func New(opts *Options) (*ListInputProvider, error) {
 		},
 		excludedHosts: make(map[string]struct{}),
 	}
+	if options.FuzzingDedupe {
+		input.fuzzDeduper = dedupe.NewFuzzingDeduper()
+	}
 	if options.Stream {
 		fkvOptions := filekv.DefaultOptions
 		fkvOptions.MaxItems = DefaultMaxDedupeItemsCount
@@ -472,6 +478,12 @@ func (i *ListInputProvider) setItem(metaInput *contextargs.MetaInput) {
 	}

 	i.inputCount++ // tracks target count
+	if i.fuzzDeduper != nil {
+		if !i.fuzzDeduper.Add(metaInput.Target()) {
+			gologger.Verbose().Msgf("Ignoring duplicate fuzzing target: %s\n", metaInput.Target())
+			return
+		}
+	}
 	_ = i.hostMap.Set(key, nil)
 	if i.hostMapStream != nil {
 		i.setHostMapStream(key)
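The integration is a simple gate in front of the host map: when dedupe is enabled, setItem consults the deduper before registering a target. A minimal standalone sketch of the same gating pattern (process is a hypothetical stand-in for the provider's host-map bookkeeping, not an API from the diff):

package main

import (
	"fmt"

	"github.com/projectdiscovery/nuclei/v3/pkg/input/provider/dedupe"
)

// gate mirrors the filtering done in setItem above: targets whose
// normalized pattern has already been seen are skipped entirely.
func gate(deduper *dedupe.FuzzingDeduper, targets []string, process func(string)) {
	for _, target := range targets {
		if deduper != nil && !deduper.Add(target) {
			continue // duplicate URL pattern, skip it
		}
		process(target)
	}
}

func main() {
	gate(dedupe.NewFuzzingDeduper(),
		[]string{"http://example.com/a?x=1", "http://example.com/a?x=2"},
		func(t string) { fmt.Println("scanning:", t) }) // prints only the first target
}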
@@ -276,6 +276,8 @@ type Options struct {
 	StoreResponseDir string
 	// DisableRedirects disables following redirects for http request module
 	DisableRedirects bool
+	// FuzzingDedupe enables deduplication of input URLs for fuzzing
+	FuzzingDedupe bool
 	// SNI custom hostname
 	SNI string
 	// InputFileMode specifies the mode of input file (jsonl, burp, openapi, swagger, etc)