Mirror of https://github.com/projectdiscovery/nuclei.git, synced 2025-12-17 21:55:26 +00:00.
feat: added initial fuzz input url deduplication implementation
This commit is contained in:
parent
042b33de3d
commit
fa6cac181e
@ -328,6 +328,7 @@ on extensive configurability, massive extensibility and ease of use.`)
|
|||||||
flagSet.BoolVar(&fuzzFlag, "fuzz", false, "enable loading fuzzing templates (Deprecated: use -dast instead)"),
|
flagSet.BoolVar(&fuzzFlag, "fuzz", false, "enable loading fuzzing templates (Deprecated: use -dast instead)"),
|
||||||
flagSet.BoolVar(&options.DAST, "dast", false, "enable / run dast (fuzz) nuclei templates"),
|
flagSet.BoolVar(&options.DAST, "dast", false, "enable / run dast (fuzz) nuclei templates"),
|
||||||
flagSet.BoolVarP(&options.DisplayFuzzPoints, "display-fuzz-points", "dfp", false, "display fuzz points in the output for debugging"),
|
flagSet.BoolVarP(&options.DisplayFuzzPoints, "display-fuzz-points", "dfp", false, "display fuzz points in the output for debugging"),
|
||||||
|
flagSet.BoolVarP(&options.FuzzingDedupe, "fuzzing-dedupe", "fd", false, "deduplicate fuzzing url inputs"),
|
||||||
flagSet.IntVar(&options.FuzzParamFrequency, "fuzz-param-frequency", 10, "frequency of uninteresting parameters for fuzzing before skipping"),
|
flagSet.IntVar(&options.FuzzParamFrequency, "fuzz-param-frequency", 10, "frequency of uninteresting parameters for fuzzing before skipping"),
|
||||||
flagSet.StringVarP(&options.FuzzAggressionLevel, "fuzz-aggression", "fa", "low", "fuzzing aggression level controls payload count for fuzz (low, medium, high)"),
|
flagSet.StringVarP(&options.FuzzAggressionLevel, "fuzz-aggression", "fa", "low", "fuzzing aggression level controls payload count for fuzz (low, medium, high)"),
|
||||||
)
|
)
|
||||||
|
|||||||
@ -152,6 +152,9 @@ func ValidateOptions(options *types.Options) error {
|
|||||||
if options.Verbose && options.Silent {
|
if options.Verbose && options.Silent {
|
||||||
return errors.New("both verbose and silent mode specified")
|
return errors.New("both verbose and silent mode specified")
|
||||||
}
|
}
|
||||||
|
if options.FuzzingDedupe && options.Stream {
|
||||||
|
return errors.New("both fuzzing dedupe and stream mode specified")
|
||||||
|
}
|
||||||
|
|
||||||
if (options.HeadlessOptionalArguments != nil || options.ShowBrowser || options.UseInstalledChrome) && !options.Headless {
|
if (options.HeadlessOptionalArguments != nil || options.ShowBrowser || options.UseInstalledChrome) && !options.Headless {
|
||||||
return errors.New("headless mode (-headless) is required if -ho, -sb, -sc or -lha are set")
|
return errors.New("headless mode (-headless) is required if -ho, -sb, -sc or -lha are set")
|
||||||
|
|||||||
102
pkg/input/provider/dedupe/dedupe.go
Normal file
102
pkg/input/provider/dedupe/dedupe.go
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
// Package dedupe implements a duplicate URL deduplication mechanism
|
||||||
|
// for Nuclei DAST or Fuzzing inputs.
|
||||||
|
//
|
||||||
|
// It is used to remove similar or non-relevant inputs from fuzzing
|
||||||
|
// or DAST scans to reduce the number of requests made.
|
||||||
|
package dedupe
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/url"
|
||||||
|
"regexp"
|
||||||
|
"slices"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
mapsutil "github.com/projectdiscovery/utils/maps"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FuzzingDeduper is a deduper for fuzzing inputs
|
||||||
|
//
|
||||||
|
// The normalization works as follows:
|
||||||
|
//
|
||||||
|
// - The path is normalized to remove any trailing slashes
|
||||||
|
// - The query is normalized by templating the query parameters with their names
|
||||||
|
// TODO: Doesn't handle different values, everything is stripped. Maybe make it more flexible?
|
||||||
|
// - Numeric IDs in the path are replaced with {numeric_id}
|
||||||
|
//
|
||||||
|
// This allows us to deduplicate URLs with different query parameters
|
||||||
|
// or orders but the same structure or key names.
|
||||||
|
type FuzzingDeduper struct {
|
||||||
|
items *mapsutil.SyncLockMap[string, struct{}]
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewFuzzingDeduper creates a new fuzzing deduper
|
||||||
|
func NewFuzzingDeduper() *FuzzingDeduper {
|
||||||
|
return &FuzzingDeduper{
|
||||||
|
items: mapsutil.NewSyncLockMap[string, struct{}](),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add adds a new URL to the deduper
|
||||||
|
func (d *FuzzingDeduper) Add(URL string) bool {
|
||||||
|
generatedPattern, err := generatePattern(URL)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
_, found := d.items.Get(generatedPattern)
|
||||||
|
if found {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
d.items.Set(generatedPattern, struct{}{})
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func generatePattern(urlStr string) (string, error) {
|
||||||
|
parsedURL, err := url.ParseRequestURI(urlStr)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
path := normalizePath(parsedURL.Path)
|
||||||
|
query := extractQuery(parsedURL.Query())
|
||||||
|
|
||||||
|
var builder strings.Builder
|
||||||
|
builder.Grow(len(urlStr))
|
||||||
|
builder.WriteString(parsedURL.Scheme)
|
||||||
|
builder.WriteString("://")
|
||||||
|
builder.WriteString(parsedURL.Host)
|
||||||
|
builder.WriteString(path)
|
||||||
|
if query != "" {
|
||||||
|
builder.WriteString("?")
|
||||||
|
builder.WriteString(query)
|
||||||
|
}
|
||||||
|
pattern := builder.String()
|
||||||
|
return pattern, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
numericIDPathRegex = regexp.MustCompile(`/(\d+)(?:/|$)`)
|
||||||
|
)
|
||||||
|
|
||||||
|
func normalizePath(path string) string {
|
||||||
|
subMatches := numericIDPathRegex.FindAllStringSubmatch(path, -1)
|
||||||
|
for _, match := range subMatches {
|
||||||
|
path = strings.ReplaceAll(path, match[0], "/{numeric_id}")
|
||||||
|
}
|
||||||
|
return path
|
||||||
|
}
|
||||||
|
|
||||||
|
func extractQuery(query url.Values) string {
|
||||||
|
normalizedParams := make([]string, 0, len(query))
|
||||||
|
|
||||||
|
for k, v := range query {
|
||||||
|
if len(v) == 0 {
|
||||||
|
normalizedParams = append(normalizedParams, k)
|
||||||
|
} else {
|
||||||
|
normalizedParams = append(normalizedParams, fmt.Sprintf("%s={%s}", k, k))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slices.Sort(normalizedParams)
|
||||||
|
return strings.Join(normalizedParams, "&")
|
||||||
|
}
|
||||||
137
pkg/input/provider/dedupe/dedupe_test.go
Normal file
137
pkg/input/provider/dedupe/dedupe_test.go
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
package dedupe
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFuzzingDeduper(t *testing.T) {
|
||||||
|
t.Run("Basic URL Deduplication", func(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
urls []string
|
||||||
|
expected []bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Simple unique URLs",
|
||||||
|
urls: []string{"http://example.com/page1", "http://example.com/page2"},
|
||||||
|
expected: []bool{true, true},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Duplicate URLs",
|
||||||
|
urls: []string{"http://example.com/page1", "http://example.com/page1"},
|
||||||
|
expected: []bool{true, false},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "URLs with different query param values",
|
||||||
|
urls: []string{"http://example.com/page?id=1", "http://example.com/page?id=2"},
|
||||||
|
expected: []bool{true, false},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "URLs with different query param orders",
|
||||||
|
urls: []string{"http://example.com/page?a=1&b=2", "http://example.com/page?b=2&a=1"},
|
||||||
|
expected: []bool{true, false},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "URLs with and without trailing slash",
|
||||||
|
urls: []string{"http://example.com/page/", "http://example.com/page"},
|
||||||
|
expected: []bool{true, true},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "URLs with different schemes",
|
||||||
|
urls: []string{"http://example.com", "https://example.com"},
|
||||||
|
expected: []bool{true, true},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "URLs with query params and without",
|
||||||
|
urls: []string{"http://example.com/page", "http://example.com/page?param=value"},
|
||||||
|
expected: []bool{true, true},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Invalid URLs",
|
||||||
|
urls: []string{"http://example.com/page", "not a valid url"},
|
||||||
|
expected: []bool{true, false},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "URLs with empty query params",
|
||||||
|
urls: []string{"http://example.com/page?param1=¶m2=", "http://example.com/page?param2=¶m1="},
|
||||||
|
expected: []bool{true, false},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
deduper := NewFuzzingDeduper()
|
||||||
|
for i, url := range tt.urls {
|
||||||
|
result := deduper.Add(url)
|
||||||
|
require.Equal(t, tt.expected[i], result, "Add(%q) = %v, want %v", url, result, tt.expected[i])
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Large Set Deduplication", func(t *testing.T) {
|
||||||
|
deduper := NewFuzzingDeduper()
|
||||||
|
baseURL := "http://example.com/page?id=%d¶m=%s"
|
||||||
|
|
||||||
|
for i := 0; i < 1000; i++ {
|
||||||
|
url := fmt.Sprintf(baseURL, i, "value")
|
||||||
|
result := deduper.Add(url)
|
||||||
|
if i == 0 {
|
||||||
|
require.True(t, result, "First URL should be added")
|
||||||
|
} else {
|
||||||
|
require.False(t, result, "Duplicate URL pattern should not be added: %s", url)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
allItems := deduper.items.GetAll()
|
||||||
|
require.Len(t, allItems, 1, "Expected 1 unique URL pattern, got %d", len(allItems))
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Path Parameters", func(t *testing.T) {
|
||||||
|
deduper := NewFuzzingDeduper()
|
||||||
|
|
||||||
|
require.True(t, deduper.Add("https://example.com/page/1337"))
|
||||||
|
require.False(t, deduper.Add("https://example.com/page/1332"))
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("TestPHP Vulnweb URLs", func(t *testing.T) {
|
||||||
|
urls := []string{
|
||||||
|
"http://testphp.vulnweb.com/hpp/?pp=12",
|
||||||
|
"http://testphp.vulnweb.com/hpp/params.php?p=valid&pp=12",
|
||||||
|
"http://testphp.vulnweb.com/artists.php?artist=3",
|
||||||
|
"http://testphp.vulnweb.com/artists.php?artist=1",
|
||||||
|
"http://testphp.vulnweb.com/artists.php?artist=2",
|
||||||
|
"http://testphp.vulnweb.com/listproducts.php?artist=3",
|
||||||
|
"http://testphp.vulnweb.com/listproducts.php?cat=4",
|
||||||
|
"http://testphp.vulnweb.com/listproducts.php?cat=3",
|
||||||
|
"http://testphp.vulnweb.com/listproducts.php?cat=2",
|
||||||
|
"http://testphp.vulnweb.com/listproducts.php?artist=2",
|
||||||
|
"http://testphp.vulnweb.com/listproducts.php?artist=1",
|
||||||
|
"http://testphp.vulnweb.com/listproducts.php?cat=1",
|
||||||
|
"http://testphp.vulnweb.com/showimage.php?file=./pictures/6.jpg",
|
||||||
|
"http://testphp.vulnweb.com/product.php?pic=6",
|
||||||
|
"http://testphp.vulnweb.com/showimage.php?file=./pictures/6.jpg&size=160",
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedUnique := 8
|
||||||
|
|
||||||
|
deduper := NewFuzzingDeduper()
|
||||||
|
uniqueCount := 0
|
||||||
|
|
||||||
|
for _, url := range urls {
|
||||||
|
if deduper.Add(url) {
|
||||||
|
uniqueCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
require.Equal(t, expectedUnique, uniqueCount, "Expected %d unique URLs, but got %d", expectedUnique, uniqueCount)
|
||||||
|
|
||||||
|
// Test for duplicates
|
||||||
|
for _, url := range urls {
|
||||||
|
require.False(t, deduper.Add(url), "URL should have been identified as duplicate: %s", url)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
@ -19,6 +19,7 @@ import (
|
|||||||
"github.com/projectdiscovery/hmap/filekv"
|
"github.com/projectdiscovery/hmap/filekv"
|
||||||
"github.com/projectdiscovery/hmap/store/hybrid"
|
"github.com/projectdiscovery/hmap/store/hybrid"
|
||||||
"github.com/projectdiscovery/mapcidr/asn"
|
"github.com/projectdiscovery/mapcidr/asn"
|
||||||
|
"github.com/projectdiscovery/nuclei/v3/pkg/input/provider/dedupe"
|
||||||
providerTypes "github.com/projectdiscovery/nuclei/v3/pkg/input/types"
|
providerTypes "github.com/projectdiscovery/nuclei/v3/pkg/input/types"
|
||||||
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/contextargs"
|
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/contextargs"
|
||||||
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/protocolstate"
|
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/protocolstate"
|
||||||
@ -48,6 +49,8 @@ type ListInputProvider struct {
|
|||||||
hostMapStream *filekv.FileDB
|
hostMapStream *filekv.FileDB
|
||||||
hostMapStreamOnce sync.Once
|
hostMapStreamOnce sync.Once
|
||||||
sync.Once
|
sync.Once
|
||||||
|
|
||||||
|
fuzzDeduper *dedupe.FuzzingDeduper
|
||||||
}
|
}
|
||||||
|
|
||||||
// Options is a wrapper around types.Options structure
|
// Options is a wrapper around types.Options structure
|
||||||
@ -78,6 +81,9 @@ func New(opts *Options) (*ListInputProvider, error) {
|
|||||||
},
|
},
|
||||||
excludedHosts: make(map[string]struct{}),
|
excludedHosts: make(map[string]struct{}),
|
||||||
}
|
}
|
||||||
|
if options.FuzzingDedupe {
|
||||||
|
input.fuzzDeduper = dedupe.NewFuzzingDeduper()
|
||||||
|
}
|
||||||
if options.Stream {
|
if options.Stream {
|
||||||
fkvOptions := filekv.DefaultOptions
|
fkvOptions := filekv.DefaultOptions
|
||||||
fkvOptions.MaxItems = DefaultMaxDedupeItemsCount
|
fkvOptions.MaxItems = DefaultMaxDedupeItemsCount
|
||||||
@ -472,6 +478,12 @@ func (i *ListInputProvider) setItem(metaInput *contextargs.MetaInput) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
i.inputCount++ // tracks target count
|
i.inputCount++ // tracks target count
|
||||||
|
if i.fuzzDeduper != nil {
|
||||||
|
if !i.fuzzDeduper.Add(metaInput.Target()) {
|
||||||
|
gologger.Verbose().Msgf("Ignoring duplicate fuzzing target: %s\n", metaInput.Target())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
_ = i.hostMap.Set(key, nil)
|
_ = i.hostMap.Set(key, nil)
|
||||||
if i.hostMapStream != nil {
|
if i.hostMapStream != nil {
|
||||||
i.setHostMapStream(key)
|
i.setHostMapStream(key)
|
||||||
|
|||||||
@ -276,6 +276,8 @@ type Options struct {
|
|||||||
StoreResponseDir string
|
StoreResponseDir string
|
||||||
// DisableRedirects disables following redirects for http request module
|
// DisableRedirects disables following redirects for http request module
|
||||||
DisableRedirects bool
|
DisableRedirects bool
|
||||||
|
// FuzzingDedupe enables deduplication of input URLs for fuzzing
|
||||||
|
FuzzingDedupe bool
|
||||||
// SNI custom hostname
|
// SNI custom hostname
|
||||||
SNI string
|
SNI string
|
||||||
// InputFileMode specifies the mode of input file (jsonl, burp, openapi, swagger, etc)
|
// InputFileMode specifies the mode of input file (jsonl, burp, openapi, swagger, etc)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user