Added probing for URL + input based on protocol (#2614)

* Added workflow names based condition

* Added conditional filtering to workflow executor

* Replaced names with single name stringslice

* Added probing for URL + input based on protocol

* Remove debug comments

* Fixed typo

* Fixed failing tests

* Fixed workflow matcher condition + tests

* Fixed workflow item name

* Switch to if-else

* Fixed review comment strict

* Increase bulk size

* Added default port for SSL protocol + misc changes

* Fixed failing tests

* Fixed misc changes to executer

* Fixed failing self-contained and offlinehttp tests

* Fixed atomic increment operation

* misc update

* Fixed failing builds

Co-authored-by: sandeep <8293321+ehsandeep@users.noreply.github.com>
This commit is contained in:
Ice3man 2022-10-20 17:23:00 +05:30 committed by GitHub
parent 0149e94f90
commit 363ffb75db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 395 additions and 20 deletions

View File

@ -230,7 +230,8 @@ on extensive configurability, massive extensibility and ease of use.`)
flagSet.BoolVarP(&options.StopAtFirstMatch, "stop-at-first-path", "spm", false, "stop processing HTTP requests after the first match (may break template/workflow logic)"),
flagSet.BoolVar(&options.Stream, "stream", false, "stream mode - start elaborating without sorting the input"),
flagSet.DurationVarP(&options.InputReadTimeout, "input-read-timeout", "irt", time.Duration(3*time.Minute), "timeout on input read"),
flagSet.BoolVar(&options.DisableStdin, "no-stdin", false, "Disable Stdin processing"),
flagSet.BoolVarP(&options.DisableHTTPProbe, "no-httpx", "nh", false, "disable httpx probing for non-url input"),
flagSet.BoolVar(&options.DisableStdin, "no-stdin", false, "disable stdin processing"),
)
flagSet.CreateGroup("headless", "Headless",

View File

@ -33,8 +33,9 @@ func (r *Runner) runCloudEnumeration(store *loader.Store) (*atomic.Bool, error)
results := &atomic.Bool{}
targets := make([]string, 0, r.hmapInputProvider.Count())
r.hmapInputProvider.Scan(func(value string) {
r.hmapInputProvider.Scan(func(value string) bool {
targets = append(targets, value)
return true
})
templates := make([]string, 0, len(store.Templates()))
for _, template := range store.Templates() {

View File

@ -0,0 +1,84 @@
package runner
import (
"fmt"
"io"
"strings"
"sync/atomic"
"github.com/pkg/errors"
"github.com/projectdiscovery/gologger"
"github.com/projectdiscovery/hmap/store/hybrid"
"github.com/projectdiscovery/nuclei/v2/pkg/protocols/http/httpclientpool"
"github.com/projectdiscovery/retryablehttp-go"
"github.com/remeh/sizedwaitgroup"
)
// probeBulkSize is the minimum number of concurrent probes used by
// initializeTemplatesHTTPInput; a larger configured BulkSize takes precedence.
const probeBulkSize = 50
// initializeTemplatesHTTPInput probes every input that does not already start
// with "http://" or "https://" and stores the first reachable URL form for it.
//
// The returned map is keyed by the original input and holds the probed URL as
// value; inputs for which no scheme succeeded are absent from the map. On
// success the caller owns the map and is responsible for closing it. On error
// nothing is returned and any temporary resources are released here.
func (r *Runner) initializeTemplatesHTTPInput() (*hybrid.HybridMap, error) {
	hm, err := hybrid.New(hybrid.DefaultDiskOptions)
	if err != nil {
		return nil, errors.Wrap(err, "could not create temporary input file")
	}
	httpclient, err := httpclientpool.Get(r.options, &httpclientpool.Configuration{})
	if err != nil {
		// The map is never handed to the caller on this path, so it must be
		// closed here or it would leak.
		_ = hm.Close()
		return nil, errors.Wrap(err, "could not get http client")
	}
	gologger.Info().Msgf("Running httpx on input to execute http based template")

	// Never probe with fewer than probeBulkSize workers, but honour a larger
	// user-configured bulk size.
	bulkSize := probeBulkSize
	if r.options.BulkSize > probeBulkSize {
		bulkSize = r.options.BulkSize
	}

	// Probe the non-standard URLs and store them in cache
	swg := sizedwaitgroup.New(bulkSize)
	count := int32(0)
	r.hmapInputProvider.Scan(func(value string) bool {
		// Inputs that already carry an http(s) scheme need no probing.
		if strings.HasPrefix(value, "http://") || strings.HasPrefix(value, "https://") {
			return true
		}

		swg.Add()
		go func(input string) {
			defer swg.Done()

			result := probeURL(input, httpclient)
			if result == "" {
				return
			}
			// Only count the input as discovered once it is actually stored,
			// otherwise the reported total would not match the map contents.
			if err := hm.Set(input, []byte(result)); err != nil {
				gologger.Warning().Msgf("Could not store probed URL for %s: %s", input, err)
				return
			}
			atomic.AddInt32(&count, 1)
		}(value)
		return true
	})
	swg.Wait()

	gologger.Info().Msgf("Discovered %d URL from input", atomic.LoadInt32(&count))
	return hm, nil
}
var (
// drainReqSize is the maximum number of response body bytes that probeURL
// reads and discards before closing the body.
drainReqSize = int64(8 * 1024)
// httpSchemes lists the schemes probeURL tries, in the order they are tried.
httpSchemes = []string{"https", "http"}
)
// probeURL determines which scheme an input answers on. Each entry of
// httpSchemes is tried in order (https before http) and the first URL whose
// GET request completes without error is returned. An empty string is
// returned when no scheme succeeds.
func probeURL(input string, httpclient *retryablehttp.Client) string {
	for _, scheme := range httpSchemes {
		candidate := fmt.Sprintf("%s://%s", scheme, input)
		response, reqErr := httpclient.Get(candidate)
		// A response may be present even alongside an error; drain a bounded
		// amount of the body and close it in either case.
		if response != nil {
			_, _ = io.CopyN(io.Discard, response.Body, drainReqSize)
			response.Body.Close()
		}
		if reqErr == nil {
			return candidate
		}
	}
	return ""
}

View File

@ -28,6 +28,7 @@ import (
"github.com/projectdiscovery/nuclei/v2/pkg/catalog/loader"
"github.com/projectdiscovery/nuclei/v2/pkg/core"
"github.com/projectdiscovery/nuclei/v2/pkg/core/inputs/hybrid"
"github.com/projectdiscovery/nuclei/v2/pkg/input"
"github.com/projectdiscovery/nuclei/v2/pkg/output"
"github.com/projectdiscovery/nuclei/v2/pkg/parsers"
"github.com/projectdiscovery/nuclei/v2/pkg/progress"
@ -366,6 +367,7 @@ func (r *Runner) RunEnumeration() error {
Colorizer: r.colorizer,
ResumeCfg: r.resumeCfg,
ExcludeMatchers: excludematchers.New(r.options.ExcludeMatchers),
InputHelper: input.NewHelper(),
}
engine := core.New(r.options)
engine.SetExecuterOptions(executerOpts)
@ -405,8 +407,18 @@ func (r *Runner) RunEnumeration() error {
}
r.displayExecutionInfo(store)
var results *atomic.Bool
// If not explicitly disabled, check if http based protocols
// are used and if inputs are non-http to pre-perform probing
// of urls and storing them for execution.
if !r.options.DisableHTTPProbe && loader.IsHTTPBasedProtocolUsed(store) && r.isInputNonHTTP() {
inputHelpers, err := r.initializeTemplatesHTTPInput()
if err != nil {
return errors.Wrap(err, "could not probe http input")
}
executerOpts.InputHelper.InputsHTTP = inputHelpers
}
var results *atomic.Bool
if r.options.Cloud {
gologger.Info().Msgf("Running scan on cloud with URL %s", r.options.CloudURL)
results, err = r.runCloudEnumeration(store)
@ -422,6 +434,9 @@ func (r *Runner) RunEnumeration() error {
}
r.progress.Stop()
if executerOpts.InputHelper != nil {
_ = executerOpts.InputHelper.Close()
}
if r.issuesClient != nil {
r.issuesClient.Close()
}
@ -435,6 +450,18 @@ func (r *Runner) RunEnumeration() error {
return err
}
// isInputNonHTTP reports whether at least one input has no "://" scheme
// separator. The scan is asked to stop at the first such input.
func (r *Runner) isInputNonHTTP() bool {
	found := false
	r.hmapInputProvider.Scan(func(item string) bool {
		if strings.Contains(item, "://") {
			// Looks like a URL; keep scanning.
			return true
		}
		found = true
		return false
	})
	return found
}
func (r *Runner) executeSmartWorkflowInput(executerOpts protocols.ExecuterOptions, store *loader.Store, engine *core.Engine) (*atomic.Bool, error) {
r.progress.Init(r.hmapInputProvider.Count(), 0, 0)

View File

@ -337,3 +337,42 @@ func (store *Store) LoadTemplatesWithTags(templatesList, tags []string) []*templ
}
return loadedTemplates
}
// IsHTTPBasedProtocolUsed reports whether any template in the store carries
// http or headless requests, either directly or through one of its workflows.
func IsHTTPBasedProtocolUsed(store *Store) bool {
	for _, tpl := range store.Templates() {
		switch {
		case len(tpl.RequestsHTTP) > 0, len(tpl.RequestsHeadless) > 0:
			return true
		case len(tpl.Workflows) > 0 && workflowContainsProtocol(tpl.Workflows):
			return true
		}
	}
	return false
}
// workflowContainsProtocol reports whether any workflow template in the given
// list, or any template reachable through its matchers or subtemplates, has an
// executer whose template type is http or headless.
func workflowContainsProtocol(workflow []*workflows.WorkflowTemplate) bool {
	for _, item := range workflow {
		for _, executer := range item.Executers {
			if executer.TemplateType == templateTypes.HTTPProtocol || executer.TemplateType == templateTypes.HeadlessProtocol {
				return true
			}
		}
		for _, matcher := range item.Matchers {
			if workflowContainsProtocol(matcher.Subtemplates) {
				return true
			}
		}
		// Recurse on the subtemplates themselves rather than on each
		// subtemplate's children: the recursive call inspects the executers of
		// every subtemplate and then descends further, so no level is skipped.
		if workflowContainsProtocol(item.Subtemplates) {
			return true
		}
	}
	return false
}

View File

@ -29,7 +29,7 @@ type InputProvider interface {
Count() int64
// Scan iterates the input and each found item is passed to the
// callback consumer.
Scan(callback func(value string))
Scan(callback func(value string) bool)
}
// New returns a new Engine instance

View File

@ -98,7 +98,7 @@ func (e *Engine) executeModelWithInput(templateType types.ProtocolType, template
currentInfo.Unlock()
}
target.Scan(func(scannedValue string) {
target.Scan(func(scannedValue string) bool {
// Best effort to track the host progression
// skips indexes lower than the minimum in-flight at interruption time
var skip bool
@ -123,7 +123,7 @@ func (e *Engine) executeModelWithInput(templateType types.ProtocolType, template
// Skip if the host has had errors
if e.executerOpts.HostErrorsCache != nil && e.executerOpts.HostErrorsCache.Check(scannedValue) {
return
return true
}
wg.WaitGroup.Add()
@ -151,6 +151,7 @@ func (e *Engine) executeModelWithInput(templateType types.ProtocolType, template
}(index, skip, scannedValue)
index++
return true
})
wg.WaitGroup.Wait()
@ -187,10 +188,10 @@ func (e *Engine) ExecuteWithResults(templatesList []*templates.Template, target
func (e *Engine) executeModelWithInputAndResult(templateType types.ProtocolType, template *templates.Template, target InputProvider, results *atomic.Bool, callback func(*output.ResultEvent)) {
wg := e.workPool.InputPool(templateType)
target.Scan(func(scannedValue string) {
target.Scan(func(scannedValue string) bool {
// Skip if the host has had errors
if e.executerOpts.HostErrorsCache != nil && e.executerOpts.HostErrorsCache.Check(scannedValue) {
return
return true
}
wg.WaitGroup.Add()
@ -216,6 +217,7 @@ func (e *Engine) executeModelWithInputAndResult(templateType types.ProtocolType,
}
results.CompareAndSwap(false, match)
}(scannedValue)
return true
})
wg.WaitGroup.Wait()
}

View File

@ -142,9 +142,11 @@ func (i *Input) Count() int64 {
// Scan iterates the input and each found item is passed to the
// callback consumer.
func (i *Input) Scan(callback func(value string)) {
func (i *Input) Scan(callback func(value string) bool) {
callbackFunc := func(k, _ []byte) error {
callback(string(k))
if !callback(string(k)) {
return io.EOF
}
return nil
}
if i.hostMapStream != nil {

View File

@ -10,8 +10,10 @@ func (s *SimpleInputProvider) Count() int64 {
}
// Scan calls a callback function till the input provider is exhausted
func (s *SimpleInputProvider) Scan(callback func(value string)) {
func (s *SimpleInputProvider) Scan(callback func(value string) bool) {
for _, v := range s.Inputs {
callback(v)
if !callback(v) {
return
}
}
}

129
v2/pkg/input/input.go Normal file
View File

@ -0,0 +1,129 @@
package input
import (
"net"
"net/url"
"os"
"path/filepath"
"strings"
"github.com/projectdiscovery/hmap/store/hybrid"
templateTypes "github.com/projectdiscovery/nuclei/v2/pkg/templates/types"
)
// Helper is a structure for helping with input transformation
type Helper struct {
// InputsHTTP optionally maps a raw input to its probed URL form. It is
// consulted when an input has to be converted to a URL and may be nil.
InputsHTTP *hybrid.HybridMap
}
// NewHelper returns a new input helper instance with no probed inputs attached.
func NewHelper() *Helper {
	return &Helper{}
}
// Close releases the resources held by the input helper. It returns the error
// from closing the probed-input map, or nil when no map is attached.
func (h *Helper) Close() error {
	if h.InputsHTTP == nil {
		return nil
	}
	return h.InputsHTTP.Close()
}
// Transform converts an input into the form expected by the given protocol.
// Protocols without a known mapping get the input back unchanged; for mapped
// protocols the result is whatever convertInputToType produces, which is an
// empty string when the input cannot be converted.
func (h *Helper) Transform(input string, protocol templateTypes.ProtocolType) string {
	var (
		target      inputType
		defaultPort string
	)
	switch protocol {
	case templateTypes.DNSProtocol, templateTypes.WHOISProtocol:
		target = inputTypeHost
	case templateTypes.FileProtocol, templateTypes.OfflineHTTPProtocol:
		target = inputTypeFilepath
	case templateTypes.HTTPProtocol, templateTypes.HeadlessProtocol:
		target = inputTypeURL
	case templateTypes.NetworkProtocol:
		target = inputTypeHostPort
	case templateTypes.SSLProtocol:
		// SSL falls back to port 443 when the input names no port.
		target, defaultPort = inputTypeHostPort, "443"
	case templateTypes.WebsocketProtocol:
		target = inputTypeWebsocket
	default:
		return input
	}
	return h.convertInputToType(input, target, defaultPort)
}
// inputType identifies the target form that convertInputToType should produce.
type inputType int
// Values start at 1, so the zero value of inputType matches none of them.
const (
// inputTypeHost requests a bare host without scheme or port.
inputTypeHost inputType = iota + 1
// inputTypeURL requests an http or https URL.
inputTypeURL
// inputTypeFilepath requests a filesystem path or glob pattern.
inputTypeFilepath
// inputTypeHostPort requests a host:port pair.
inputTypeHostPort
// inputTypeWebsocket requests a ws or wss URL.
inputTypeWebsocket
)
// convertInputToType converts an input to the form described by inputType.
//
// The input may be a bare host, a host:port pair, a URL or a file path.
// defaultPort is only used for inputTypeHostPort and supplies the port when
// the input does not name one. An empty string is returned when the input
// cannot be expressed in the requested form, including URL-looking inputs
// that fail to parse.
func (h *Helper) convertInputToType(input string, inputType inputType, defaultPort string) string {
	notURL := !strings.Contains(input, "://")
	// url.Parse returns a nil URL on failure, so every use of parsed below
	// has to be guarded.
	parsed, _ := url.Parse(input)

	var host, port string
	switch {
	case notURL:
		host, port, _ = net.SplitHostPort(input)
	case parsed != nil:
		host, port, _ = net.SplitHostPort(parsed.Host)
	}

	switch inputType {
	case inputTypeFilepath:
		// Anything that splits into host and port is not treated as a path.
		if port != "" {
			return ""
		}
		if filepath.IsAbs(input) {
			return input
		}
		if absPath, _ := filepath.Abs(input); absPath != "" && fileOrFolderExists(absPath) {
			return input
		}
		// Accept syntactically valid glob patterns, but never URLs.
		if _, err := filepath.Match(input, ""); err != filepath.ErrBadPattern && notURL {
			return input
		}
	case inputTypeHost:
		if host != "" {
			return host
		}
		if notURL {
			return input
		}
		if parsed != nil {
			return parsed.Hostname()
		}
	case inputTypeURL:
		if parsed != nil && (parsed.Scheme == "http" || parsed.Scheme == "https") {
			return input
		}
		// Fall back to the URL discovered for this input during probing.
		if h.InputsHTTP != nil {
			if probed, ok := h.InputsHTTP.Get(input); ok {
				return string(probed)
			}
		}
	case inputTypeHostPort:
		if host != "" && port != "" {
			return net.JoinHostPort(host, port)
		}
		// Hostname strips the brackets of IPv6 literals, which JoinHostPort
		// adds back itself; joining the raw Host would double them.
		if parsed != nil && port == "" && parsed.Scheme == "https" {
			return net.JoinHostPort(parsed.Hostname(), "443")
		}
		if defaultPort != "" {
			target := input
			if !notURL {
				// Join the port onto the URL's host, not onto the whole URL.
				if parsed == nil || parsed.Hostname() == "" {
					return ""
				}
				target = parsed.Hostname()
			}
			return net.JoinHostPort(target, defaultPort)
		}
	case inputTypeWebsocket:
		if parsed != nil && (parsed.Scheme == "ws" || parsed.Scheme == "wss") {
			return input
		}
	}
	return ""
}
// fileOrFolderExists reports whether filename can be stat'ed without a
// "does not exist" error. Any other stat outcome counts as existing.
func fileOrFolderExists(filename string) bool {
	if _, statErr := os.Stat(filename); os.IsNotExist(statErr) {
		return false
	}
	return true
}

View File

@ -0,0 +1,64 @@
package input
import (
"testing"
"github.com/projectdiscovery/hmap/store/hybrid"
"github.com/stretchr/testify/require"
)
func TestConvertInputToType(t *testing.T) {
helper := &Helper{}
hm, err := hybrid.New(hybrid.DefaultDiskOptions)
require.NoError(t, err, "could not create hybrid map")
helper.InputsHTTP = hm
defer hm.Close()
_ = hm.Set("google.com", []byte("https://google.com"))
tests := []struct {
input string
inputType inputType
result string
defaultPort string
}{
// host
{"google.com", inputTypeHost, "google.com", ""},
{"google.com:443", inputTypeHost, "google.com", ""},
{"https://google.com", inputTypeHost, "google.com", ""},
{"https://google.com:443", inputTypeHost, "google.com", ""},
// url
{"test.com", inputTypeURL, "", ""},
{"google.com", inputTypeURL, "https://google.com", ""},
{"https://google.com", inputTypeURL, "https://google.com", ""},
// file
{"google.com:443", inputTypeFilepath, "", ""},
{"https://google.com:443", inputTypeFilepath, "", ""},
{"/example/path", inputTypeFilepath, "/example/path", ""},
{"input_test.go", inputTypeFilepath, "input_test.go", ""},
{"../input", inputTypeFilepath, "../input", ""},
{"input_test.*", inputTypeFilepath, "input_test.*", ""},
// host-port
{"google.com", inputTypeHostPort, "", ""},
{"google.com:443", inputTypeHostPort, "google.com:443", ""},
{"https://google.com", inputTypeHostPort, "google.com:443", ""},
{"https://google.com:443", inputTypeHostPort, "google.com:443", ""},
// host-port with default port
{"google.com", inputTypeHostPort, "google.com:443", "443"},
// websocket
{"google.com", inputTypeWebsocket, "", ""},
{"google.com:443", inputTypeWebsocket, "", ""},
{"https://google.com:443", inputTypeWebsocket, "", ""},
{"wss://google.com", inputTypeWebsocket, "wss://google.com", ""},
}
for _, test := range tests {
result := helper.convertInputToType(test.input, test.inputType, test.defaultPort)
require.Equal(t, test.result, result, "could not get correct result %+v", test)
}
}

View File

@ -132,13 +132,14 @@ func (s *Service) executeWappalyzerTechDetection() error {
// Iterate through each target making http request and identifying fingerprints
inputPool := s.engine.WorkPool().InputPool(types.HTTPProtocol)
s.target.Scan(func(value string) {
s.target.Scan(func(value string) bool {
inputPool.WaitGroup.Add()
go func(input string) {
defer inputPool.WaitGroup.Done()
s.processWappalyzerInputPair(input)
}(value)
return true
})
inputPool.WaitGroup.Wait()
return nil

View File

@ -70,7 +70,14 @@ func (e *Executer) Execute(input *contextargs.Context) (bool, error) {
}
previous := make(map[string]interface{})
for _, req := range e.requests {
err := req.ExecuteWithResults(input, dynamicValues, previous, func(event *output.InternalWrappedEvent) {
inputItem := *input
if e.options.InputHelper != nil && input.Input != "" {
if inputItem.Input = e.options.InputHelper.Transform(input.Input, req.Type()); inputItem.Input == "" {
return false, nil
}
}
err := req.ExecuteWithResults(&inputItem, dynamicValues, previous, func(event *output.InternalWrappedEvent) {
ID := req.GetID()
if ID != "" {
builder := &strings.Builder{}
@ -127,7 +134,14 @@ func (e *Executer) ExecuteWithResults(input *contextargs.Context, callback proto
for _, req := range e.requests {
req := req
err := req.ExecuteWithResults(input, dynamicValues, previous, func(event *output.InternalWrappedEvent) {
inputItem := *input
if e.options.InputHelper != nil && input.Input != "" {
if inputItem.Input = e.options.InputHelper.Transform(input.Input, req.Type()); inputItem.Input == "" {
return nil
}
}
err := req.ExecuteWithResults(&inputItem, dynamicValues, previous, func(event *output.InternalWrappedEvent) {
ID := req.GetID()
if ID != "" {
builder := &strings.Builder{}

View File

@ -25,7 +25,7 @@ const maxSize = 5 * 1024 * 1024
// Type returns the type of the protocol request
func (request *Request) Type() templateTypes.ProtocolType {
return templateTypes.HTTPProtocol
return templateTypes.OfflineHTTPProtocol
}
// ExecuteWithResults executes the protocol requests and returns results instead of writing them.

View File

@ -6,6 +6,7 @@ import (
"github.com/logrusorgru/aurora"
"github.com/projectdiscovery/nuclei/v2/pkg/catalog"
"github.com/projectdiscovery/nuclei/v2/pkg/input"
"github.com/projectdiscovery/nuclei/v2/pkg/model"
"github.com/projectdiscovery/nuclei/v2/pkg/operators"
"github.com/projectdiscovery/nuclei/v2/pkg/operators/extractors"
@ -70,6 +71,8 @@ type ExecuterOptions struct {
Variables variables.Variable
// ExcludeMatchers is the list of matchers to exclude
ExcludeMatchers *excludematchers.ExcludeMatchers
// InputHelper is a helper for input normalization
InputHelper *input.Helper
Operators []*operators.Operators // only used by offlinehttp module

View File

@ -24,6 +24,7 @@ const (
FileProtocol
// name:http
HTTPProtocol
OfflineHTTPProtocol
// name:headless
HeadlessProtocol
// name:network

View File

@ -86,8 +86,9 @@ func parseWorkflowTemplate(workflow *workflows.WorkflowTemplate, preprocessor Pr
finalTemplates, _ := ClusterTemplates(workflowTemplates, options.Copy())
for _, template := range finalTemplates {
workflow.Executers = append(workflow.Executers, &workflows.ProtocolExecuterPair{
Executer: template.Executer,
Options: options,
Executer: template.Executer,
Options: options,
TemplateType: template.Type(),
})
}

View File

@ -161,6 +161,8 @@ type Options struct {
DebugRequests bool
// DebugResponse mode allows debugging response for the engine
DebugResponse bool
// DisableHTTPProbe disables http probing feature of input normalization
DisableHTTPProbe bool
// LeaveDefaultPorts skips normalization of default ports
LeaveDefaultPorts bool
// AutomaticScan enables automatic tech based template execution

View File

@ -6,6 +6,7 @@ import (
"github.com/projectdiscovery/nuclei/v2/pkg/model/types/stringslice"
"github.com/projectdiscovery/nuclei/v2/pkg/operators"
"github.com/projectdiscovery/nuclei/v2/pkg/protocols"
templateTypes "github.com/projectdiscovery/nuclei/v2/pkg/templates/types"
)
// Workflow is a workflow to execute with chained requests, etc.
@ -42,8 +43,9 @@ type WorkflowTemplate struct {
// ProtocolExecuterPair is a pair of protocol executer and its options
type ProtocolExecuterPair struct {
Executer protocols.Executer
Options *protocols.ExecuterOptions
Executer protocols.Executer
Options *protocols.ExecuterOptions
TemplateType templateTypes.ProtocolType
}
// Matcher performs conditional matching on the workflow template results.