2022-03-14 12:32:05 +05:30
package automaticscan
2022-01-18 20:59:37 +05:30
import (
2024-04-25 15:37:56 +05:30
"context"
2022-01-18 20:59:37 +05:30
"io"
"net/http"
2022-04-07 17:55:08 +05:30
"os"
"path/filepath"
2022-01-18 20:59:37 +05:30
"strings"
2024-02-01 15:19:20 +08:00
"sync"
"sync/atomic"
2022-01-18 20:59:37 +05:30
2024-02-01 15:19:20 +08:00
"github.com/logrusorgru/aurora"
2022-01-18 20:59:37 +05:30
"github.com/pkg/errors"
"github.com/projectdiscovery/gologger"
2023-10-17 17:44:13 +05:30
"github.com/projectdiscovery/nuclei/v3/pkg/catalog/config"
"github.com/projectdiscovery/nuclei/v3/pkg/catalog/loader"
"github.com/projectdiscovery/nuclei/v3/pkg/core"
2024-03-14 03:08:53 +05:30
"github.com/projectdiscovery/nuclei/v3/pkg/input/provider"
2024-02-01 15:19:20 +08:00
"github.com/projectdiscovery/nuclei/v3/pkg/output"
2023-10-17 17:44:13 +05:30
"github.com/projectdiscovery/nuclei/v3/pkg/protocols"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/contextargs"
2024-02-01 15:19:20 +08:00
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/helpers/writer"
2023-10-17 17:44:13 +05:30
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/http/httpclientpool"
httputil "github.com/projectdiscovery/nuclei/v3/pkg/protocols/utils/http"
2024-02-01 15:19:20 +08:00
"github.com/projectdiscovery/nuclei/v3/pkg/scan"
2023-10-17 17:44:13 +05:30
"github.com/projectdiscovery/nuclei/v3/pkg/templates"
2024-02-01 15:19:20 +08:00
"github.com/projectdiscovery/nuclei/v3/pkg/testutils"
2022-01-18 20:59:37 +05:30
"github.com/projectdiscovery/retryablehttp-go"
2024-02-01 00:42:38 +03:00
"github.com/projectdiscovery/useragent"
2024-02-01 15:19:20 +08:00
mapsutil "github.com/projectdiscovery/utils/maps"
2022-11-06 21:24:23 +01:00
sliceutil "github.com/projectdiscovery/utils/slice"
2024-02-01 15:19:20 +08:00
stringsutil "github.com/projectdiscovery/utils/strings"
2024-04-03 17:50:57 +02:00
syncutil "github.com/projectdiscovery/utils/sync"
2024-05-15 15:34:59 +02:00
unitutils "github.com/projectdiscovery/utils/unit"
2022-01-18 20:59:37 +05:30
wappalyzer "github.com/projectdiscovery/wappalyzergo"
2022-04-07 17:55:08 +05:30
"gopkg.in/yaml.v2"
2022-01-18 20:59:37 +05:30
)
2024-02-01 15:19:20 +08:00
const (
mappingFilename = "wappalyzer-mapping.yml"
2024-05-15 15:34:59 +02:00
maxDefaultBody = 4 * unitutils . Mega
2024-02-01 15:19:20 +08:00
)
2022-01-18 20:59:37 +05:30
2022-03-14 12:32:05 +05:30
// Options contains configuration options for automatic scan service
2022-01-18 20:59:37 +05:30
type Options struct {
2023-05-31 16:58:10 -04:00
ExecuterOpts protocols . ExecutorOptions
2022-01-18 20:59:37 +05:30
Store * loader . Store
Engine * core . Engine
2024-03-14 03:08:53 +05:30
Target provider . InputProvider
2022-01-18 20:59:37 +05:30
}
2024-02-01 15:19:20 +08:00
// Service is a service for automatic scan execution
type Service struct {
opts protocols . ExecutorOptions
store * loader . Store
engine * core . Engine
2024-03-14 03:08:53 +05:30
target provider . InputProvider
2024-02-01 15:19:20 +08:00
wappalyzer * wappalyzer . Wappalyze
httpclient * retryablehttp . Client
templateDirs [ ] string // root Template Directories
technologyMappings map [ string ] string
techTemplates [ ] * templates . Template
ServiceOpts Options
hasResults * atomic . Bool
}
2022-04-07 17:55:08 +05:30
2022-04-19 16:14:49 +05:30
// New takes options and returns a new automatic scan service
2022-01-18 20:59:37 +05:30
func New ( opts Options ) ( * Service , error ) {
wappalyzer , err := wappalyzer . New ( )
if err != nil {
return nil , err
}
2022-03-08 12:43:24 +05:30
2024-02-01 15:19:20 +08:00
// load extra mapping from nuclei-templates for normalization
2022-04-07 17:55:08 +05:30
var mappingData map [ string ] string
2024-02-01 15:19:20 +08:00
mappingFile := filepath . Join ( config . DefaultConfig . GetTemplateDir ( ) , mappingFilename )
2023-09-04 10:24:34 +02:00
if file , err := os . Open ( mappingFile ) ; err == nil {
_ = yaml . NewDecoder ( file ) . Decode ( & mappingData )
file . Close ( )
2022-04-07 17:55:08 +05:30
}
if opts . ExecuterOpts . Options . Verbose {
gologger . Verbose ( ) . Msgf ( "Normalized mapping (%d): %v\n" , len ( mappingData ) , mappingData )
}
2022-05-10 17:26:46 +05:30
2024-02-01 15:19:20 +08:00
// get template directories
templateDirs , err := getTemplateDirs ( opts )
if err != nil {
return nil , err
2022-04-19 17:45:35 -05:00
}
2024-02-01 15:19:20 +08:00
// load tech detect templates
techDetectTemplates , err := LoadTemplatesWithTags ( opts , templateDirs , [ ] string { "tech" , "detect" , "favicon" } , true )
if err != nil {
return nil , err
2022-03-08 12:43:24 +05:30
}
httpclient , err := httpclientpool . Get ( opts . ExecuterOpts . Options , & httpclientpool . Configuration {
2023-09-04 10:24:34 +02:00
Connection : & httpclientpool . ConnectionConfiguration {
DisableKeepAlive : httputil . ShouldDisableKeepAlive ( opts . ExecuterOpts . Options ) ,
} ,
2022-03-08 12:43:24 +05:30
} )
if err != nil {
return nil , errors . Wrap ( err , "could not get http client" )
}
2022-01-18 20:59:37 +05:30
return & Service {
2022-04-07 17:55:08 +05:30
opts : opts . ExecuterOpts ,
store : opts . Store ,
engine : opts . Engine ,
target : opts . Target ,
wappalyzer : wappalyzer ,
2024-02-01 15:19:20 +08:00
templateDirs : templateDirs , // fix this
2022-04-07 17:55:08 +05:30
httpclient : httpclient ,
technologyMappings : mappingData ,
2024-02-01 15:19:20 +08:00
techTemplates : techDetectTemplates ,
ServiceOpts : opts ,
hasResults : & atomic . Bool { } ,
2022-01-18 20:59:37 +05:30
} , nil
}
// Close closes the service
func ( s * Service ) Close ( ) bool {
2024-02-01 15:19:20 +08:00
return s . hasResults . Load ( )
2022-01-18 20:59:37 +05:30
}
2024-02-01 15:19:20 +08:00
// Execute automatic scan on each target with -bs host concurrency
func ( s * Service ) Execute ( ) error {
gologger . Info ( ) . Msgf ( "Executing Automatic scan on %d target[s]" , s . target . Count ( ) )
// setup host concurrency
2024-04-03 17:50:57 +02:00
sg , err := syncutil . New ( syncutil . WithSize ( s . opts . Options . BulkSize ) )
if err != nil {
return err
}
2024-03-14 03:08:53 +05:30
s . target . Iterate ( func ( value * contextargs . MetaInput ) bool {
2024-02-01 15:19:20 +08:00
sg . Add ( )
go func ( input * contextargs . MetaInput ) {
defer sg . Done ( )
s . executeAutomaticScanOnTarget ( input )
} ( value )
return true
} )
sg . Wait ( )
return nil
2022-01-18 20:59:37 +05:30
}
2024-02-01 15:19:20 +08:00
// executeAutomaticScanOnTarget executes automatic scan on given target
func ( s * Service ) executeAutomaticScanOnTarget ( input * contextargs . MetaInput ) {
// get tags using wappalyzer
tagsFromWappalyzer := s . getTagsUsingWappalyzer ( input )
// get tags using detection templates
tagsFromDetectTemplates , matched := s . getTagsUsingDetectionTemplates ( input )
if matched > 0 {
s . hasResults . Store ( true )
}
2022-01-18 20:59:37 +05:30
2024-02-01 15:19:20 +08:00
// create combined final tags
finalTags := [ ] string { }
for _ , tags := range append ( tagsFromWappalyzer , tagsFromDetectTemplates ... ) {
if stringsutil . EqualFoldAny ( tags , "tech" , "waf" , "favicon" ) {
continue
}
finalTags = append ( finalTags , tags )
}
finalTags = sliceutil . Dedupe ( finalTags )
2022-01-18 20:59:37 +05:30
2024-02-01 15:19:20 +08:00
gologger . Info ( ) . Msgf ( "Found %d tags and %d matches on detection templates on %v [wappalyzer: %d, detection: %d]\n" , len ( finalTags ) , matched , input . Input , len ( tagsFromWappalyzer ) , len ( tagsFromDetectTemplates ) )
2022-01-18 20:59:37 +05:30
2024-02-01 15:19:20 +08:00
// also include any extra tags passed by user
finalTags = append ( finalTags , s . opts . Options . Tags ... )
finalTags = sliceutil . Dedupe ( finalTags )
2022-01-18 20:59:37 +05:30
2024-02-01 15:19:20 +08:00
if len ( finalTags ) == 0 {
gologger . Warning ( ) . Msgf ( "Skipping automatic scan since no tags were found on %v\n" , input . Input )
return
}
2024-02-02 01:48:22 +05:30
if s . opts . Options . VerboseVerbose {
gologger . Print ( ) . Msgf ( "Final tags identified for %v: %+v\n" , input . Input , finalTags )
}
2022-11-09 14:18:56 +01:00
2024-02-01 15:19:20 +08:00
finalTemplates , err := LoadTemplatesWithTags ( s . ServiceOpts , s . templateDirs , finalTags , false )
if err != nil {
gologger . Error ( ) . Msgf ( "%v Error loading templates: %s\n" , input . Input , err )
return
}
gologger . Info ( ) . Msgf ( "Executing %d templates on %v" , len ( finalTemplates ) , input . Input )
eng := core . New ( s . opts . Options )
execOptions := s . opts . Copy ( )
execOptions . Progress = & testutils . MockProgressClient { } // stats are not supported yet due to centralized logic and cannot be reinitialized
eng . SetExecuterOptions ( execOptions )
2024-03-14 03:08:53 +05:30
2024-04-25 15:37:56 +05:30
tmp := eng . ExecuteScanWithOpts ( context . Background ( ) , finalTemplates , provider . NewSimpleInputProviderWithUrls ( input . Input ) , true )
2024-02-01 15:19:20 +08:00
s . hasResults . Store ( tmp . Load ( ) )
2022-01-18 20:59:37 +05:30
}
2024-02-01 15:19:20 +08:00
// getTagsUsingWappalyzer returns tags using wappalyzer by fingerprinting target
// and utilizing the mapping data
func ( s * Service ) getTagsUsingWappalyzer ( input * contextargs . MetaInput ) [ ] string {
2022-11-09 14:18:56 +01:00
req , err := retryablehttp . NewRequest ( http . MethodGet , input . Input , nil )
2022-03-08 12:43:24 +05:30
if err != nil {
2024-02-01 15:19:20 +08:00
return nil
2022-03-08 12:43:24 +05:30
}
2024-02-01 00:42:38 +03:00
userAgent := useragent . PickRandom ( )
req . Header . Set ( "User-Agent" , userAgent . Raw )
2022-01-18 20:59:37 +05:30
2022-03-08 12:43:24 +05:30
resp , err := s . httpclient . Do ( req )
if err != nil {
2024-02-01 15:19:20 +08:00
return nil
2022-01-18 20:59:37 +05:30
}
2024-02-01 15:19:20 +08:00
defer resp . Body . Close ( )
data , err := io . ReadAll ( io . LimitReader ( resp . Body , maxDefaultBody ) )
2022-03-08 12:43:24 +05:30
if err != nil {
2024-02-01 15:19:20 +08:00
return nil
2022-03-08 12:43:24 +05:30
}
2022-01-18 20:59:37 +05:30
2024-02-01 15:19:20 +08:00
// fingerprint headers and body
2022-03-08 12:43:24 +05:30
fingerprints := s . wappalyzer . Fingerprint ( resp . Header , data )
2022-04-07 17:55:08 +05:30
normalized := make ( map [ string ] struct { } )
2022-03-08 12:43:24 +05:30
for k := range fingerprints {
2022-05-09 11:02:21 +05:30
normalized [ normalizeAppName ( k ) ] = struct { } { }
2022-04-07 17:55:08 +05:30
}
2024-02-01 15:19:20 +08:00
gologger . Verbose ( ) . Msgf ( "Found %d fingerprints for %s\n" , len ( normalized ) , input . Input )
2022-04-07 17:55:08 +05:30
2024-02-01 15:19:20 +08:00
// normalize fingerprints using mapping data
2022-04-07 17:55:08 +05:30
for k := range normalized {
// Replace values with mapping data
if value , ok := s . technologyMappings [ k ] ; ok {
delete ( normalized , k )
normalized [ value ] = struct { } { }
}
}
2024-02-01 15:19:20 +08:00
// more post processing
2022-04-07 17:55:08 +05:30
items := make ( [ ] string , 0 , len ( normalized ) )
for k := range normalized {
2022-03-14 13:01:28 +05:30
if strings . Contains ( k , " " ) {
parts := strings . Split ( strings . ToLower ( k ) , " " )
items = append ( items , parts ... )
} else {
items = append ( items , strings . ToLower ( k ) )
}
2022-01-18 20:59:37 +05:30
}
2024-02-01 15:19:20 +08:00
return sliceutil . Dedupe ( items )
}
2022-03-14 16:25:27 +05:30
2024-02-01 15:19:20 +08:00
// getTagsUsingDetectionTemplates returns tags using detection templates
func ( s * Service ) getTagsUsingDetectionTemplates ( input * contextargs . MetaInput ) ( [ ] string , int ) {
2024-04-25 15:37:56 +05:30
ctx := context . Background ( )
ctxArgs := contextargs . NewWithInput ( ctx , input . Input )
2022-03-14 16:25:27 +05:30
2024-02-01 15:19:20 +08:00
// execute tech detection templates on target
tags := map [ string ] struct { } { }
m := & sync . Mutex { }
2024-04-03 17:50:57 +02:00
sg , _ := syncutil . New ( syncutil . WithSize ( s . opts . Options . TemplateThreads ) )
2024-02-01 15:19:20 +08:00
counter := atomic . Uint32 { }
for _ , t := range s . techTemplates {
sg . Add ( )
go func ( template * templates . Template ) {
defer sg . Done ( )
2024-04-25 15:37:56 +05:30
ctx := scan . NewScanContext ( ctx , ctxArgs )
2024-02-01 15:19:20 +08:00
ctx . OnResult = func ( event * output . InternalWrappedEvent ) {
if event == nil {
return
}
2024-02-02 01:48:22 +05:30
if event . HasOperatorResult ( ) {
2024-02-01 15:19:20 +08:00
// match found
// find unique tags
m . Lock ( )
2024-02-02 01:48:22 +05:30
for _ , v := range event . Results {
if v . MatcherName != "" {
tags [ v . MatcherName ] = struct { } { }
2024-02-01 15:19:20 +08:00
}
2024-02-02 01:48:22 +05:30
for _ , tag := range v . Info . Tags . ToSlice ( ) {
// we shouldn't add all tags since tags also contain protocol type tags
// and are not just limited to products or technologies
// ex: tags: js,mssql,detect,network
// A good trick for this is check if tag is present in template-id
if ! strings . Contains ( template . ID , tag ) && ! strings . Contains ( strings . ToLower ( template . Info . Name ) , tag ) {
// unlikely this is relevant
continue
}
if _ , ok := tags [ tag ] ; ! ok {
tags [ tag ] = struct { } { }
}
// matcher names are also relevant in tech detection templates (ex: tech-detect)
for k := range event . OperatorsResult . Matches {
if _ , ok := tags [ k ] ; ! ok {
tags [ k ] = struct { } { }
}
2024-02-01 15:19:20 +08:00
}
}
}
m . Unlock ( )
_ = counter . Add ( 1 )
// TBD: should we show or hide tech detection results? what about matcher-status flag?
_ = writer . WriteResult ( event , s . opts . Output , s . opts . Progress , s . opts . IssuesClient )
}
}
_ , err := template . Executer . ExecuteWithResults ( ctx )
if err != nil {
gologger . Verbose ( ) . Msgf ( "[%s] error executing template: %s\n" , aurora . BrightYellow ( template . ID ) , err )
return
}
} ( t )
2022-01-18 20:59:37 +05:30
}
2024-02-01 15:19:20 +08:00
sg . Wait ( )
return mapsutil . GetKeys ( tags ) , int ( counter . Load ( ) )
2022-01-18 20:59:37 +05:30
}
2022-03-14 16:25:27 +05:30
2024-02-01 15:19:20 +08:00
// normalizeAppName normalizes app name
2022-05-09 11:02:21 +05:30
// normalizeAppName lower-cases an application name. When the name contains
// exactly one colon, the colon and everything after it are dropped first;
// names with no colon or with several colons are only lower-cased.
func normalizeAppName(appName string) string {
	name := appName
	if strings.Count(name, ":") == 1 {
		// exactly one separator: keep only what precedes it
		name = name[:strings.IndexByte(name, ':')]
	}
	return strings.ToLower(name)
}