2021-08-16 21:24:37 +05:30
package hosterrorscache
import (
2025-01-31 17:16:57 +07:00
"errors"
2021-08-16 21:24:37 +05:30
"net"
"net/url"
"regexp"
"strings"
2023-01-02 09:22:06 +01:00
"sync"
"sync/atomic"
2021-08-16 21:24:37 +05:30
2025-07-01 00:40:44 +07:00
"github.com/projectdiscovery/gcache"
2021-08-17 14:50:54 +05:30
"github.com/projectdiscovery/gologger"
2024-05-25 00:29:04 +05:30
"github.com/projectdiscovery/nuclei/v3/pkg/catalog/config"
"github.com/projectdiscovery/nuclei/v3/pkg/protocols/common/contextargs"
"github.com/projectdiscovery/nuclei/v3/pkg/types/nucleierr"
"github.com/projectdiscovery/utils/errkit"
2024-06-25 12:26:18 +03:00
stringsutil "github.com/projectdiscovery/utils/strings"
2021-08-16 21:24:37 +05:30
)
2022-07-18 15:35:53 -05:00
// CacheInterface defines the signature of the hosterrorscache so that
// users of Nuclei as embedded lib may implement their own cache
type CacheInterface interface {
2025-01-31 17:16:57 +07:00
SetVerbose ( verbose bool ) // log verbosely
Close ( ) // close the cache
Check ( protoType string , ctx * contextargs . Context ) bool // return true if the host should be skipped
Remove ( ctx * contextargs . Context ) // remove a host from the cache
MarkFailed ( protoType string , ctx * contextargs . Context , err error ) // record a failure (and cause) for the host
MarkFailedOrRemove ( protoType string , ctx * contextargs . Context , err error ) // record a failure (and cause) for the host or remove it
2022-07-18 15:35:53 -05:00
}
2024-05-25 00:29:04 +05:30
// Compile-time assertion that *Cache implements CacheInterface.
var _ CacheInterface = (*Cache)(nil)
2021-08-16 21:24:37 +05:30
// Cache is a cache for host based errors. It allows skipping
// certain hosts based on an error threshold.
//
// It uses an LRU cache internally for skipping unresponsive hosts
// that remain so for a duration.
type Cache struct {
2021-11-25 17:03:56 +02:00
MaxHostError int
2021-08-17 14:50:54 +05:30
verbose bool
2024-06-11 12:21:43 +02:00
failedTargets gcache . Cache [ string , * cacheItem ]
2023-03-14 01:29:42 -07:00
TrackError [ ] string
2021-08-16 21:24:37 +05:30
}
2023-01-02 09:22:06 +01:00
// cacheItem holds the failure state tracked for a single cache key.
type cacheItem struct {
	// Once guards the one-time "skipped" info log emitted by Check.
	sync.Once

	// errors counts the failures recorded for this key.
	errors atomic.Int32
	// isPermanentErr is set once a permanent network error was recorded.
	isPermanentErr bool
	// cause is the most recently recorded error (optional).
	cause error

	// mu serializes access to the item in Check and MarkFailedOrRemove.
	mu sync.Mutex
}
2021-08-16 21:24:37 +05:30
// DefaultMaxHostsCount is the default number of hosts tracked by the cache
// (presumably what callers pass as maxHostsCount to New).
const DefaultMaxHostsCount = 10_000
// New returns a new host max errors cache
2023-03-14 01:29:42 -07:00
func New ( maxHostError , maxHostsCount int , trackError [ ] string ) * Cache {
2025-01-31 17:16:57 +07:00
gc := gcache . New [ string , * cacheItem ] ( maxHostsCount ) . ARC ( ) . Build ( )
return & Cache {
failedTargets : gc ,
MaxHostError : maxHostError ,
TrackError : trackError ,
}
2021-08-16 21:24:37 +05:30
}
2021-08-17 14:50:54 +05:30
// SetVerbose sets the cache to log at verbose level
2022-07-18 15:35:53 -05:00
func ( c * Cache ) SetVerbose ( verbose bool ) {
2021-08-17 14:50:54 +05:30
c . verbose = verbose
}
2021-08-16 21:24:37 +05:30
// Close closes the host errors cache
func ( c * Cache ) Close ( ) {
2024-05-25 00:29:04 +05:30
if config . DefaultConfig . IsDebugArgEnabled ( config . DebugArgHostErrorStats ) {
items := c . failedTargets . GetALL ( false )
2024-06-11 12:21:43 +02:00
for k , val := range items {
2024-05-25 00:29:04 +05:30
gologger . Info ( ) . Label ( "MaxHostErrorStats" ) . Msgf ( "Host: %s, Errors: %d" , k , val . errors . Load ( ) )
}
}
2021-08-16 21:24:37 +05:30
c . failedTargets . Purge ( )
}
2024-09-23 17:27:30 +07:00
// NormalizeCacheValue processes the input value and returns a normalized cache
// value.
func ( c * Cache ) NormalizeCacheValue ( value string ) string {
2025-07-01 00:40:44 +07:00
var normalizedValue = value
2024-09-23 17:27:30 +07:00
u , err := url . ParseRequestURI ( value )
if err != nil || u . Host == "" {
2025-05-07 17:22:15 +05:30
if strings . Contains ( value , ":" ) {
return normalizedValue
}
2024-09-23 17:27:30 +07:00
u , err2 := url . ParseRequestURI ( "https://" + value )
if err2 != nil {
return normalizedValue
}
normalizedValue = u . Host
} else {
port := u . Port ( )
if port == "" {
switch u . Scheme {
case "https" :
normalizedValue = net . JoinHostPort ( u . Host , "443" )
case "http" :
normalizedValue = net . JoinHostPort ( u . Host , "80" )
2021-08-16 21:24:37 +05:30
}
2024-09-23 17:27:30 +07:00
} else {
normalizedValue = u . Host
2021-08-16 21:24:37 +05:30
}
}
2024-09-23 17:27:30 +07:00
return normalizedValue
2021-08-16 21:24:37 +05:30
}
// ErrUnresponsiveHost is returned when a host is unresponsive
2021-11-25 17:03:56 +02:00
// var ErrUnresponsiveHost = errors.New("skipping as host is unresponsive")
2021-08-16 21:24:37 +05:30
// Check returns true if a host should be skipped as it has been
// unresponsive for a certain number of times.
//
// The value can be many formats -
// - URL: https?:// type
// - Host:port type
// - host type
2024-09-28 17:20:35 +04:00
func ( c * Cache ) Check ( protoType string , ctx * contextargs . Context ) bool {
2024-05-25 00:29:04 +05:30
finalValue := c . GetKeyFromContext ( ctx , nil )
2021-08-16 21:24:37 +05:30
2025-01-31 17:16:57 +07:00
cache , err := c . failedTargets . GetIFPresent ( finalValue )
2021-08-16 21:24:37 +05:30
if err != nil {
return false
}
2025-01-31 17:16:57 +07:00
cache . mu . Lock ( )
defer cache . mu . Unlock ( )
if cache . isPermanentErr {
2024-05-25 00:29:04 +05:30
// skipping permanent errors is expected so verbose instead of info
2025-01-31 17:16:57 +07:00
gologger . Verbose ( ) . Msgf ( "Skipped %s from target list as found unresponsive permanently: %s" , finalValue , cache . cause )
2024-05-25 00:29:04 +05:30
return true
}
2021-08-17 14:50:54 +05:30
2025-01-31 17:16:57 +07:00
if cache . errors . Load ( ) >= int32 ( c . MaxHostError ) {
cache . Do ( func ( ) {
gologger . Info ( ) . Msgf ( "Skipped %s from target list as found unresponsive %d times" , finalValue , cache . errors . Load ( ) )
2023-01-02 19:00:10 +05:30
} )
2021-08-17 14:50:54 +05:30
return true
}
2025-01-31 17:16:57 +07:00
2021-08-17 14:50:54 +05:30
return false
2021-08-16 21:24:37 +05:30
}
2025-01-31 17:16:57 +07:00
// Remove drops the entry for the host referenced by ctx from the cache.
func (c *Cache) Remove(ctx *contextargs.Context) {
	// The result is deliberately ignored: removal is attempted even when the
	// key is not present in the cache.
	_ = c.failedTargets.Remove(c.GetKeyFromContext(ctx, nil))
}
2021-08-16 21:24:37 +05:30
// MarkFailed marks a host as failed previously
2025-01-31 17:16:57 +07:00
//
// Deprecated: Use MarkFailedOrRemove instead.
2024-09-28 17:20:35 +04:00
func ( c * Cache ) MarkFailed ( protoType string , ctx * contextargs . Context , err error ) {
2025-01-31 17:16:57 +07:00
if err == nil {
2022-07-18 15:35:53 -05:00
return
}
2025-01-31 17:16:57 +07:00
c . MarkFailedOrRemove ( protoType , ctx , err )
}
// MarkFailedOrRemove marks a host as failed previously or removes it
func ( c * Cache ) MarkFailedOrRemove ( protoType string , ctx * contextargs . Context , err error ) {
if err != nil && ! c . checkError ( protoType , err ) {
2021-08-16 21:24:37 +05:30
return
}
2025-01-31 17:16:57 +07:00
if err == nil {
// Remove the host from cache
//
// NOTE(dwisiswant0): The decision was made to completely remove the
// cached entry for the host instead of simply decrementing the error
// count (using `(atomic.Int32).Swap` to update the value to `N-1`).
// This approach was chosen because the error handling logic operates
// concurrently, and decrementing the count could lead to UB (unexpected
// behavior) even when the error is `nil`.
//
// To clarify, consider the following scenario where the error
// encountered does NOT belong to the permanent network error category
// (`errkit.ErrKindNetworkPermanent`):
//
// 1. Iteration 1: A timeout error occurs, and the error count for the
// host is incremented.
// 2. Iteration 2: Another timeout error is encountered, leading to
// another increment in the host's error count.
// 3. Iteration 3: A third timeout error happens, which increments the
// error count further. At this point, the host is flagged as
// unresponsive.
// 4. Iteration 4: The host becomes reachable (no error or a transient
// issue resolved). Instead of performing a no-op and leaving the
// host in the cache, the host entry is removed entirely to reset its
// state.
// 5. Iteration 5: A subsequent timeout error occurs after the host was
// removed and re-added to the cache. The error count is reset and
// starts from 1 again.
//
// This removal strategy ensures the cache is updated dynamically to
// reflect the current state of the host without persisting stale or
// irrelevant error counts that could interfere with future error
// handling and tracking logic.
c . Remove ( ctx )
return
}
cacheKey := c . GetKeyFromContext ( ctx , err )
cache , cacheErr := c . failedTargets . GetIFPresent ( cacheKey )
if errors . Is ( cacheErr , gcache . KeyNotFoundError ) {
cache = & cacheItem { errors : atomic . Int32 { } }
}
cache . mu . Lock ( )
defer cache . mu . Unlock ( )
if errkit . IsKind ( err , errkit . ErrKindNetworkPermanent ) {
cache . isPermanentErr = true
}
cache . cause = err
cache . errors . Add ( 1 )
_ = c . failedTargets . Set ( cacheKey , cache )
2021-08-16 21:24:37 +05:30
}
2024-05-25 00:29:04 +05:30
// GetKeyFromContext returns the key for the cache from the context
func ( c * Cache ) GetKeyFromContext ( ctx * contextargs . Context , err error ) string {
// Note:
// ideally any changes made to remote addr in template like {{Hostname}}:81 etc
// should be reflected in contextargs but it is not yet reflected in some cases
// and needs refactor of ScanContext + ContextArgs to achieve that
// i.e why we use real address from error if present
2025-05-07 17:22:15 +05:30
var address string
// 1. the address carried inside the error (if the transport sets it)
2024-05-25 00:29:04 +05:30
if err != nil {
2025-05-07 17:22:15 +05:30
if v := errkit . GetAttrValue ( err , "address" ) ; v . Any ( ) != nil {
address = v . String ( )
2024-05-25 00:29:04 +05:30
}
}
2025-05-07 17:22:15 +05:30
if address == "" {
address = ctx . MetaInput . Address ( )
}
2024-09-23 17:27:30 +07:00
finalValue := c . NormalizeCacheValue ( address )
2024-05-25 00:29:04 +05:30
return finalValue
}
// reCheckError matches the well-known error messages that checkError counts
// as host failures (DNS resolution failures, refused or reset connections,
// and header timeouts). The pattern carries no surrounding whitespace so it
// also matches when the message sits at the very start or end of the cause.
var reCheckError = regexp.MustCompile(`(no address found for host|could not resolve host|connection refused|connection reset by peer|could not connect to any address found for host|timeout awaiting response headers)`)
2021-08-16 21:24:37 +05:30
2022-07-18 15:35:53 -05:00
// checkError checks if an error represents a type that should be
2021-08-16 21:24:37 +05:30
// added to the host skipping table.
2024-05-25 00:29:04 +05:30
// it first parses error and extracts the cause and checks for blacklisted
// or common errors that should be skipped
2024-09-28 17:20:35 +04:00
func ( c * Cache ) checkError ( protoType string , err error ) bool {
2023-03-14 01:29:42 -07:00
if err == nil {
return false
}
2024-09-28 17:20:35 +04:00
if protoType != "http" {
return false
}
2024-05-25 00:29:04 +05:30
kind := errkit . GetErrorKind ( err , nucleierr . ErrTemplateLogic )
switch kind {
case nucleierr . ErrTemplateLogic :
// these are errors that are not related to the target
// and are due to template logic
return false
case errkit . ErrKindNetworkTemporary :
// these should not be counted as host errors
return false
case errkit . ErrKindNetworkPermanent :
// these should be counted as host errors
return true
case errkit . ErrKindDeadline :
// these should not be counted as host errors
return false
default :
2024-06-12 00:34:45 +02:00
// parse error for further processing
2024-05-25 00:29:04 +05:30
errX := errkit . FromError ( err )
tmp := errX . Cause ( )
cause := tmp . Error ( )
2024-06-25 12:26:18 +03:00
if stringsutil . ContainsAll ( cause , "ReadStatusLine:" , "read: connection reset by peer" ) {
2024-05-25 00:29:04 +05:30
// this is a FP and should not be counted as a host error
// because server closes connection when it reads corrupted bytes which we send via rawhttp
return false
}
if strings . HasPrefix ( cause , "ReadStatusLine:" ) {
// error is present in last part when using rawhttp
// this will be fixed once errkit is used everywhere
lastIndex := strings . LastIndex ( cause , ":" )
if lastIndex == - 1 {
lastIndex = 0
}
if lastIndex >= len ( cause ) - 1 {
lastIndex = 0
}
cause = cause [ lastIndex + 1 : ]
}
for _ , msg := range c . TrackError {
if strings . Contains ( cause , msg ) {
return true
}
2023-03-14 01:29:42 -07:00
}
2024-05-25 00:29:04 +05:30
return reCheckError . MatchString ( cause )
2023-03-14 01:29:42 -07:00
}
2021-08-16 21:24:37 +05:30
}