// Package dedupe implements a deduplication layer for nuclei-generated
// issues.
//
// The layer can be persisted to leveldb based storage for further use.
package dedupe

import (
	"crypto/sha1"
	"os"
	"reflect"
	"unsafe"

	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/errors"

	"github.com/projectdiscovery/nuclei/v2/pkg/output"
	"github.com/projectdiscovery/nuclei/v2/pkg/types"
)

// Storage is a duplicate detecting storage for nuclei scan events.
type Storage struct {
	temporary string
	storage   *leveldb.DB
}

// New creates a new duplicate detecting storage for nuclei scan events.
func New(dbPath string) (*Storage, error) {
	storage := &Storage{}

	var err error
	if dbPath == "" {
		dbPath, err = os.MkdirTemp("", "nuclei-report-*")
		storage.temporary = dbPath
	}
	if err != nil {
		return nil, err
	}

	storage.storage, err = leveldb.OpenFile(dbPath, nil)
	if err != nil {
		if !errors.IsCorrupted(err) {
			return nil, err
		}

		// If the metadata is corrupted, try to recover
		storage.storage, err = leveldb.RecoverFile(dbPath, nil)
		if err != nil {
			return nil, err
		}
	}
	return storage, nil
}

// Clear removes all existing records from the storage.
func (s *Storage) Clear() {
	var keys [][]byte
	iter := s.storage.NewIterator(nil, nil)
	for iter.Next() {
		// Copy the key: the slice returned by iter.Key() is only valid
		// until the next call to Next.
		key := append([]byte(nil), iter.Key()...)
		keys = append(keys, key)
	}
	iter.Release()
	for _, key := range keys {
		s.storage.Delete(key, nil)
	}
}

// Close closes the storage, removing any temporary data created for it.
func (s *Storage) Close() {
	s.storage.Close()
	if s.temporary != "" {
		os.RemoveAll(s.temporary)
	}
}

// Index indexes an item in storage and returns true if the item
// was unique.
func (s *Storage) Index(result *output.ResultEvent) (bool, error) {
	hasher := sha1.New()
	if result.TemplateID != "" {
		_, _ = hasher.Write(unsafeToBytes(result.TemplateID))
	}
	if result.MatcherName != "" {
		_, _ = hasher.Write(unsafeToBytes(result.MatcherName))
	}
	if result.ExtractorName != "" {
		_, _ = hasher.Write(unsafeToBytes(result.ExtractorName))
	}
	if result.Type != "" {
		_, _ = hasher.Write(unsafeToBytes(result.Type))
	}
	if result.Host != "" {
		_, _ = hasher.Write(unsafeToBytes(result.Host))
	}
	if result.Matched != "" {
		_, _ = hasher.Write(unsafeToBytes(result.Matched))
	}
	for _, v := range result.ExtractedResults {
		_, _ = hasher.Write(unsafeToBytes(v))
	}
	for k, v := range result.Metadata {
		_, _ = hasher.Write(unsafeToBytes(k))
		_, _ = hasher.Write(unsafeToBytes(types.ToString(v)))
	}
	hash := hasher.Sum(nil)

	exists, err := s.storage.Has(hash, nil)
	if err != nil {
		// if we have an error, return with it but mark it as true
		// since we don't want to lose an issue considering it a dupe.
		return true, err
	}
	if !exists {
		return true, s.storage.Put(hash, nil, nil)
	}
	return false, nil
}
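
// exampleIndexUsage is an illustrative sketch, not part of the original
// package: it shows how the deduplication layer is typically driven. The
// caller creates the storage once (an empty path selects a temporary leveldb
// directory), feeds every generated event through Index, and only reports
// events for which Index returned true. The function and variable names here
// are hypothetical.
func exampleIndexUsage(events []*output.ResultEvent) error {
	storage, err := New("")
	if err != nil {
		return err
	}
	defer storage.Close()

	for _, event := range events {
		unique, err := storage.Index(event)
		if err != nil {
			// On storage errors Index still reports true, so the issue
			// is not silently dropped as a duplicate.
			return err
		}
		if unique {
			// Report / write the event here.
			_ = event
		}
	}
	return nil
}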

// unsafeToBytes converts a string to a byte slice with zero allocations.
// The returned slice shares the string's backing memory and must not be
// modified.
//
// Reference - https://stackoverflow.com/questions/59209493/how-to-use-unsafe-get-a-byte-slice-from-a-string-without-memory-copy
func unsafeToBytes(data string) []byte {
	var buf = *(*[]byte)(unsafe.Pointer(&data))
	(*reflect.SliceHeader)(unsafe.Pointer(&buf)).Cap = len(data)
	return buf
}
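
// unsafeToBytesModern is an illustrative alternative, not part of the
// original package: assuming Go 1.20 or newer, the same zero-copy conversion
// can be written without reflect.SliceHeader (which has since been
// deprecated) by using unsafe.StringData and unsafe.Slice.
func unsafeToBytesModern(data string) []byte {
	if len(data) == 0 {
		return nil
	}
	// Build a []byte over the string's backing array; the result must not
	// be modified, exactly as with unsafeToBytes above.
	return unsafe.Slice(unsafe.StringData(data), len(data))
}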