// Source: nuclei/v2/pkg/operators/extractors/extractors.go

package extractors

import (
	"regexp"

	"github.com/itchyny/gojq"
)

// Extractor is used to extract part of response using a regex.
type Extractor struct {
2021-07-27 16:03:56 +05:30
// description: |
// Name of the extractor. Name should be lowercase and must not contain
// spaces or dashes (-).
// examples:
// - value: "\"cookie-extractor\""
Name string `yaml:"name,omitempty" jsonschema:"title=name of the extractor,description=Name of the extractor"`
2021-07-27 16:03:56 +05:30
// description: |
// Type is the type of the extractor.
// values:
// - "regex"
// - "kval"
// - "json"
// - "xpath"
Type string `yaml:"type" jsonschema:"title=type of the extractor,description=Type of the extractor,enum=regex,enum=kval,enum=json,enum=xpath"`
2020-07-15 00:47:01 +02:00
// extractorType is the internal type of the extractor
extractorType ExtractorType
2021-07-27 16:03:56 +05:30
// description: |
// Regex contains the regular expression patterns to exract from a part.
//
// Go regex engine does not supports lookaheads or lookbehinds, so as a result
// they are also not supported in nuclei.
// examples:
// - name: Braintree Access Token Regex
// value: >
// []string{"access_token\\$production\\$[0-9a-z]{16}\\$[0-9a-f]{32}"}
// - name: Wordpress Author Extraction regex
// value: >
// []string{"Author:(?:[A-Za-z0-9 -\\_=\"]+)?<span(?:[A-Za-z0-9 -\\_=\"]+)?>([A-Za-z0-9]+)<\\/span>"}
Regex []string `yaml:"regex,omitempty" jsonschema:"title=regex to extract from part,description=Regex to extract from part"`
2021-07-27 16:03:56 +05:30
// description: |
// Group specifies a numbered group to extract from the regex.
// examples:
// - name: Example Regex Group
2021-08-05 00:54:34 +05:30
// value: "1"
RegexGroup int `yaml:"group,omitempty" jsonschema:"title=group to extract from regex,description=Group to extract from regex"`
2020-04-06 00:05:01 +05:30
// regexCompiled is the compiled variant
regexCompiled []*regexp.Regexp
2020-04-06 00:05:01 +05:30
2021-07-27 16:03:56 +05:30
// description: |
// kval contains the key-value pairs required in the response.
//
// Each protocol exposes a lot of different data in response. The kval
// extractor can be used to extract those key-value pairs. A list of
// supported parts is available in docs for request types.
// examples:
// - name: Extract Server Header From HTTP Response
// value: >
// []string{"Server"}
// - name: Extracting value of PHPSESSID Cookie
// value: >
// []string{"PHPSESSID"}
KVal []string `yaml:"kval,omitempty" jsonschema:"title=kval pairs to extract from response,description=Kval pairs to extract from response"`
2020-07-16 10:32:00 +02:00
2021-08-03 20:22:16 +05:30
// description: |
// JSON allows using jq-style syntax to extract items from json response
//
// examples:
2021-08-05 00:54:34 +05:30
// - value: >
// []string{".[] | .id"}
// - value: >
// []string{".batters | .batter | .[] | .id"}
JSON []string `yaml:"json,omitempty" jsonschema:"title=json jq expressions to extract data,description=JSON JQ expressions to evaluate from response part"`
2021-08-19 16:51:02 +05:30
// description: |
// XPath allows using xpath expressions to extract items from html response
//
// examples:
// - value: >
// []string{"/html/body/div/p[2]/a"}
// - value: >
// []string{".batters | .batter | .[] | .id"}
XPath []string `yaml:"xpath,omitempty" jsonschema:"title=html xpath expressions to extract data,description=XPath allows using xpath expressions to extract items from html response"`
2021-08-19 16:51:02 +05:30
// description: |
// Attribute is an optional attribute to extract from response XPath.
//
// examples:
// - value: "\"href\""
Attribute string `yaml:"attribute,omitempty" jsonschema:"title=optional attribute to extract from xpath,description=Optional attribute to extract from response XPath"`
2021-08-19 16:51:02 +05:30
2021-07-31 22:49:23 +02:00
// jsonCompiled is the compiled variant
jsonCompiled []*gojq.Code
2021-08-10 08:04:50 +02:00
// description: |
// Part is the part of the request response to extract data from.
2020-04-06 00:05:01 +05:30
//
2021-08-10 08:04:50 +02:00
// Each protocol exposes a lot of different parts which are well
// documented in docs for each request type.
// examples:
// - value: "\"body\""
// - value: "\"raw\""
Part string `yaml:"part,omitempty" jsonschema:"title=part of response to extract data from,description=Part of the request response to extract data from"`
2021-07-27 16:03:56 +05:30
// description: |
// Internal, when set to true will allow using the value extracted
// in the next request for some protocols (like HTTP).
Internal bool `yaml:"internal,omitempty" jsonschema:"title=mark extracted value for internal variable use,description=Internal when set to true will allow using the value extracted in the next request for some protocols"`
2020-04-06 00:05:01 +05:30
}
// ExtractorType is the type of the extractor specified.
//
// It is declared as an alias of int (note the "="), so ExtractorType and int
// values are interchangeable without conversion.
type ExtractorType = int

// Supported extractor types. Values start at 1, so the zero value (for
// example the result of a failed ExtractorTypes lookup) does not correspond
// to any extractor type.
const (
	// RegexExtractor extracts responses with regexes
	RegexExtractor ExtractorType = iota + 1
	// KValExtractor extracts responses with key:value
	KValExtractor
	// XPathExtractor extracts responses with Xpath selectors
	XPathExtractor
	// JSONExtractor extracts responses with json
	JSONExtractor
)

// ExtractorTypes is a table for conversion of extractor type from string.
var ExtractorTypes = map[string]ExtractorType{
	"regex": RegexExtractor,
	"kval":  KValExtractor,
	"xpath": XPathExtractor,
	"json":  JSONExtractor,
}
// GetType returns the type of the extractor, i.e. the value stored in the
// unexported extractorType field.
func (e *Extractor) GetType() ExtractorType {
return e.extractorType
}