feat: generate CPU & PGO profiles (#6058)

* feat: generate CPU profiles

also adjust memory (heap) profiles ext to `.mem`

Signed-off-by: Dwi Siswanto <git@dw1.io>

* docs(DESIGN): add total samples for CPU profiles

Signed-off-by: Dwi Siswanto <git@dw1.io>

* chore(make): trimpath in go-build

and append LDFLAGS ifneq "darwin"

Signed-off-by: Dwi Siswanto <git@dw1.io>

* chore: update goreleaser build

* replace `go mod tidy` with `go mod download` and
  `go mod verify`
* adjust indentations
* add `-trimpath` flag
* set `-pgo` flag to "`auto`"
* add `ldflags`
* quoting 386 GOARCH value

Signed-off-by: Dwi Siswanto <git@dw1.io>

* ci: add generate PGO workflow

Signed-off-by: Dwi Siswanto <git@dw1.io>

* chore(make): set CGO_ENABLED inline in go-build

Signed-off-by: Dwi Siswanto <git@dw1.io>

* refactor(main): streamline profile file creation

Signed-off-by: Dwi Siswanto <git@dw1.io>

* dummy: add PGO file (DO NOT MERGE)

Signed-off-by: Dwi Siswanto <git@dw1.io>

* feat: add main test (benchmark)

Signed-off-by: Dwi Siswanto <git@dw1.io>

* chore(make): add build-test

Signed-off-by: Dwi Siswanto <git@dw1.io>

* Revert "dummy: add PGO file (DO NOT MERGE)"

This reverts commit ee877205f729be2f054c7d7d484a9244121acce6.

* test(main): set Output to /dev/null

Signed-off-by: Dwi Siswanto <git@dw1.io>

* feat(output): add option to disable stdout via env var

Signed-off-by: Dwi Siswanto <git@dw1.io>

* test(main): set `types.Options.Output` to empty

Signed-off-by: Dwi Siswanto <git@dw1.io>

* chore(generate-pgo): add TODO note

Signed-off-by: Dwi Siswanto <git@dw1.io>

* ci: add reusable perf regression workflow

Signed-off-by: Dwi Siswanto <git@dw1.io>

* ci(perf-regression): enable `DISABLE_STDOUT`

Signed-off-by: Dwi Siswanto <git@dw1.io>

---------

Signed-off-by: Dwi Siswanto <git@dw1.io>
This commit is contained in:
Dwi Siswanto 2025-02-24 18:22:57 +07:00 committed by GitHub
parent 047d49f6f3
commit 940885a3cc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 302 additions and 68 deletions

55
.github/workflows/generate-pgo.yaml vendored Normal file
View File

@ -0,0 +1,55 @@
name: 👤 Generate PGO

# Runs nuclei with profiling enabled against a generated target list, then
# commits the resulting CPU profile as the default PGO profile.
on:
  push:
    branches: ["dev"]
    paths:
      - '**.go'
      - '**.mod'
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# TODO(dwisiswant0): https://go.dev/doc/pgo#merging-profiles

jobs:
  pgo:
    strategy:
      matrix:
        targets: [150]
    runs-on: ubuntu-latest-16-cores
    if: github.repository == 'projectdiscovery/nuclei'
    permissions:
      contents: write
    env:
      PGO_FILE: "cmd/nuclei/default.pgo"
      LIST_FILE: "/tmp/targets-${{ matrix.targets }}.txt"
      PROFILE_MEM: "/tmp/nuclei-profile-${{ matrix.targets }}-targets"
    steps:
      - uses: actions/checkout@v4
      - uses: projectdiscovery/actions/setup/git@v1
      - uses: projectdiscovery/actions/setup/go@v1
      - name: Generate list
        run: for i in {1..${{ matrix.targets }}}; do echo "https://honey.scanme.sh/?_=${i}" >> "${LIST_FILE}"; done
      # NOTE(dwisiswant0): use `-no-mhe` flag to get better samples.
      - run: go run . -l "${LIST_FILE}" -profile-mem="${PROFILE_MEM}" -no-mhe
        working-directory: cmd/nuclei/
      # The CPU profile written by `-profile-mem` becomes the PGO profile.
      - run: mv "${PROFILE_MEM}.cpu" "${PGO_FILE}"
      # NOTE(dwisiswant0): shall we prune $PGO_FILE git history?
      # if we prune it, this won't be linear since it requires a force-push.
      # if we don't, the git objects will just keep growing bigger.
      #
      # Ref:
      # - https://go.dev/blog/pgo#:~:text=We%20recommend%20committing%20default.pgo%20files%20to%20your%20repository
      # - https://gist.github.com/nottrobin/5758221
      - uses: projectdiscovery/actions/commit@v1
        with:
          # Use the Actions expression (as the upload step below does) so the
          # input does not depend on shell expansion inside the action.
          files: "${{ env.PGO_FILE }}"
          message: "build: update PGO profile :robot:"
      - run: git push origin "${GITHUB_REF}"
      - uses: actions/upload-artifact@v4
        with:
          name: "pgo"
          path: "${{ env.PGO_FILE }}"

38
.github/workflows/perf-regression.yaml vendored Normal file
View File

@ -0,0 +1,38 @@
name: 🔨 Performance Regression

# Reusable workflow: builds the benchmark test binary, runs the benchmarks and
# feeds the output to github-action-benchmark, using a cached JSON history.
on:
  workflow_call:
  workflow_dispatch:

jobs:
  perf-regression:
    runs-on: ubuntu-latest-16-cores
    if: github.repository == 'projectdiscovery/nuclei'
    env:
      BENCH_OUT: "/tmp/bench.out"
    steps:
      - uses: actions/checkout@v4
      - uses: projectdiscovery/actions/setup/go@v1
      - run: make build-test
      # An explicit `shell: bash` runs the script with `-o pipefail`, so a
      # failing benchmark binary fails the step instead of being masked by `tee`.
      - run: ./bin/nuclei.test -test.run - -test.bench=. -test.benchmem ./cmd/nuclei/ | tee "${BENCH_OUT}"
        shell: bash
        env:
          DISABLE_STDOUT: "1"
      - uses: actions/cache/restore@v4
        with:
          path: ./cache
          key: ${{ runner.os }}-benchmark
      - uses: benchmark-action/github-action-benchmark@v1
        with:
          name: 'RunEnumeration Benchmark'
          tool: 'go'
          output-file-path: ${{ env.BENCH_OUT }}
          external-data-json-path: ./cache/benchmark-data.json
          fail-on-alert: false
          github-token: ${{ secrets.GITHUB_TOKEN }}
          comment-on-alert: true
          summary-always: true
      # Only push events persist the updated benchmark history.
      - uses: actions/cache/save@v4
        if: github.event_name == 'push'
        with:
          path: ./cache
          key: ${{ runner.os }}-benchmark

View File

@ -20,16 +20,24 @@ jobs:
- uses: projectdiscovery/actions/setup/go@v1
- run: make verify
- name: Generate list
run: for i in {1..${{ matrix.count }}}; do echo "https://scanme.sh/?_=${i}" >> "${LIST_FILE}"; done
- run: NUCLEI_ARGS=host-error-stats go run . -l "${LIST_FILE}" -profile-mem="${PROFILE_MEM}"
run: for i in {1..${{ matrix.count }}}; do echo "https://honey.scanme.sh/?_=${i}" >> "${LIST_FILE}"; done
- run: go run . -l "${LIST_FILE}" -profile-mem="${PROFILE_MEM}"
env:
NUCLEI_ARGS: host-error-stats
working-directory: cmd/nuclei/
- uses: projectdiscovery/actions/flamegraph@v1
id: flamegraph
id: flamegraph-cpu
with:
profile: "${{ env.PROFILE_MEM }}.prof"
name: "nuclei-perf-test-${{ matrix.count }}"
profile: "${{ env.PROFILE_MEM }}.cpu"
name: "${{ env.FLAMEGRAPH_NAME }} CPU profiles"
continue-on-error: true
- if: ${{ steps.flamegraph.outputs.message == '' }}
run: echo "::notice::${FLAMEGRAPH_URL}"
env:
FLAMEGRAPH_URL: ${{ steps.flamegraph.outputs.url }}
- uses: projectdiscovery/actions/flamegraph@v1
id: flamegraph-mem
with:
profile: "${{ env.PROFILE_MEM }}.mem"
name: "${{ env.FLAMEGRAPH_NAME }} memory profiles"
continue-on-error: true
- if: ${{ steps.flamegraph-mem.outputs.message == '' }}
run: |
echo "::notice::CPU flamegraph: ${{ steps.flamegraph-cpu.outputs.url }}"
echo "::notice::Memory (heap) flamegraph: ${{ steps.flamegraph-mem.outputs.url }}"

View File

@ -156,13 +156,23 @@ jobs:
echo "FLAMEGRAPH_NAME=nuclei (PR #${{ github.event.number }})" >> $GITHUB_ENV
- run: ./bin/nuclei -silent -update-templates
- run: ./bin/nuclei -silent -u "${TARGET_URL}" -profile-mem="${PROFILE_MEM}"
- uses: projectdiscovery/actions/flamegraph@master
id: flamegraph
- uses: projectdiscovery/actions/flamegraph@v1
id: flamegraph-cpu
with:
profile: "${{ env.PROFILE_MEM }}.prof"
name: "${{ env.FLAMEGRAPH_NAME }}"
profile: "${{ env.PROFILE_MEM }}.cpu"
name: "${{ env.FLAMEGRAPH_NAME }} CPU profiles"
continue-on-error: true
- if: ${{ steps.flamegraph.outputs.message == '' }}
run: echo "::notice::${FLAMEGRAPH_URL}"
env:
FLAMEGRAPH_URL: ${{ steps.flamegraph.outputs.url }}
- uses: projectdiscovery/actions/flamegraph@v1
id: flamegraph-mem
with:
profile: "${{ env.PROFILE_MEM }}.mem"
name: "${{ env.FLAMEGRAPH_NAME }} memory profiles"
continue-on-error: true
- if: ${{ steps.flamegraph-mem.outputs.message == '' }}
run: |
echo "::notice::CPU flamegraph: ${{ steps.flamegraph-cpu.outputs.url }}"
echo "::notice::Memory (heap) flamegraph: ${{ steps.flamegraph-mem.outputs.url }}"
perf-regression:
needs: ["tests"]
uses: ./.github/workflows/perf-regression.yaml

2
.gitignore vendored
View File

@ -49,3 +49,5 @@ vendor
*.prof
*.pprof
*.trace
*.mem
*.cpu

View File

@ -1,27 +1,29 @@
before:
hooks:
- go mod tidy
- go mod download
- go mod verify
builds:
- main: cmd/nuclei/main.go
binary: nuclei
id: nuclei-cli
env:
- CGO_ENABLED=0
goos: [windows,linux,darwin]
goarch: [amd64,386,arm,arm64]
goarch: [amd64,'386',arm,arm64]
ignore:
- goos: darwin
goarch: 386
goarch: '386'
- goos: windows
goarch: arm
- goos: windows
goarch: arm64
flags:
- -trimpath
- -pgo=auto
ldflags:
- -s
- -w
#- main: cmd/tmc/main.go
# binary: tmc

View File

@ -459,35 +459,42 @@ That's it, you've added a new protocol to Nuclei. The next good step would be to
## Profiling and Tracing
To analyze Nuclei's performance and resource usage, you can generate memory profiles and trace files using the `-profile-mem` flag:
To analyze Nuclei's performance and resource usage, you can generate CPU & memory profiles and trace files using the `-profile-mem` flag:
```bash
nuclei -t nuclei-templates/ -u https://example.com -profile-mem=nuclei-$(git describe --tags)
```
This command creates two files:
This command creates three files:
* `nuclei.prof`: Memory (heap) profile
* `nuclei.cpu`: CPU profile
* `nuclei.mem`: Memory (heap) profile
* `nuclei.trace`: Execution trace
### Analyzing the Memory Profile
### Analyzing the CPU/Memory Profiles
1. View the profile in the terminal:
* View the profile in the terminal:
```bash
go tool pprof nuclei.prof
go tool pprof nuclei.{cpu,mem}
```
2. Display top memory consumers:
* Display overall CPU time for processing $$N$$ targets:
```bash
go tool pprof -top nuclei.prof | grep "$(go list -m)" | head -10
```
go tool pprof -top nuclei.cpu | grep "Total samples"
```
3. Visualize the profile in a web browser:
* Display top memory consumers:
```bash
go tool pprof -http=:$(shuf -i 1000-99999 -n 1) nuclei.prof
go tool pprof -top nuclei.mem | grep "$(go list -m)" | head -10
```
* Visualize the profile in a web browser:
```bash
go tool pprof -http=:$(shuf -i 1000-99999 -n 1) nuclei.{cpu,mem}
```
### Analyzing the Trace File

View File

@ -11,7 +11,7 @@ GOFLAGS := -v
LDFLAGS := -s -w
ifneq ($(shell go env GOOS),darwin)
LDFLAGS = -extldflags "-static"
LDFLAGS += -extldflags "-static"
endif
.PHONY: all build build-stats clean devtools-all devtools-bindgen devtools-scrapefuncs
@ -26,13 +26,22 @@ clean:
go-build: clean
go-build:
$(GOBUILD) $(GOFLAGS) -ldflags '${LDFLAGS}' $(GOBUILD_ADDITIONAL_ARGS) \
CGO_ENABLED=0 $(GOBUILD) -trimpath $(GOFLAGS) -ldflags '${LDFLAGS}' $(GOBUILD_ADDITIONAL_ARGS) \
-o '${GOBUILD_OUTPUT}' $(GOBUILD_PACKAGES)
build: GOFLAGS = -v -pgo=auto
build: GOBUILD_OUTPUT = ./bin/nuclei
build: GOBUILD_PACKAGES = cmd/nuclei/main.go
build: go-build
build-test: GOFLAGS = -v -pgo=auto
build-test: GOBUILD_OUTPUT = ./bin/nuclei.test
build-test: GOBUILD_PACKAGES = ./cmd/nuclei/
build-test: clean
build-test:
CGO_ENABLED=0 $(GOCMD) test -c -trimpath $(GOFLAGS) -ldflags '${LDFLAGS}' $(GOBUILD_ADDITIONAL_ARGS) \
-o '${GOBUILD_OUTPUT}' ${GOBUILD_PACKAGES}
build-stats: GOBUILD_OUTPUT = ./bin/nuclei-stats
build-stats: GOBUILD_PACKAGES = cmd/nuclei/main.go
build-stats: GOBUILD_ADDITIONAL_ARGS = -tags=stats

View File

@ -106,17 +106,19 @@ func main() {
// Profiling & tracing related code
if memProfile != "" {
memProfile = strings.TrimSuffix(memProfile, filepath.Ext(memProfile)) + ".prof"
memProfileFile, err := os.Create(memProfile)
memProfile = strings.TrimSuffix(memProfile, filepath.Ext(memProfile))
// createProfileFile creates the output file for one profile kind at
// memProfile+ext and returns it. profileType is only used in the error
// message. The program is aborted if the file cannot be created.
createProfileFile := func(ext, profileType string) *os.File {
	path := memProfile + ext
	f, err := os.Create(path)
	if err != nil {
		// f is nil when os.Create fails, so report the intended path
		// instead of calling f.Name(), which would panic on a nil *os.File.
		gologger.Fatal().Msgf("profile: could not create %s profile %q file: %v", profileType, path, err)
	}
	return f
}
traceFilepath := strings.TrimSuffix(memProfile, filepath.Ext(memProfile)) + ".trace"
traceFile, err := os.Create(traceFilepath)
if err != nil {
gologger.Fatal().Msgf("profile: could not create trace %q file: %v", traceFilepath, err)
}
memProfileFile := createProfileFile(".mem", "memory")
cpuProfileFile := createProfileFile(".cpu", "CPU")
traceFile := createProfileFile(".trace", "trace")
oldMemProfileRate := runtime.MemProfileRate
runtime.MemProfileRate = 4096
@ -126,18 +128,27 @@ func main() {
gologger.Fatal().Msgf("profile: could not start trace: %v", err)
}
defer func() {
// Start CPU profiling
if err := pprof.WriteHeapProfile(memProfileFile); err != nil {
if err := pprof.StartCPUProfile(cpuProfileFile); err != nil {
gologger.Fatal().Msgf("profile: could not start CPU profile: %v", err)
}
defer func() {
// Start heap memory snapshot
if err := pprof.WriteHeapProfile(memProfileFile); err != nil {
gologger.Fatal().Msgf("profile: could not write memory profile: %v", err)
}
pprof.StopCPUProfile()
memProfileFile.Close()
traceFile.Close()
trace.Stop()
runtime.MemProfileRate = oldMemProfileRate
gologger.Info().Msgf("Memory profile saved at %q", memProfile)
gologger.Info().Msgf("Traced at %q", traceFilepath)
gologger.Info().Msgf("CPU profile saved at %q", cpuProfileFile.Name())
gologger.Info().Msgf("Memory usage snapshot saved at %q", memProfileFile.Name())
gologger.Info().Msgf("Traced at %q", traceFile.Name())
}()
}

87
cmd/nuclei/main_test.go Normal file
View File

@ -0,0 +1,87 @@
package main_test
import (
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/projectdiscovery/goflags"
"github.com/projectdiscovery/gologger"
"github.com/projectdiscovery/gologger/levels"
"github.com/projectdiscovery/nuclei/v3/internal/runner"
"github.com/projectdiscovery/nuclei/v3/pkg/types"
)
// BenchmarkRunEnumeration measures repeated runner.RunEnumeration calls
// against a local HTTP test server that answers every request with
// 204 No Content.
//
// The options are built as a literal rather than through CLI flag parsing,
// and the runner is created once, outside the timed region.
func BenchmarkRunEnumeration(b *testing.B) {
	dummyServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusNoContent)
	}))
	defer dummyServer.Close()

	options := &types.Options{
		RemoteTemplateDomainList: goflags.StringSlice{
			"cloud.projectdiscovery.io",
		},
		ProjectPath:                "/tmp",
		Targets:                    goflags.StringSlice{dummyServer.URL},
		StatsInterval:              5,
		MetricsPort:                9092,
		MaxHostError:               30,
		NoHostErrors:               true,
		BulkSize:                   25,
		TemplateThreads:            25,
		HeadlessBulkSize:           10,
		HeadlessTemplateThreads:    10,
		Timeout:                    10,
		Retries:                    1,
		RateLimit:                  150,
		RateLimitDuration:          time.Second,
		RateLimitMinute:            0,
		PageTimeout:                20,
		InteractionsCacheSize:      5000,
		InteractionsPollDuration:   5,
		InteractionsEviction:       60,
		InteractionsCoolDownPeriod: 5,
		MaxRedirects:               10,
		Silent:                     true,
		VarDumpLimit:               255,
		JSONRequests:               true,
		StoreResponseDir:           "output",
		InputFileMode:              "list",
		ResponseReadSize:           0,
		ResponseSaveSize:           1048576,
		InputReadTimeout:           3 * time.Minute,
		UncoverField:               "ip:port",
		UncoverLimit:               100,
		UncoverRateLimit:           60,
		ScanStrategy:               "auto",
		FuzzAggressionLevel:        "low",
		FuzzParamFrequency:         10,
		TeamID:                     "none",
		JsConcurrency:              120,
		PayloadConcurrency:         25,
		ProbeConcurrency:           50,
		LoadHelperFileFunction:     types.DefaultOptions().LoadHelperFileFunction,
		// DialerKeepAlive: time.Duration(0),
		// DASTServerAddress: "localhost:9055",
	}

	runner.ParseOptions(options)

	// Disable logging to reduce benchmark noise.
	gologger.DefaultLogger.SetMaxLevel(levels.LevelSilent)

	nucleiRunner, err := runner.New(options)
	if err != nil {
		b.Fatalf("failed to create runner: %s", err)
	}
	// Release the runner's resources when the benchmark ends; b.Fatalf still
	// runs deferred calls.
	defer nucleiRunner.Close()

	// Report allocations even when the binary is run without -test.benchmem,
	// and keep the setup above out of the measurement.
	b.ReportAllocs()
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		if err := nucleiRunner.RunEnumeration(); err != nil {
			b.Fatalf("RunEnumeration failed: %s", err)
		}
	}
}

View File

@ -279,6 +279,11 @@ func NewStandardWriter(options *types.Options) (*StandardWriter, error) {
omitTemplate: options.OmitTemplate,
KeysToRedact: options.Redact,
}
if v := os.Getenv("DISABLE_STDOUT"); v == "true" || v == "1" {
writer.DisableStdout = true
}
return writer, nil
}