feat: generate CPU & PGO profiles (#6058)

* feat: generate CPU profiles also adjust memory (heap) profiles ext to `.mem` Signed-off-by: Dwi Siswanto <git@dw1.io> * docs(DESIGN): add total samples for CPU profiles Signed-off-by: Dwi Siswanto <git@dw1.io> * chore(make): trimpath in go-build and append LDFLAGS ifneq "darwin" Signed-off-by: Dwi Siswanto <git@dw1.io> * chore: update goreleaser build * replace `go mod tidy` with `go mod download` and `go mod verify` * adjust indentations * add `-trimpath` flag * set `-pgo` flag to "`auto`" * add `ldflags` * quoting 386 GOARCH value Signed-off-by: Dwi Siswanto <git@dw1.io> * ci: add generate PGO workflow Signed-off-by: Dwi Siswanto <git@dw1.io> * chore(make): set CGO_ENABLED inline in go-build Signed-off-by: Dwi Siswanto <git@dw1.io> * refactor(main): streamline profile file creation Signed-off-by: Dwi Siswanto <git@dw1.io> * dummy: add PGO file (DO NOT MERGE) Signed-off-by: Dwi Siswanto <git@dw1.io> * feat: add main test (benchmark) Signed-off-by: Dwi Siswanto <git@dw1.io> * chore(make): add build-test Signed-off-by: Dwi Siswanto <git@dw1.io> * Revert "dummy: add PGO file (DO NOT MERGE)" This reverts commit ee877205f729be2f054c7d7d484a9244121acce6. * test(main): set Output to /dev/null Signed-off-by: Dwi Siswanto <git@dw1.io> * feat(output): add option to disable stdout via env var Signed-off-by: Dwi Siswanto <git@dw1.io> * test(main): set `types.Options.Output` to empty Signed-off-by: Dwi Siswanto <git@dw1.io> * chore(generate-pgo): add TODO note Signed-off-by: Dwi Siswanto <git@dw1.io> * ci: add reusable perf regression workflow Signed-off-by: Dwi Siswanto <git@dw1.io> * ci(perf-regression): enabe `DISABLE_STDOUT` Signed-off-by: Dwi Siswanto <git@dw1.io> --------- Signed-off-by: Dwi Siswanto <git@dw1.io>
2025-12-29 22:23:02 +00:00 · 2025-02-24 18:22:57 +07:00 · 2025-02-24 18:22:57 +07:00 · 940885a3cc
commit 940885a3cc
parent 047d49f6f3
11 changed files with 302 additions and 68 deletions
--- a/.github/workflows/generate-pgo.yaml
+++ b/.github/workflows/generate-pgo.yaml
@ -0,0 +1,55 @@
+name: 👤 Generate PGO
+
+on:
+  push:
+    branches: ["dev"]
+    paths:
+      - '**.go'
+      - '**.mod'
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+# TODO(dwisiswant0): https://go.dev/doc/pgo#merging-profiles
+
+jobs:
+  pgo:
+    strategy:
+      matrix:
+        targets: [150]
+    runs-on: ubuntu-latest-16-cores
+    if: github.repository == 'projectdiscovery/nuclei'
+    permissions:
+      contents: write
+    env:
+      PGO_FILE: "cmd/nuclei/default.pgo"
+      LIST_FILE: "/tmp/targets-${{ matrix.targets }}.txt"
+      PROFILE_MEM: "/tmp/nuclei-profile-${{ matrix.targets }}-targets"
+    steps:
+      - uses: actions/checkout@v4
+      - uses: projectdiscovery/actions/setup/git@v1
+      - uses: projectdiscovery/actions/setup/go@v1
+      - name: Generate list
+        run: for i in {1..${{ matrix.targets }}}; do echo "https://honey.scanme.sh/?_=${i}" >> "${LIST_FILE}"; done
+      # NOTE(dwisiswant0): use `-no-mhe` flag to get better samples.
+      - run: go run . -l "${LIST_FILE}" -profile-mem="${PROFILE_MEM}" -no-mhe
+        working-directory: cmd/nuclei/
+      - run: mv "${PROFILE_MEM}.cpu" ${PGO_FILE}
+      # NOTE(dwisiswant0): shall we prune $PGO_FILE git history?
+      # if we prune it, this won't be linear since it requires a force-push.
+      # if we don't, the git objects will just keep growing bigger.
+      # 
+      # Ref:
+      # - https://go.dev/blog/pgo#:~:text=We%20recommend%20committing%20default.pgo%20files%20to%20your%20repository
+      # - https://gist.github.com/nottrobin/5758221
+      - uses: projectdiscovery/actions/commit@v1
+        with:
+          files: "${PGO_FILE}"
+          message: "build: update PGO profile :robot:"
+      - run: git push origin $GITHUB_REF
+      - uses: actions/upload-artifact@v4
+        with:
+          name: "pgo"
+          path: "${{ env.PGO_FILE }}"
--- a/.github/workflows/perf-regression.yaml
+++ b/.github/workflows/perf-regression.yaml
@ -0,0 +1,38 @@
+name: 🔨 Performance Regression
+
+on:
+  workflow_call:
+  workflow_dispatch:
+
+jobs:
+  perf-regression:
+    runs-on: ubuntu-latest-16-cores
+    if: github.repository == 'projectdiscovery/nuclei'
+    env:
+      BENCH_OUT: "/tmp/bench.out"
+    steps:
+      - uses: actions/checkout@v4
+      - uses: projectdiscovery/actions/setup/go@v1
+      - run: make build-test
+      - run: set -o pipefail; ./bin/nuclei.test -test.run - -test.bench=. -test.benchmem ./cmd/nuclei/ | tee "$BENCH_OUT"  # pipefail: a failing benchmark must fail the step, not be masked by tee
+        env:
+          DISABLE_STDOUT: "1"
+      - uses: actions/cache/restore@v4
+        with:
+          path: ./cache
+          key: ${{ runner.os }}-benchmark
+      - uses: benchmark-action/github-action-benchmark@v1
+        with:
+          name: 'RunEnumeration Benchmark'
+          tool: 'go'
+          output-file-path: ${{ env.BENCH_OUT }}
+          external-data-json-path: ./cache/benchmark-data.json
+          fail-on-alert: false
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          comment-on-alert: true
+          summary-always: true
+      - uses: actions/cache/save@v4
+        if: github.event_name == 'push'
+        with:
+          path: ./cache
+          key: ${{ runner.os }}-benchmark
--- a/.github/workflows/perf-test.yaml
+++ b/.github/workflows/perf-test.yaml
@ -20,16 +20,24 @@ jobs:
      - uses: projectdiscovery/actions/setup/go@v1
      - run: make verify
      - name: Generate list
-        run: for i in {1..${{ matrix.count }}}; do echo "https://scanme.sh/?_=${i}" >> "${LIST_FILE}"; done
-      - run: NUCLEI_ARGS=host-error-stats go run . -l "${LIST_FILE}" -profile-mem="${PROFILE_MEM}"
+        run: for i in {1..${{ matrix.count }}}; do echo "https://honey.scanme.sh/?_=${i}" >> "${LIST_FILE}"; done
+      - run: go run . -l "${LIST_FILE}" -profile-mem="${PROFILE_MEM}"
+        env:
+          NUCLEI_ARGS: host-error-stats
        working-directory: cmd/nuclei/
      - uses: projectdiscovery/actions/flamegraph@v1
-        id: flamegraph
+        id: flamegraph-cpu
        with:
-          profile: "${{ env.PROFILE_MEM }}.prof"
-          name: "nuclei-perf-test-${{ matrix.count }}"
+          profile: "${{ env.PROFILE_MEM }}.cpu"
+          name: "${{ env.FLAMEGRAPH_NAME }} CPU profiles"
        continue-on-error: true
-      - if: ${{ steps.flamegraph.outputs.message == '' }}
-        run: echo "::notice::${FLAMEGRAPH_URL}"
-        env:
-          FLAMEGRAPH_URL: ${{ steps.flamegraph.outputs.url }}
+      - uses: projectdiscovery/actions/flamegraph@v1
+        id: flamegraph-mem
+        with:
+          profile: "${{ env.PROFILE_MEM }}.mem"
+          name: "${{ env.FLAMEGRAPH_NAME }} memory profiles"
+        continue-on-error: true
+      - if: ${{ steps.flamegraph-mem.outputs.message == '' }}
+        run: |
+          echo "::notice::CPU flamegraph: ${{ steps.flamegraph-cpu.outputs.url }}"
+          echo "::notice::Memory (heap) flamegraph: ${{ steps.flamegraph-mem.outputs.url }}"
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@ -156,13 +156,23 @@ jobs:
          echo "FLAMEGRAPH_NAME=nuclei (PR #${{ github.event.number }})" >> $GITHUB_ENV
      - run: ./bin/nuclei -silent -update-templates
      - run: ./bin/nuclei -silent -u "${TARGET_URL}" -profile-mem="${PROFILE_MEM}"
-      - uses: projectdiscovery/actions/flamegraph@master
-        id: flamegraph
+      - uses: projectdiscovery/actions/flamegraph@v1
+        id: flamegraph-cpu
        with:
-          profile: "${{ env.PROFILE_MEM }}.prof"
-          name: "${{ env.FLAMEGRAPH_NAME }}"
+          profile: "${{ env.PROFILE_MEM }}.cpu"
+          name: "${{ env.FLAMEGRAPH_NAME }} CPU profiles"
        continue-on-error: true
-      - if: ${{ steps.flamegraph.outputs.message == '' }}
-        run: echo "::notice::${FLAMEGRAPH_URL}"
-        env:
-          FLAMEGRAPH_URL: ${{ steps.flamegraph.outputs.url }}
+      - uses: projectdiscovery/actions/flamegraph@v1
+        id: flamegraph-mem
+        with:
+          profile: "${{ env.PROFILE_MEM }}.mem"
+          name: "${{ env.FLAMEGRAPH_NAME }} memory profiles"
+        continue-on-error: true
+      - if: ${{ steps.flamegraph-mem.outputs.message == '' }}
+        run: |
+          echo "::notice::CPU flamegraph: ${{ steps.flamegraph-cpu.outputs.url }}"
+          echo "::notice::Memory (heap) flamegraph: ${{ steps.flamegraph-mem.outputs.url }}"
+
+  perf-regression:
+    needs: ["tests"]
+    uses: ./.github/workflows/perf-regression.yaml
--- a/.gitignore
+++ b/.gitignore
@ -49,3 +49,5 @@ vendor
 *.prof
 *.pprof
 *.trace
+*.mem
+*.cpu
--- a/.goreleaser.yml
+++ b/.goreleaser.yml
@ -1,27 +1,29 @@
 before:
  hooks:
-    - go mod tidy
+    - go mod download
+    - go mod verify

 builds:
  - main: cmd/nuclei/main.go
    binary: nuclei
    id: nuclei-cli
-
    env:
      - CGO_ENABLED=0
-
    goos: [windows,linux,darwin]
-  goarch: [amd64,386,arm,arm64]
+    goarch: [amd64,'386',arm,arm64]
    ignore:
      - goos: darwin
-      goarch: 386
+        goarch: '386'
      - goos: windows
        goarch: arm
      - goos: windows
        goarch: arm64
-
    flags:
      - -trimpath
+      - -pgo=auto
+    ldflags:
+      - -s
+      - -w

 #- main: cmd/tmc/main.go
 #  binary: tmc
--- a/DESIGN.md
+++ b/DESIGN.md
@ -459,35 +459,42 @@ That's it, you've added a new protocol to Nuclei. The next good step would be to

 ## Profiling and Tracing

-To analyze Nuclei's performance and resource usage, you can generate memory profiles and trace files using the `-profile-mem` flag:
+To analyze Nuclei's performance and resource usage, you can generate CPU & memory profiles and trace files using the `-profile-mem` flag:

 ```bash
 nuclei -t nuclei-templates/ -u https://example.com -profile-mem=nuclei-$(git describe --tags)
 ```

-This command creates two files:
+This command creates three files:

-* `nuclei.prof`: Memory (heap) profile
+* `nuclei.cpu`: CPU profile
+* `nuclei.mem`: Memory (heap) profile
 * `nuclei.trace`: Execution trace

-### Analyzing the Memory Profile
+### Analyzing the CPU/Memory Profiles

-1. View the profile in the terminal:
+* View the profile in the terminal:

 ```bash
-go tool pprof nuclei.prof
+go tool pprof nuclei.{cpu,mem}
 ```

-2. Display top memory consumers:
+* Display overall CPU time for processing $N$ targets:

-```bash
-go tool pprof -top nuclei.prof | grep "$(go list -m)" | head -10
+```bash
+go tool pprof -top nuclei.cpu | grep "Total samples"
 ```

-3. Visualize the profile in a web browser:
+* Display top memory consumers:

 ```bash
-go tool pprof -http=:$(shuf -i 1000-99999 -n 1) nuclei.prof
+go tool pprof -top nuclei.mem | grep "$(go list -m)" | head -10
+```
+
+* Visualize the profile in a web browser:
+
+```bash
+go tool pprof -http=:$(shuf -i 1024-65535 -n 1) nuclei.{cpu,mem}
 ```

 ### Analyzing the Trace File
--- a/13
+++ b/13
@ -11,7 +11,7 @@ GOFLAGS := -v
 LDFLAGS := -s -w

 ifneq ($(shell go env GOOS),darwin)
-	LDFLAGS = -extldflags "-static"
+	LDFLAGS += -extldflags "-static"
 endif
    
 .PHONY: all build build-stats clean devtools-all devtools-bindgen devtools-scrapefuncs
@ -26,13 +26,22 @@ clean:

 go-build: clean
 go-build:
-	$(GOBUILD) $(GOFLAGS) -ldflags '${LDFLAGS}' $(GOBUILD_ADDITIONAL_ARGS) \
+	CGO_ENABLED=0 $(GOBUILD) -trimpath $(GOFLAGS) -ldflags '${LDFLAGS}' $(GOBUILD_ADDITIONAL_ARGS) \
 		 -o '${GOBUILD_OUTPUT}' $(GOBUILD_PACKAGES)

+build: GOFLAGS = -v -pgo=auto
 build: GOBUILD_OUTPUT = ./bin/nuclei
 build: GOBUILD_PACKAGES = cmd/nuclei/main.go
 build: go-build

+build-test: GOFLAGS = -v -pgo=auto
+build-test: GOBUILD_OUTPUT = ./bin/nuclei.test
+build-test: GOBUILD_PACKAGES = ./cmd/nuclei/
+build-test: clean
+build-test:
+	CGO_ENABLED=0 $(GOCMD) test -c -trimpath $(GOFLAGS) -ldflags '${LDFLAGS}' $(GOBUILD_ADDITIONAL_ARGS) \
+		 -o '${GOBUILD_OUTPUT}' ${GOBUILD_PACKAGES}
+
 build-stats: GOBUILD_OUTPUT = ./bin/nuclei-stats
 build-stats: GOBUILD_PACKAGES = cmd/nuclei/main.go
 build-stats: GOBUILD_ADDITIONAL_ARGS = -tags=stats
--- a/cmd/nuclei/main.go
+++ b/cmd/nuclei/main.go
@ -106,17 +106,19 @@ func main() {

 	// Profiling & tracing related code
 	if memProfile != "" {
-		memProfile = strings.TrimSuffix(memProfile, filepath.Ext(memProfile)) + ".prof"
-		memProfileFile, err := os.Create(memProfile)
+		memProfile = strings.TrimSuffix(memProfile, filepath.Ext(memProfile))
+
+		createProfileFile := func(ext, profileType string) *os.File { // creates "<memProfile><ext>"; logs a fatal error if creation fails
+			f, err := os.Create(memProfile + ext)
 			if err != nil {
-			gologger.Fatal().Msgf("profile: could not create memory profile %q file: %v", memProfile, err)
+				gologger.Fatal().Msgf("profile: could not create %s profile %q file: %v", profileType, memProfile+ext, err) // f is nil on error, so f.Name() would panic; report the intended path
+			}
+			return f
 		}

-		traceFilepath := strings.TrimSuffix(memProfile, filepath.Ext(memProfile)) + ".trace"
-		traceFile, err := os.Create(traceFilepath)
-		if err != nil {
-			gologger.Fatal().Msgf("profile: could not create trace %q file: %v", traceFilepath, err)
-		}
+		memProfileFile := createProfileFile(".mem", "memory")
+		cpuProfileFile := createProfileFile(".cpu", "CPU")
+		traceFile := createProfileFile(".trace", "trace")

 		oldMemProfileRate := runtime.MemProfileRate
 		runtime.MemProfileRate = 4096
@ -126,18 +128,27 @@ func main() {
 			gologger.Fatal().Msgf("profile: could not start trace: %v", err)
 		}

-		defer func() {
 		// Start CPU profiling
-			if err := pprof.WriteHeapProfile(memProfileFile); err != nil {
+		if err := pprof.StartCPUProfile(cpuProfileFile); err != nil {
 			gologger.Fatal().Msgf("profile: could not start CPU profile: %v", err)
 		}
+
+		defer func() {
+			// Write the heap snapshot before the profilers are torn down.
+			if err := pprof.WriteHeapProfile(memProfileFile); err != nil {
+				gologger.Fatal().Msgf("profile: could not write memory profile: %v", err)
+			}
+
+			pprof.StopCPUProfile() // returns only once the CPU profile has been written out
+			cpuProfileFile.Close() // safe to close now; previously this file was never closed
 			memProfileFile.Close()
-			traceFile.Close()
 			trace.Stop()
+			traceFile.Close() // close only after trace.Stop(), which flushes the remaining trace data
 			runtime.MemProfileRate = oldMemProfileRate
 
-			gologger.Info().Msgf("Memory profile saved at %q", memProfile)
-			gologger.Info().Msgf("Traced at %q", traceFilepath)
+			gologger.Info().Msgf("CPU profile saved at %q", cpuProfileFile.Name())
+			gologger.Info().Msgf("Memory usage snapshot saved at %q", memProfileFile.Name())
+			gologger.Info().Msgf("Traced at %q", traceFile.Name())
 		}()
 	}

--- a/cmd/nuclei/main_test.go
+++ b/cmd/nuclei/main_test.go
@ -0,0 +1,87 @@
+package main_test
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/projectdiscovery/goflags"
+	"github.com/projectdiscovery/gologger"
+	"github.com/projectdiscovery/gologger/levels"
+	"github.com/projectdiscovery/nuclei/v3/internal/runner"
+	"github.com/projectdiscovery/nuclei/v3/pkg/types"
+)
+
+func BenchmarkRunEnumeration(b *testing.B) {
+	dummyServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusNoContent)
+	}))
+	defer dummyServer.Close()
+
+	options := &types.Options{
+		RemoteTemplateDomainList: goflags.StringSlice{
+			"cloud.projectdiscovery.io",
+		},
+		ProjectPath:                "/tmp",
+		Targets:                    goflags.StringSlice{dummyServer.URL},
+		StatsInterval:              5,
+		MetricsPort:                9092,
+		MaxHostError:               30,
+		NoHostErrors:               true,
+		BulkSize:                   25,
+		TemplateThreads:            25,
+		HeadlessBulkSize:           10,
+		HeadlessTemplateThreads:    10,
+		Timeout:                    10,
+		Retries:                    1,
+		RateLimit:                  150,
+		RateLimitDuration:          time.Duration(time.Second),
+		RateLimitMinute:            0,
+		PageTimeout:                20,
+		InteractionsCacheSize:      5000,
+		InteractionsPollDuration:   5,
+		InteractionsEviction:       60,
+		InteractionsCoolDownPeriod: 5,
+		MaxRedirects:               10,
+		Silent:                     true,
+		VarDumpLimit:               255,
+		JSONRequests:               true,
+		StoreResponseDir:           "output",
+		InputFileMode:              "list",
+		ResponseReadSize:           0,
+		ResponseSaveSize:           1048576,
+		InputReadTimeout:           time.Duration(3 * time.Minute),
+		UncoverField:               "ip:port",
+		UncoverLimit:               100,
+		UncoverRateLimit:           60,
+		ScanStrategy:               "auto",
+		FuzzAggressionLevel:        "low",
+		FuzzParamFrequency:         10,
+		TeamID:                     "none",
+		JsConcurrency:              120,
+		PayloadConcurrency:         25,
+		ProbeConcurrency:           50,
+		LoadHelperFileFunction:     types.DefaultOptions().LoadHelperFileFunction,
+		// DialerKeepAlive:            time.Duration(0),
+		// DASTServerAddress:          "localhost:9055",
+	}
+
+	runner.ParseOptions(options)
+
+	// Disable logging to reduce benchmark noise.
+	gologger.DefaultLogger.SetMaxLevel(levels.LevelSilent)
+
+	nucleiRunner, err := runner.New(options)
+	if err != nil {
+		b.Fatalf("failed to create runner: %s", err)
+	}
+
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		if err := nucleiRunner.RunEnumeration(); err != nil {
+			b.Fatalf("RunEnumeration failed: %s", err)
+		}
+	}
+}
--- a/pkg/output/output.go
+++ b/pkg/output/output.go
@ -279,6 +279,11 @@ func NewStandardWriter(options *types.Options) (*StandardWriter, error) {
 		omitTemplate:     options.OmitTemplate,
 		KeysToRedact:     options.Redact,
 	}
+
+	if v := os.Getenv("DISABLE_STDOUT"); v == "true" || v == "1" {
+		writer.DisableStdout = true
+	}
+
 	return writer, nil
 }