diff --git a/.github/workflows/generate-pgo.yaml b/.github/workflows/generate-pgo.yaml
new file mode 100644
index 000000000..25ea17858
--- /dev/null
+++ b/.github/workflows/generate-pgo.yaml
@@ -0,0 +1,55 @@
+name: 👤 Generate PGO
+
+on:
+  push:
+    branches: ["dev"]
+    paths:
+      - '**.go'
+      - '**.mod'
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+# TODO(dwisiswant0): https://go.dev/doc/pgo#merging-profiles
+
+jobs:
+  pgo:
+    strategy:
+      matrix:
+        targets: [150]  # number of target URLs generated for the profiling run
+    runs-on: ubuntu-latest-16-cores
+    if: github.repository == 'projectdiscovery/nuclei'
+    permissions:
+      contents: write  # the job commits and pushes the refreshed profile
+    env:
+      PGO_FILE: "cmd/nuclei/default.pgo"
+      LIST_FILE: "/tmp/targets-${{ matrix.targets }}.txt"
+      PROFILE_MEM: "/tmp/nuclei-profile-${{ matrix.targets }}-targets"
+    steps:
+      - uses: actions/checkout@v4
+      - uses: projectdiscovery/actions/setup/git@v1
+      - uses: projectdiscovery/actions/setup/go@v1
+      - name: Generate list
+        run: for i in {1..${{ matrix.targets }}}; do echo "https://honey.scanme.sh/?_=${i}" >> "${LIST_FILE}"; done
+      # NOTE(dwisiswant0): use `-no-mhe` flag to get better samples.
+      - run: go run . -l "${LIST_FILE}" -profile-mem="${PROFILE_MEM}" -no-mhe
+        working-directory: cmd/nuclei/
+      - run: mv "${PROFILE_MEM}.cpu" "${PGO_FILE}"  # the CPU profile becomes the PGO input
+      # NOTE(dwisiswant0): shall we prune $PGO_FILE git history?
+      # if we prune it, this won't be linear since it requires a force-push.
+      # if we don't, the git objects will just keep growing bigger.
+      #
+      # Ref:
+      # - https://go.dev/blog/pgo#:~:text=We%20recommend%20committing%20default.pgo%20files%20to%20your%20repository
+      # - https://gist.github.com/nottrobin/5758221
+      - uses: projectdiscovery/actions/commit@v1
+        with:
+          files: "${{ env.PGO_FILE }}"  # expression syntax: `with` inputs are not shell-expanded by the runner
+          message: "build: update PGO profile :robot:"
+      - run: git push origin "${GITHUB_REF}"
+      - uses: actions/upload-artifact@v4
+        with:
+          name: "pgo"
+          path: "${{ env.PGO_FILE }}"
diff --git a/.github/workflows/perf-regression.yaml b/.github/workflows/perf-regression.yaml
new file mode 100644
index 000000000..9f4b2fb88
--- /dev/null
+++ b/.github/workflows/perf-regression.yaml
@@ -0,0 +1,42 @@
+name: 🔨 Performance Regression
+
+on:
+  workflow_call:
+  workflow_dispatch:
+
+jobs:
+  perf-regression:
+    runs-on: ubuntu-latest-16-cores
+    if: github.repository == 'projectdiscovery/nuclei'
+    env:
+      BENCH_OUT: "/tmp/bench.out"
+    steps:
+      - uses: actions/checkout@v4
+      - uses: projectdiscovery/actions/setup/go@v1
+      - run: make build-test
+      # `shell: bash` makes the runner add `-o pipefail`, so a failing
+      # benchmark binary fails the step instead of being masked by `tee`.
+      - run: ./bin/nuclei.test -test.run - -test.bench=. -test.benchmem ./cmd/nuclei/ | tee "$BENCH_OUT"
+        shell: bash
+        env:
+          DISABLE_STDOUT: "1"  # read in pkg/output to set the writer's DisableStdout
+      # Benchmark history lives in ./cache and is only saved back on push events.
+      - uses: actions/cache/restore@v4
+        with:
+          path: ./cache
+          key: ${{ runner.os }}-benchmark
+      - uses: benchmark-action/github-action-benchmark@v1
+        with:
+          name: 'RunEnumeration Benchmark'
+          tool: 'go'
+          output-file-path: ${{ env.BENCH_OUT }}
+          external-data-json-path: ./cache/benchmark-data.json
+          fail-on-alert: false
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          comment-on-alert: true
+          summary-always: true
+      - uses: actions/cache/save@v4
+        if: github.event_name == 'push'
+        with:
+          path: ./cache
+          key: ${{ runner.os }}-benchmark
diff --git a/.github/workflows/perf-test.yaml b/.github/workflows/perf-test.yaml
index f4c327fe0..ec2449464 100644
--- a/.github/workflows/perf-test.yaml
+++ b/.github/workflows/perf-test.yaml
@@ -20,16 +20,33 @@ jobs:
       - uses: projectdiscovery/actions/setup/go@v1
       - run: make verify
       - name: Generate list
-        run: for i in {1..${{ matrix.count }}}; do echo "https://scanme.sh/?_=${i}" >> "${LIST_FILE}"; done
-      - run: NUCLEI_ARGS=host-error-stats go run . -l "${LIST_FILE}" -profile-mem="${PROFILE_MEM}"
+        run: for i in {1..${{ matrix.count }}}; do echo "https://honey.scanme.sh/?_=${i}" >> "${LIST_FILE}"; done
+      - run: go run . -l "${LIST_FILE}" -profile-mem="${PROFILE_MEM}"
+        env:
+          NUCLEI_ARGS: host-error-stats
         working-directory: cmd/nuclei/
       - uses: projectdiscovery/actions/flamegraph@v1
-        id: flamegraph
+        id: flamegraph-cpu
         with:
-          profile: "${{ env.PROFILE_MEM }}.prof"
-          name: "nuclei-perf-test-${{ matrix.count }}"
+          profile: "${{ env.PROFILE_MEM }}.cpu"
+          # Label the graph with the matrix target count so runs stay distinguishable.
+          name: "nuclei-perf-test-${{ matrix.count }} CPU profiles"
         continue-on-error: true
-      - if: ${{ steps.flamegraph.outputs.message == '' }}
-        run: echo "::notice::${FLAMEGRAPH_URL}"
-        env:
-          FLAMEGRAPH_URL: ${{ steps.flamegraph.outputs.url }}
+      - uses: projectdiscovery/actions/flamegraph@v1
+        id: flamegraph-mem
+        with:
+          profile: "${{ env.PROFILE_MEM }}.mem"
+          name: "nuclei-perf-test-${{ matrix.count }} memory profiles"
+        continue-on-error: true
+      # Announce each flamegraph only when its own step left `message` empty
+      # (presumably success; confirm against the action). URLs are passed via env.
+      - if: ${{ steps.flamegraph-cpu.outputs.message == '' }}
+        run: |
+          echo "::notice::CPU flamegraph: ${FLAMEGRAPH_URL}"
+        env:
+          FLAMEGRAPH_URL: ${{ steps.flamegraph-cpu.outputs.url }}
+      - if: ${{ steps.flamegraph-mem.outputs.message == '' }}
+        run: |
+          echo "::notice::Memory (heap) flamegraph: ${FLAMEGRAPH_URL}"
+        env:
+          FLAMEGRAPH_URL: ${{ steps.flamegraph-mem.outputs.url }}
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index b0d695b1c..b4cf0b22d 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -156,13 +156,31 @@ jobs:
           echo "FLAMEGRAPH_NAME=nuclei (PR #${{ github.event.number }})" >> $GITHUB_ENV
       - run: ./bin/nuclei -silent -update-templates
       - run: ./bin/nuclei -silent -u "${TARGET_URL}" -profile-mem="${PROFILE_MEM}"
-      - uses: projectdiscovery/actions/flamegraph@master
-        id: flamegraph
+      - uses: projectdiscovery/actions/flamegraph@v1
+        id: flamegraph-cpu
         with:
-          profile: "${{ env.PROFILE_MEM }}.prof"
-          name: "${{ env.FLAMEGRAPH_NAME }}"
+          profile: "${{ env.PROFILE_MEM }}.cpu"
+          name: "${{ env.FLAMEGRAPH_NAME }} CPU profiles"
         continue-on-error: true
-      - if: ${{ steps.flamegraph.outputs.message == '' }}
-        run: echo "::notice::${FLAMEGRAPH_URL}"
-        env:
-          FLAMEGRAPH_URL: ${{ steps.flamegraph.outputs.url }}
+      - uses: projectdiscovery/actions/flamegraph@v1
+        id: flamegraph-mem
+        with:
+          profile: "${{ env.PROFILE_MEM }}.mem"
+          name: "${{ env.FLAMEGRAPH_NAME }} memory profiles"
+        continue-on-error: true
+      # Announce each flamegraph only when its own step left `message` empty
+      # (presumably success; confirm against the action). URLs are passed via env.
+      - if: ${{ steps.flamegraph-cpu.outputs.message == '' }}
+        run: |
+          echo "::notice::CPU flamegraph: ${FLAMEGRAPH_URL}"
+        env:
+          FLAMEGRAPH_URL: ${{ steps.flamegraph-cpu.outputs.url }}
+      - if: ${{ steps.flamegraph-mem.outputs.message == '' }}
+        run: |
+          echo "::notice::Memory (heap) flamegraph: ${FLAMEGRAPH_URL}"
+        env:
+          FLAMEGRAPH_URL: ${{ steps.flamegraph-mem.outputs.url }}
+
+  perf-regression:
+    needs: ["tests"]
+    uses: ./.github/workflows/perf-regression.yaml
diff --git a/.gitignore b/.gitignore
index f61794b19..f5153fe0f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,4 +48,6 @@ vendor
 # Profiling & tracing
 *.prof
 *.pprof
-*.trace
\ No newline at end of file
+*.trace
+*.mem
+*.cpu
diff --git a/.goreleaser.yml b/.goreleaser.yml
index df77d595c..f488d4df0 100644
--- a/.goreleaser.yml
+++ b/.goreleaser.yml
@@ -1,27 +1,29 @@
 before:
   hooks:
-    - go mod tidy
+    - go mod download  # fetch the modules the build needs
+    - go mod verify  # check that the downloaded modules are unmodified
 
 builds:
-- main: cmd/nuclei/main.go
-  binary: nuclei
-  id: nuclei-cli
-
-  env:
-  - CGO_ENABLED=0
-
-  goos: [windows,linux,darwin]
-  goarch: [amd64,386,arm,arm64]
-  ignore:
-    - goos: darwin
-      goarch: 386
-    - goos: windows
-      goarch: arm
-    - goos: windows
-      goarch: arm64
-
-  flags:
-    - -trimpath
+  - main: cmd/nuclei/main.go
+    binary: nuclei
+    id: nuclei-cli
+    env:
+      - CGO_ENABLED=0
+    goos: [windows,linux,darwin]
+    goarch: [amd64,'386',arm,arm64]  # '386' is quoted so YAML keeps it a string
+    ignore:
+      - goos: darwin
+        goarch: '386'
+      - goos: windows
+        goarch: arm
+      - goos: windows
+        goarch: arm64
+    flags:
+      - -trimpath
+      - -pgo=auto  # profile-guided optimization; "auto" looks for default.pgo in the main package directory
+    ldflags:
+      - -s  # omit the symbol table and debug information
+      - -w  # omit the DWARF symbol table
 
 #- main: cmd/tmc/main.go
 #  binary: tmc
@@ -34,10 +36,10 @@ builds:
 #  goarch: [amd64]
 
 archives:
-- format: zip
-  id: nuclei
-  builds: [nuclei-cli]
-  name_template: '{{ .ProjectName }}_{{ .Version }}_{{ if eq .Os "darwin" }}macOS{{ else }}{{ .Os }}{{ end }}_{{ .Arch }}'
+  - format: zip
+    id: nuclei
+    builds: [nuclei-cli]  # refers to the build id declared under `builds`
+    name_template: '{{ .ProjectName }}_{{ .Version }}_{{ if eq .Os "darwin" }}macOS{{ else }}{{ .Os }}{{ end }}_{{ .Arch }}'  # darwin archives are labelled macOS
 
 checksum:
   algorithm: sha256
diff --git a/DESIGN.md b/DESIGN.md
index 93e8755ce..2d10a6d5b 100644
--- a/DESIGN.md
+++ b/DESIGN.md
@@ -459,35 +459,42 @@ That's it, you've added a new protocol to Nuclei. The next good step would be to
 
 ## Profiling and Tracing
 
-To analyze Nuclei's performance and resource usage, you can generate memory profiles and trace files using the `-profile-mem` flag:
+To analyze Nuclei's performance and resource usage, you can generate CPU & memory profiles and trace files using the `-profile-mem` flag:
 
 ```bash
 nuclei -t nuclei-templates/ -u https://example.com -profile-mem=nuclei-$(git describe --tags)
 ```
 
-This command creates two files:
+This command creates three files:
 
-* `nuclei.prof`: Memory (heap) profile
+* `nuclei.cpu`: CPU profile
+* `nuclei.mem`: Memory (heap) profile
 * `nuclei.trace`: Execution trace
 
-### Analyzing the Memory Profile
+### Analyzing the CPU/Memory Profiles
 
-1. View the profile in the terminal:
+* View a profile in the terminal (run the command once per file, i.e. `nuclei.cpu` or `nuclei.mem`):
 
 ```bash
-go tool pprof nuclei.prof
+go tool pprof nuclei.{cpu,mem}
 ```
 
-2. Display top memory consumers:
+* Display overall CPU time for processing $N$ targets:
 
-```bash
-go tool pprof -top nuclei.prof | grep "$(go list -m)" | head -10
+```bash
+go tool pprof -top nuclei.cpu | grep "Total samples"
 ```
 
-3. Visualize the profile in a web browser:
+* Display top memory consumers:
 
 ```bash
-go tool pprof -http=:$(shuf -i 1000-99999 -n 1) nuclei.prof
+go tool pprof -top nuclei.mem | grep "$(go list -m)" | head -10
+```
+
+* Visualize a profile in a web browser (one file per invocation, `nuclei.cpu` or `nuclei.mem`):
+
+```bash
+go tool pprof -http=:$(shuf -i 1000-99999 -n 1) nuclei.{cpu,mem}
 ```
 
 ### Analyzing the Trace File
diff --git a/Makefile b/Makefile
index b3aed71bd..0c3ab083b 100644
--- a/Makefile
+++ b/Makefile
@@ -11,7 +11,7 @@ GOFLAGS := -v
 LDFLAGS := -s -w
 
 ifneq ($(shell go env GOOS),darwin)
-	LDFLAGS = -extldflags "-static"
+	LDFLAGS += -extldflags "-static"
 endif
     
 .PHONY: all build build-stats clean devtools-all devtools-bindgen devtools-scrapefuncs
@@ -26,13 +26,25 @@ clean:
 
 go-build: clean
 go-build:
-	$(GOBUILD) $(GOFLAGS) -ldflags '${LDFLAGS}' $(GOBUILD_ADDITIONAL_ARGS) \
+	CGO_ENABLED=0 $(GOBUILD) -trimpath $(GOFLAGS) -ldflags '${LDFLAGS}' $(GOBUILD_ADDITIONAL_ARGS) \
 		 -o '${GOBUILD_OUTPUT}' $(GOBUILD_PACKAGES)
 
+# build: nuclei CLI binary; -pgo=auto lets the Go toolchain use default.pgo from the main package directory.
 build: GOFLAGS = -v -pgo=auto
 build: GOBUILD_OUTPUT = ./bin/nuclei
 build: GOBUILD_PACKAGES = cmd/nuclei/main.go
 build: go-build
 
+# build-test: compile (`go test -c`) the ./cmd/nuclei/ test binary to ./bin/nuclei.test
+# without running it, using the same build flags as `build`.
 build-test: GOFLAGS = -v -pgo=auto
 build-test: GOBUILD_OUTPUT = ./bin/nuclei.test
 build-test: GOBUILD_PACKAGES = ./cmd/nuclei/
 build-test: clean
 build-test:
 	CGO_ENABLED=0 $(GOCMD) test -c -trimpath $(GOFLAGS) -ldflags '${LDFLAGS}' $(GOBUILD_ADDITIONAL_ARGS) \
 		 -o '${GOBUILD_OUTPUT}' ${GOBUILD_PACKAGES}
 
 build-stats: GOBUILD_OUTPUT = ./bin/nuclei-stats
 build-stats: GOBUILD_PACKAGES = cmd/nuclei/main.go
 build-stats: GOBUILD_ADDITIONAL_ARGS = -tags=stats
diff --git a/cmd/nuclei/main.go b/cmd/nuclei/main.go
index 6b45d4939..8cc2b53f1 100644
--- a/cmd/nuclei/main.go
+++ b/cmd/nuclei/main.go
@@ -106,17 +106,22 @@ func main() {
 
 	// Profiling & tracing related code
 	if memProfile != "" {
-		memProfile = strings.TrimSuffix(memProfile, filepath.Ext(memProfile)) + ".prof"
-		memProfileFile, err := os.Create(memProfile)
-		if err != nil {
-			gologger.Fatal().Msgf("profile: could not create memory profile %q file: %v", memProfile, err)
+		memProfile = strings.TrimSuffix(memProfile, filepath.Ext(memProfile))
+
+		// createProfileFile creates memProfile+ext or aborts; profileType only labels the error message.
+		createProfileFile := func(ext, profileType string) *os.File {
+			name := memProfile + ext
+			f, err := os.Create(name)
+			if err != nil {
+				// f is nil when os.Create fails, so report the requested path rather than f.Name().
+				gologger.Fatal().Msgf("profile: could not create %s profile %q file: %v", profileType, name, err)
+			}
+			return f
 		}
 
-		traceFilepath := strings.TrimSuffix(memProfile, filepath.Ext(memProfile)) + ".trace"
-		traceFile, err := os.Create(traceFilepath)
-		if err != nil {
-			gologger.Fatal().Msgf("profile: could not create trace %q file: %v", traceFilepath, err)
-		}
+		memProfileFile := createProfileFile(".mem", "memory")
+		cpuProfileFile := createProfileFile(".cpu", "CPU")
+		traceFile := createProfileFile(".trace", "trace")
 
 		oldMemProfileRate := runtime.MemProfileRate
 		runtime.MemProfileRate = 4096
@@ -126,18 +131,30 @@ func main() {
 			gologger.Fatal().Msgf("profile: could not start trace: %v", err)
 		}
 
+		// Start CPU profiling
+		if err := pprof.StartCPUProfile(cpuProfileFile); err != nil {
+			gologger.Fatal().Msgf("profile: could not start CPU profile: %v", err)
+		}
+
 		defer func() {
-			// Start CPU profiling
+			// Write the heap snapshot while the profile files are still open
 			if err := pprof.WriteHeapProfile(memProfileFile); err != nil {
-				gologger.Fatal().Msgf("profile: could not start CPU profile: %v", err)
+				gologger.Fatal().Msgf("profile: could not write memory profile: %v", err)
 			}
+
+			// Stop each writer before closing its file: StopCPUProfile and trace.Stop
+			// only return once their pending writes have completed.
+			pprof.StopCPUProfile()
+			cpuProfileFile.Close()
 			memProfileFile.Close()
-			traceFile.Close()
 			trace.Stop()
+			traceFile.Close()
+
 			runtime.MemProfileRate = oldMemProfileRate
 
-			gologger.Info().Msgf("Memory profile saved at %q", memProfile)
-			gologger.Info().Msgf("Traced at %q", traceFilepath)
+			gologger.Info().Msgf("CPU profile saved at %q", cpuProfileFile.Name())
+			gologger.Info().Msgf("Memory usage snapshot saved at %q", memProfileFile.Name())
+			gologger.Info().Msgf("Traced at %q", traceFile.Name())
 		}()
 	}
 
diff --git a/cmd/nuclei/main_test.go b/cmd/nuclei/main_test.go
new file mode 100644
index 000000000..01c75d5c8
--- /dev/null
+++ b/cmd/nuclei/main_test.go
@@ -0,0 +1,87 @@
+package main_test
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/projectdiscovery/goflags"
+	"github.com/projectdiscovery/gologger"
+	"github.com/projectdiscovery/gologger/levels"
+	"github.com/projectdiscovery/nuclei/v3/internal/runner"
+	"github.com/projectdiscovery/nuclei/v3/pkg/types"
+)
+
+// BenchmarkRunEnumeration measures Runner.RunEnumeration against a local
+// httptest server that answers every request with 204 No Content.
+func BenchmarkRunEnumeration(b *testing.B) {
+	noContent := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.WriteHeader(http.StatusNoContent)
+	})
+	srv := httptest.NewServer(noContent)
+	defer srv.Close()
+
+	opts := &types.Options{
+		RemoteTemplateDomainList:   goflags.StringSlice{"cloud.projectdiscovery.io"},
+		ProjectPath:                "/tmp",
+		Targets:                    goflags.StringSlice{srv.URL},
+		StatsInterval:              5,
+		MetricsPort:                9092,
+		MaxHostError:               30,
+		NoHostErrors:               true,
+		BulkSize:                   25,
+		TemplateThreads:            25,
+		HeadlessBulkSize:           10,
+		HeadlessTemplateThreads:    10,
+		Timeout:                    10,
+		Retries:                    1,
+		RateLimit:                  150,
+		RateLimitDuration:          time.Second,
+		RateLimitMinute:            0,
+		PageTimeout:                20,
+		InteractionsCacheSize:      5000,
+		InteractionsPollDuration:   5,
+		InteractionsEviction:       60,
+		InteractionsCoolDownPeriod: 5,
+		MaxRedirects:               10,
+		Silent:                     true,
+		VarDumpLimit:               255,
+		JSONRequests:               true,
+		StoreResponseDir:           "output",
+		InputFileMode:              "list",
+		ResponseReadSize:           0,
+		ResponseSaveSize:           1048576,
+		InputReadTimeout:           3 * time.Minute,
+		UncoverField:               "ip:port",
+		UncoverLimit:               100,
+		UncoverRateLimit:           60,
+		ScanStrategy:               "auto",
+		FuzzAggressionLevel:        "low",
+		FuzzParamFrequency:         10,
+		TeamID:                     "none",
+		JsConcurrency:              120,
+		PayloadConcurrency:         25,
+		ProbeConcurrency:           50,
+		LoadHelperFileFunction:     types.DefaultOptions().LoadHelperFileFunction,
+		// DialerKeepAlive:            time.Duration(0),
+		// DASTServerAddress:          "localhost:9055",
+	}
+
+	runner.ParseOptions(opts)
+
+	// Silence the logger so log output does not add noise to the measurement.
+	gologger.DefaultLogger.SetMaxLevel(levels.LevelSilent)
+
+	nr, err := runner.New(opts)
+	if err != nil {
+		b.Fatalf("failed to create runner: %s", err)
+	}
+
+	b.ResetTimer()
+	for iter := 0; iter < b.N; iter++ {
+		err = nr.RunEnumeration()
+		if err != nil {
+			b.Fatalf("RunEnumeration failed: %s", err)
+		}
+	}
+}
diff --git a/pkg/output/output.go b/pkg/output/output.go
index 2ccbd2c1d..3c461891f 100644
--- a/pkg/output/output.go
+++ b/pkg/output/output.go
@@ -279,6 +279,11 @@ func NewStandardWriter(options *types.Options) (*StandardWriter, error) {
 		omitTemplate:     options.OmitTemplate,
 		KeysToRedact:     options.Redact,
 	}
+
+	if v := os.Getenv("DISABLE_STDOUT"); v == "true" || v == "1" {
+		writer.DisableStdout = true // env override; only the exact values "true" or "1" enable it
+	}
+
 	return writer, nil
 }