diff --git a/.devenv/docker/clickhouse/compose.yaml b/.devenv/docker/clickhouse/compose.yaml index a230ea824e38..a1432c0b94d1 100644 --- a/.devenv/docker/clickhouse/compose.yaml +++ b/.devenv/docker/clickhouse/compose.yaml @@ -1,6 +1,6 @@ services: clickhouse: - image: clickhouse/clickhouse-server:24.1.2-alpine + image: clickhouse/clickhouse-server:25.5.6 container_name: clickhouse volumes: - ${PWD}/fs/etc/clickhouse-server/config.d/config.xml:/etc/clickhouse-server/config.d/config.xml @@ -23,6 +23,8 @@ services: retries: 3 depends_on: - zookeeper + environment: + - CLICKHOUSE_SKIP_USER_SETUP=1 zookeeper: image: signoz/zookeeper:3.7.1 container_name: zookeeper diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index bf486037afcf..b525276f8146 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -5,6 +5,45 @@ /frontend/ @SigNoz/frontend @YounixM /frontend/src/container/MetricsApplication @srikanthccv /frontend/src/container/NewWidget/RightContainer/types.ts @srikanthccv + +# Dashboard, Alert, Metrics, Service Map, Services +/frontend/src/container/ListOfDashboard/ @srikanthccv +/frontend/src/container/NewDashboard/ @srikanthccv +/frontend/src/pages/DashboardsListPage/ @srikanthccv +/frontend/src/pages/DashboardWidget/ @srikanthccv +/frontend/src/pages/NewDashboard/ @srikanthccv +/frontend/src/providers/Dashboard/ @srikanthccv + +# Alerts +/frontend/src/container/AlertHistory/ @srikanthccv +/frontend/src/container/AllAlertChannels/ @srikanthccv +/frontend/src/container/AnomalyAlertEvaluationView/ @srikanthccv +/frontend/src/container/CreateAlertChannels/ @srikanthccv +/frontend/src/container/CreateAlertRule/ @srikanthccv +/frontend/src/container/EditAlertChannels/ @srikanthccv +/frontend/src/container/FormAlertChannels/ @srikanthccv +/frontend/src/container/FormAlertRules/ @srikanthccv +/frontend/src/container/ListAlertRules/ @srikanthccv +/frontend/src/container/TriggeredAlerts/ @srikanthccv +/frontend/src/pages/AlertChannelCreate/ @srikanthccv +/frontend/src/pages/AlertDetails/ @srikanthccv +/frontend/src/pages/AlertHistory/ @srikanthccv +/frontend/src/pages/AlertList/ @srikanthccv +/frontend/src/pages/CreateAlert/ @srikanthccv +/frontend/src/providers/Alert.tsx @srikanthccv + +# Metrics +/frontend/src/container/MetricsExplorer/ @srikanthccv +/frontend/src/pages/MetricsApplication/ @srikanthccv +/frontend/src/pages/MetricsExplorer/ @srikanthccv + +# Services and Service Map +/frontend/src/container/ServiceApplication/ @srikanthccv +/frontend/src/container/ServiceTable/ @srikanthccv +/frontend/src/pages/Services/ @srikanthccv +/frontend/src/pages/ServiceTopLevelOperations/ @srikanthccv +/frontend/src/container/Home/Services/ @srikanthccv + /deploy/ @SigNoz/devops .github @SigNoz/devops diff --git a/deploy/docker-swarm/docker-compose.ha.yaml b/deploy/docker-swarm/docker-compose.ha.yaml index c1fd89b807ea..791fb4a3fcc1 100644 --- a/deploy/docker-swarm/docker-compose.ha.yaml +++ b/deploy/docker-swarm/docker-compose.ha.yaml @@ -11,7 +11,7 @@ x-common: &common max-file: "3" x-clickhouse-defaults: &clickhouse-defaults !!merge <<: *common - image: clickhouse/clickhouse-server:24.1.2-alpine + image: clickhouse/clickhouse-server:25.5.6 tty: true deploy: labels: @@ -37,6 +37,8 @@ x-clickhouse-defaults: &clickhouse-defaults nofile: soft: 262144 hard: 262144 + environment: + - CLICKHOUSE_SKIP_USER_SETUP=1 x-zookeeper-defaults: &zookeeper-defaults !!merge <<: *common image: signoz/zookeeper:3.7.1 @@ -63,7 +65,7 @@ x-db-depend: &db-depend services: init-clickhouse: !!merge <<: *common - image: clickhouse/clickhouse-server:24.1.2-alpine + image: clickhouse/clickhouse-server:25.5.6 command: - bash - -c diff --git a/deploy/docker-swarm/docker-compose.yaml b/deploy/docker-swarm/docker-compose.yaml index 60e38d68f757..890ae226595f 100644 --- a/deploy/docker-swarm/docker-compose.yaml +++ b/deploy/docker-swarm/docker-compose.yaml @@ -11,7 +11,7 @@ x-common: &common max-file: "3" x-clickhouse-defaults: &clickhouse-defaults !!merge <<: *common - image: clickhouse/clickhouse-server:24.1.2-alpine + image: clickhouse/clickhouse-server:25.5.6 tty: true deploy: labels: @@ -36,6 +36,8 @@ x-clickhouse-defaults: &clickhouse-defaults nofile: soft: 262144 hard: 262144 + environment: + - CLICKHOUSE_SKIP_USER_SETUP=1 x-zookeeper-defaults: &zookeeper-defaults !!merge <<: *common image: signoz/zookeeper:3.7.1 @@ -60,7 +62,7 @@ x-db-depend: &db-depend services: init-clickhouse: !!merge <<: *common - image: clickhouse/clickhouse-server:24.1.2-alpine + image: clickhouse/clickhouse-server:25.5.6 command: - bash - -c diff --git a/deploy/docker/docker-compose.ha.yaml b/deploy/docker/docker-compose.ha.yaml index 1a9799492742..11e9bebcddce 100644 --- a/deploy/docker/docker-compose.ha.yaml +++ b/deploy/docker/docker-compose.ha.yaml @@ -10,7 +10,7 @@ x-common: &common x-clickhouse-defaults: &clickhouse-defaults !!merge <<: *common # addding non LTS version due to this fix https://github.com/ClickHouse/ClickHouse/commit/32caf8716352f45c1b617274c7508c86b7d1afab - image: clickhouse/clickhouse-server:24.1.2-alpine + image: clickhouse/clickhouse-server:25.5.6 tty: true labels: signoz.io/scrape: "true" @@ -40,6 +40,8 @@ x-clickhouse-defaults: &clickhouse-defaults nofile: soft: 262144 hard: 262144 + environment: + - CLICKHOUSE_SKIP_USER_SETUP=1 x-zookeeper-defaults: &zookeeper-defaults !!merge <<: *common image: signoz/zookeeper:3.7.1 @@ -65,7 +67,7 @@ x-db-depend: &db-depend services: init-clickhouse: !!merge <<: *common - image: clickhouse/clickhouse-server:24.1.2-alpine + image: clickhouse/clickhouse-server:25.5.6 container_name: signoz-init-clickhouse command: - bash diff --git a/deploy/docker/docker-compose.yaml b/deploy/docker/docker-compose.yaml index cde2414d03d7..c6dfce321ec2 100644 --- a/deploy/docker/docker-compose.yaml +++ b/deploy/docker/docker-compose.yaml @@ -9,8 +9,7 @@ x-common: &common max-file: "3" x-clickhouse-defaults: &clickhouse-defaults !!merge <<: *common - # addding non LTS version due to this fix https://github.com/ClickHouse/ClickHouse/commit/32caf8716352f45c1b617274c7508c86b7d1afab - image: clickhouse/clickhouse-server:24.1.2-alpine + image: clickhouse/clickhouse-server:25.5.6 tty: true labels: signoz.io/scrape: "true" @@ -36,6 +35,8 @@ x-clickhouse-defaults: &clickhouse-defaults nofile: soft: 262144 hard: 262144 + environment: + - CLICKHOUSE_SKIP_USER_SETUP=1 x-zookeeper-defaults: &zookeeper-defaults !!merge <<: *common image: signoz/zookeeper:3.7.1 @@ -61,7 +62,7 @@ x-db-depend: &db-depend services: init-clickhouse: !!merge <<: *common - image: clickhouse/clickhouse-server:24.1.2-alpine + image: clickhouse/clickhouse-server:25.5.6 container_name: signoz-init-clickhouse command: - bash diff --git a/go.mod b/go.mod index 3e6b35309e97..4fa125c5079c 100644 --- a/go.mod +++ b/go.mod @@ -300,13 +300,13 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.36.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.36.0 // indirect - go.opentelemetry.io/otel/exporters/prometheus v0.58.0 // indirect + go.opentelemetry.io/otel/exporters/prometheus v0.58.0 go.opentelemetry.io/otel/exporters/stdout/stdoutlog v0.12.2 // indirect go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.36.0 // indirect go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.36.0 // indirect go.opentelemetry.io/otel/log v0.12.2 // indirect go.opentelemetry.io/otel/sdk/log v0.12.2 // indirect - go.opentelemetry.io/otel/sdk/metric v1.36.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.36.0 go.opentelemetry.io/proto/otlp v1.6.0 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/goleak v1.3.0 // indirect diff --git a/pkg/alertmanager/alertmanagerserver/server.go b/pkg/alertmanager/alertmanagerserver/server.go index f53be783db97..55662340f528 100644 --- a/pkg/alertmanager/alertmanagerserver/server.go +++ b/pkg/alertmanager/alertmanagerserver/server.go @@ -73,8 +73,10 @@ func New(ctx context.Context, logger *slog.Logger, registry prometheus.Registere stateStore: stateStore, stopc: make(chan struct{}), } + signozRegisterer := prometheus.WrapRegistererWithPrefix("signoz_", registry) + signozRegisterer = prometheus.WrapRegistererWith(prometheus.Labels{"org_id": server.orgID}, signozRegisterer) // initialize marker - server.marker = alertmanagertypes.NewMarker(server.registry) + server.marker = alertmanagertypes.NewMarker(signozRegisterer) // get silences for initial state state, err := server.stateStore.Get(ctx, server.orgID) @@ -97,7 +99,7 @@ func New(ctx context.Context, logger *slog.Logger, registry prometheus.Registere MaxSilences: func() int { return srvConfig.Silences.Max }, MaxSilenceSizeBytes: func() int { return srvConfig.Silences.MaxSizeBytes }, }, - Metrics: server.registry, + Metrics: signozRegisterer, Logger: server.logger, }) if err != nil { @@ -116,7 +118,7 @@ func New(ctx context.Context, logger *slog.Logger, registry prometheus.Registere server.nflog, err = nflog.New(nflog.Options{ SnapshotReader: strings.NewReader(nflogSnapshot), Retention: server.srvConfig.NFLog.Retention, - Metrics: server.registry, + Metrics: signozRegisterer, Logger: server.logger, }) if err != nil { @@ -181,13 +183,13 @@ func New(ctx context.Context, logger *slog.Logger, registry prometheus.Registere }) }() - server.alerts, err = mem.NewAlerts(ctx, server.marker, server.srvConfig.Alerts.GCInterval, nil, server.logger, server.registry) + server.alerts, err = mem.NewAlerts(ctx, server.marker, server.srvConfig.Alerts.GCInterval, nil, server.logger, signozRegisterer) if err != nil { return nil, err } - server.pipelineBuilder = notify.NewPipelineBuilder(server.registry, featurecontrol.NoopFlags{}) - server.dispatcherMetrics = dispatch.NewDispatcherMetrics(false, server.registry) + server.pipelineBuilder = notify.NewPipelineBuilder(signozRegisterer, featurecontrol.NoopFlags{}) + server.dispatcherMetrics = dispatch.NewDispatcherMetrics(false, signozRegisterer) return server, nil } diff --git a/pkg/instrumentation/metric.go b/pkg/instrumentation/metric.go new file mode 100644 index 000000000000..63bb0b302cf6 --- /dev/null +++ b/pkg/instrumentation/metric.go @@ -0,0 +1,160 @@ +package instrumentation + +import ( + "context" + "errors" + "fmt" + "net" + "net/http" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + contribsdkconfig "go.opentelemetry.io/contrib/config" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + otelprom "go.opentelemetry.io/otel/exporters/prometheus" + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/metric/noop" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/resource" +) + +// readerWithServer wraps a metric reader with an HTTP server for proper shutdown +// This mirrors the upstream contrib/config implementation +type readerWithServer struct { + sdkmetric.Reader + server *http.Server +} + +func (rws readerWithServer) Shutdown(ctx context.Context) error { + return errors.Join( + rws.Reader.Shutdown(ctx), + rws.server.Shutdown(ctx), + ) +} + +// prometheusReaderWithCustomRegistry creates a Prometheus metric reader using a custom registry +// This is based on the upstream contrib/config implementation but allows passing a custom registry +func prometheusReaderWithCustomRegistry(ctx context.Context, prometheusConfig *contribsdkconfig.Prometheus, customRegistry *prometheus.Registry) (sdkmetric.Reader, error) { + var opts []otelprom.Option + if prometheusConfig.Host == nil { + return nil, fmt.Errorf("host must be specified") + } + if prometheusConfig.Port == nil { + return nil, fmt.Errorf("port must be specified") + } + if prometheusConfig.WithoutScopeInfo != nil && *prometheusConfig.WithoutScopeInfo { + opts = append(opts, otelprom.WithoutScopeInfo()) + } + if prometheusConfig.WithoutTypeSuffix != nil && *prometheusConfig.WithoutTypeSuffix { + opts = append(opts, otelprom.WithoutCounterSuffixes()) + } + if prometheusConfig.WithoutUnits != nil && *prometheusConfig.WithoutUnits { + opts = append(opts, otelprom.WithoutUnits()) + } + if prometheusConfig.WithResourceConstantLabels != nil { + if prometheusConfig.WithResourceConstantLabels.Included != nil { + var keys []attribute.Key + for _, val := range prometheusConfig.WithResourceConstantLabels.Included { + keys = append(keys, attribute.Key(val)) + } + opts = append(opts, otelprom.WithResourceAsConstantLabels(attribute.NewAllowKeysFilter(keys...))) + } + if prometheusConfig.WithResourceConstantLabels.Excluded != nil { + var keys []attribute.Key + for _, val := range prometheusConfig.WithResourceConstantLabels.Excluded { + keys = append(keys, attribute.Key(val)) + } + opts = append(opts, otelprom.WithResourceAsConstantLabels(attribute.NewDenyKeysFilter(keys...))) + } + } + + // Use custom registry instead of creating a new one + opts = append(opts, otelprom.WithRegisterer(customRegistry)) + + mux := http.NewServeMux() + mux.Handle("/metrics", promhttp.HandlerFor(customRegistry, promhttp.HandlerOpts{Registry: customRegistry})) + server := http.Server{ + // Timeouts are necessary to make a server resilient to attacks, but ListenAndServe doesn't set any. + // We use values from this example: https://blog.cloudflare.com/exposing-go-on-the-internet/#:~:text=There%20are%20three%20main%20timeouts + ReadTimeout: 5 * time.Second, + WriteTimeout: 10 * time.Second, + IdleTimeout: 120 * time.Second, + Handler: mux, + } + addr := fmt.Sprintf("%s:%d", *prometheusConfig.Host, *prometheusConfig.Port) + + reader, err := otelprom.New(opts...) + if err != nil { + return nil, fmt.Errorf("error creating otel prometheus exporter: %w", err) + } + lis, err := net.Listen("tcp", addr) + if err != nil { + return nil, errors.Join( + fmt.Errorf("binding address %s for Prometheus exporter: %w", addr, err), + reader.Shutdown(ctx), + ) + } + + go func() { + if err := server.Serve(lis); err != nil && !errors.Is(err, http.ErrServerClosed) { + otel.Handle(fmt.Errorf("the Prometheus HTTP server exited unexpectedly: %w", err)) + } + }() + + return readerWithServer{reader, &server}, nil +} + +type shutdownFunc func(context.Context) error + +// noopShutdown is a no-op shutdown function +func noopShutdown(context.Context) error { return nil } + +// meterProviderWithCustomRegistry creates a meter provider using contrib config approach +// but with custom Prometheus registry injection +func meterProviderWithCustomRegistry(ctx context.Context, meterProviderConfig *contribsdkconfig.MeterProvider, res *resource.Resource, customRegistry *prometheus.Registry) (metric.MeterProvider, shutdownFunc, error) { + if meterProviderConfig == nil { + return noop.NewMeterProvider(), noopShutdown, nil + } + opts := []sdkmetric.Option{ + sdkmetric.WithResource(res), + } + + var errs []error + for _, reader := range meterProviderConfig.Readers { + r, err := metricReaderWithCustomRegistry(ctx, reader, customRegistry) + if err == nil { + opts = append(opts, sdkmetric.WithReader(r)) + } else { + errs = append(errs, err) + } + } + + if len(errs) > 0 { + return noop.NewMeterProvider(), noopShutdown, errors.Join(errs...) + } + + mp := sdkmetric.NewMeterProvider(opts...) + return mp, mp.Shutdown, nil +} + +// metricReaderWithCustomRegistry creates metric readers with custom Prometheus registry support +func metricReaderWithCustomRegistry(ctx context.Context, r contribsdkconfig.MetricReader, customRegistry *prometheus.Registry) (sdkmetric.Reader, error) { + if r.Periodic != nil && r.Pull != nil { + return nil, errors.New("must not specify multiple metric reader type") + } + + if r.Pull != nil { + return pullReaderWithCustomRegistry(ctx, r.Pull.Exporter, customRegistry) + } + return nil, errors.New("no valid metric reader") +} + +// pullReaderWithCustomRegistry creates pull readers with custom Prometheus registry support +func pullReaderWithCustomRegistry(ctx context.Context, exporter contribsdkconfig.MetricExporter, customRegistry *prometheus.Registry) (sdkmetric.Reader, error) { + if exporter.Prometheus != nil { + return prometheusReaderWithCustomRegistry(ctx, exporter.Prometheus, customRegistry) + } + return nil, errors.New("no valid metric exporter") +} diff --git a/pkg/instrumentation/sdk.go b/pkg/instrumentation/sdk.go index 89c2fb03055a..89e03fb27a8c 100644 --- a/pkg/instrumentation/sdk.go +++ b/pkg/instrumentation/sdk.go @@ -22,6 +22,7 @@ var _ Instrumentation = (*SDK)(nil) type SDK struct { logger *slog.Logger sdk contribsdkconfig.SDK + meterProvider sdkmetric.MeterProvider prometheusRegistry *prometheus.Registry startCh chan struct{} } @@ -59,6 +60,9 @@ func New(ctx context.Context, cfg Config, build version.Build, serviceName strin SchemaUrl: &sch, } + prometheusRegistry := prometheus.NewRegistry() + prometheusRegistry.MustRegister(collectors.NewBuildInfoCollector()) + var tracerProvider *contribsdkconfig.TracerProvider if cfg.Traces.Enabled { tracerProvider = &contribsdkconfig.TracerProvider{ @@ -69,20 +73,26 @@ func New(ctx context.Context, cfg Config, build version.Build, serviceName strin } } - var meterProvider *contribsdkconfig.MeterProvider + // Use contrib config approach but with custom Prometheus registry + var meterProvider sdkmetric.MeterProvider if cfg.Metrics.Enabled { - meterProvider = &contribsdkconfig.MeterProvider{ + meterProviderConfig := &contribsdkconfig.MeterProvider{ Readers: []contribsdkconfig.MetricReader{ {Pull: &cfg.Metrics.Readers.Pull}, }, } + + mp, _, err := meterProviderWithCustomRegistry(ctx, meterProviderConfig, resource, prometheusRegistry) + if err != nil { + return nil, err + } + meterProvider = mp } sdk, err := contribsdkconfig.NewSDK( contribsdkconfig.WithContext(ctx), contribsdkconfig.WithOpenTelemetryConfiguration(contribsdkconfig.OpenTelemetryConfiguration{ TracerProvider: tracerProvider, - MeterProvider: meterProvider, Resource: &configResource, }), ) @@ -90,11 +100,9 @@ func New(ctx context.Context, cfg Config, build version.Build, serviceName strin return nil, err } - prometheusRegistry := prometheus.NewRegistry() - prometheusRegistry.MustRegister(collectors.NewBuildInfoCollector()) - return &SDK{ sdk: sdk, + meterProvider: meterProvider, prometheusRegistry: prometheusRegistry, logger: NewLogger(cfg), startCh: make(chan struct{}),