chore: notification routing | added notification routing via expression based routes (#9195)

* chore: added custom dispatcher

* feat(notification-grouping): added notification grouping

* feat(notification-grouping): addded integration test dependency

* feat(notification-grouping): linting and test cases

* feat(notification-grouping): linting and test cases

* feat(notification-grouping): linting and test cases

* feat(notification-grouping): addded integration test dependency

* feat(notification-grouping): debug log lines

* feat(notification-grouping): debug log lines

* feat(notification-grouping): debug log lines

* feat(notification-grouping): addded integration test dependency

* feat(notification-grouping): addded integration test dependency

* feat(notification-grouping): addded integration test dependency

* feat(notification-grouping): added structure changes

* feat(notification-grouping): added structure changes

* feat(notification-routing): added notification routing

* chore(notification-grouping): added notificaiton grouping

* Update pkg/alertmanager/nfmanager/rulebasednotification/provider.go

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

* chore(notification-grouping): added renotification interval

* fix(notification-grouping): added fix for renotification

* chore(notificaiton-grouping): added no data renotify

* chore(notificaiton-grouping): added no data renotify

* chore(notificaiton-grouping): added no data renotify

* chore(notification-grouping): added no data renotify interval

* chore(notification-grouping): removed errors package from dispatcher

* chore(notification-grouping): removed errors package from dispatcher

* chore(notification-grouping): removed unwanted tests

* chore(notification-grouping): removed unwanted pkg name

* chore(notification-grouping): added delete notification setting

* chore(notification-grouping): added delete notification setting

* Update pkg/alertmanager/nfmanager/nfmanagertest/provider.go

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

* chore(notification-grouping): removed nfmanager config| notification settings in postable rule

* chore(notification-grouping): removed nfmanager config| notification settings in postable rule

* chore(notification-grouping): added test for dispatcher

* chore(notification-grouping): added test for dispatcher

* chore(notification-grouping): go linting errors

* chore(notification-grouping): added test cases for aggGroupPerRoute

* chore(notification-grouping): added test cases for aggGroupPerRoute

* chore(notification-grouping): corrected get notification config logic

* Update pkg/alertmanager/nfmanager/rulebasednotification/provider_test.go

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

* chore(notification-routing): added notification routing policies

* feat(notification-routing): added test cases for dispatcher

* chore(notification-routing): added notification routing policies

* chore(notification-routing): added notification routing policies

* Apply suggestions from code review

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

* chore(notification-routing): added notification routing policies

* chore(notification-routing): added notification routing policies

* Update pkg/alertmanager/alertmanagerserver/distpatcher_test.go

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

* chore(notification-routing): sorted imports

* chore(notification-routing): minor edit |pr resolve comments

* chore(notification-grouping): corrected dispatcher test cases

* chore(notification-routing): added notification routing policies

* chore(notification-routing): corrected race condition in test

* chore: resolved pr comments

* chore: passing threshold value to template

* chore: completed delete rule functionality

* chore: added grouping disabled functionality

* chore: added grouping disabled functionality

* chore(notification-routing): resolved pr comments

* chore(notification-routing): resolved pr comments

* chore(notification-routing): resolved pr comments

* chore(notification-routing): sorted imports

* chore(notification-routing): fix linting errors

* chore(notification-routing): removed enabled flags

* fix: test rule multiple threshold (#9224)

* chore: corrected linting errors

* chore: corrected linting errors

* chore: corrected linting errors

* chore: corrected linting errors

* chore: corrected migration errors

* chore: corrected migration errors

* chore: corrected migration errors

* chore: corrected migration errors

* Update pkg/sqlmigration/049_add_route_policy.go

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

* chore: added org_id as foreign key

* chore: resolved pr comments

* chore: removed route store unused

---------

Co-authored-by: Srikanth Chekuri <srikanth.chekuri92@gmail.com>
Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
This commit is contained in:
aniketio-ctrl 2025-10-03 19:47:15 +05:30 committed by GitHub
parent d3be2632b6
commit f9a70a3a69
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
40 changed files with 3842 additions and 894 deletions

View File

@ -251,7 +251,7 @@ func (r *AnomalyRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID, t
continue
}
}
results, err := r.Threshold.ShouldAlert(*series)
results, err := r.Threshold.ShouldAlert(*series, r.Unit())
if err != nil {
return nil, err
}
@ -301,7 +301,7 @@ func (r *AnomalyRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID,
continue
}
}
results, err := r.Threshold.ShouldAlert(*series)
results, err := r.Threshold.ShouldAlert(*series, r.Unit())
if err != nil {
return nil, err
}
@ -336,14 +336,19 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (interface{}, erro
resultFPs := map[uint64]struct{}{}
var alerts = make(map[uint64]*ruletypes.Alert, len(res))
ruleReceivers := r.Threshold.GetRuleReceivers()
ruleReceiverMap := make(map[string][]string)
for _, value := range ruleReceivers {
ruleReceiverMap[value.Name] = value.Channels
}
for _, smpl := range res {
l := make(map[string]string, len(smpl.Metric))
for _, lbl := range smpl.Metric {
l[lbl.Name] = lbl.Value
}
value := valueFormatter.Format(smpl.V, r.Unit())
threshold := valueFormatter.Format(r.TargetVal(), r.Unit())
threshold := valueFormatter.Format(smpl.Target, smpl.TargetUnit)
r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold)
tmplData := ruletypes.AlertTemplateData(l, value, threshold)
@ -408,13 +413,12 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (interface{}, erro
State: model.StatePending,
Value: smpl.V,
GeneratorURL: r.GeneratorURL(),
Receivers: r.PreferredChannels(),
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
Missing: smpl.IsMissing,
}
}
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
// alerts[h] is ready, add or update active list now
for h, a := range alerts {
// Check whether we already have alerting state for the identifying label set.
@ -423,7 +427,9 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (interface{}, erro
alert.Value = a.Value
alert.Annotations = a.Annotations
alert.Receivers = r.PreferredChannels()
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
alert.Receivers = ruleReceiverMap[v]
}
continue
}

View File

@ -126,7 +126,6 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.Ap
if parsedRule.RuleType == ruletypes.RuleTypeThreshold {
// add special labels for test alerts
parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target)
parsedRule.Labels[labels.RuleSourceLabel] = ""
parsedRule.Labels[labels.AlertRuleIdLabel] = ""

2
go.mod
View File

@ -127,7 +127,7 @@ require (
github.com/elastic/lunes v0.1.0 // indirect
github.com/emirpasic/gods v1.18.1 // indirect
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
github.com/expr-lang/expr v1.17.5 // indirect
github.com/expr-lang/expr v1.17.5
github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.9.0 // indirect

View File

@ -3,6 +3,8 @@ package alertmanager
import (
"context"
amConfig "github.com/prometheus/alertmanager/config"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/statsreporter"
@ -26,7 +28,7 @@ type Alertmanager interface {
TestReceiver(context.Context, string, alertmanagertypes.Receiver) error
// TestAlert sends an alert to a list of receivers.
TestAlert(ctx context.Context, orgID string, alert *alertmanagertypes.PostableAlert, receivers []string) error
TestAlert(ctx context.Context, orgID string, ruleID string, receiversMap map[*alertmanagertypes.PostableAlert][]string) error
// ListChannels lists all channels for the organization.
ListChannels(context.Context, string) ([]*alertmanagertypes.Channel, error)
@ -59,6 +61,19 @@ type Alertmanager interface {
DeleteNotificationConfig(ctx context.Context, orgID valuer.UUID, ruleId string) error
// Notification Policy CRUD
CreateRoutePolicy(ctx context.Context, route *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error)
CreateRoutePolicies(ctx context.Context, routeRequests []*alertmanagertypes.PostableRoutePolicy) ([]*alertmanagertypes.GettableRoutePolicy, error)
GetRoutePolicyByID(ctx context.Context, routeID string) (*alertmanagertypes.GettableRoutePolicy, error)
GetAllRoutePolicies(ctx context.Context) ([]*alertmanagertypes.GettableRoutePolicy, error)
UpdateRoutePolicyByID(ctx context.Context, routeID string, route *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error)
DeleteRoutePolicyByID(ctx context.Context, routeID string) error
DeleteAllRoutePoliciesByRuleId(ctx context.Context, ruleId string) error
UpdateAllRoutePoliciesByRuleId(ctx context.Context, ruleId string, routes []*alertmanagertypes.PostableRoutePolicy) error
CreateInhibitRules(ctx context.Context, orgID valuer.UUID, rules []amConfig.InhibitRule) error
DeleteAllInhibitRulesByRuleId(ctx context.Context, orgID valuer.UUID, ruleId string) error
// Collects stats for the organization.
statsreporter.StatsCollector
}

View File

@ -10,19 +10,17 @@ import (
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/prometheus/alertmanager/dispatch"
"github.com/prometheus/alertmanager/notify"
"github.com/prometheus/alertmanager/pkg/labels"
"github.com/prometheus/alertmanager/provider"
"github.com/prometheus/alertmanager/store"
"github.com/prometheus/alertmanager/types"
"github.com/prometheus/common/model"
)
const (
noDataLabel = model.LabelName("nodata")
)
// Dispatcher sorts incoming alerts into aggregation groups and
// assigns the correct notifiers to each.
type Dispatcher struct {
@ -46,6 +44,7 @@ type Dispatcher struct {
logger *slog.Logger
notificationManager nfmanager.NotificationManager
orgID string
receiverRoutes map[string]*dispatch.Route
}
// We use the upstream Limits interface from Prometheus
@ -90,6 +89,7 @@ func (d *Dispatcher) Run() {
d.mtx.Lock()
d.aggrGroupsPerRoute = map[*dispatch.Route]map[model.Fingerprint]*aggrGroup{}
d.receiverRoutes = map[string]*dispatch.Route{}
d.aggrGroupsNum = 0
d.metrics.aggrGroups.Set(0)
d.ctx, d.cancel = context.WithCancel(context.Background())
@ -125,8 +125,14 @@ func (d *Dispatcher) run(it provider.AlertIterator) {
}
now := time.Now()
for _, r := range d.route.Match(alert.Labels) {
d.processAlert(alert, r)
channels, err := d.notificationManager.Match(d.ctx, d.orgID, getRuleIDFromAlert(alert), alert.Labels)
if err != nil {
d.logger.ErrorContext(d.ctx, "Error on alert match", "err", err)
continue
}
for _, channel := range channels {
route := d.getOrCreateRoute(channel)
d.processAlert(alert, route)
}
d.metrics.processingDuration.Observe(time.Since(now).Seconds())
@ -266,6 +272,7 @@ type notifyFunc func(context.Context, ...*types.Alert) bool
// processAlert determines in which aggregation group the alert falls
// and inserts it.
// no data alert will only have ruleId and no data label
func (d *Dispatcher) processAlert(alert *types.Alert, route *dispatch.Route) {
ruleId := getRuleIDFromAlert(alert)
config, err := d.notificationManager.GetNotificationConfig(d.orgID, ruleId)
@ -273,8 +280,14 @@ func (d *Dispatcher) processAlert(alert *types.Alert, route *dispatch.Route) {
d.logger.ErrorContext(d.ctx, "error getting alert notification config", "rule_id", ruleId, "error", err)
return
}
renotifyInterval := config.Renotify.RenotifyInterval
groupLabels := getGroupLabels(alert, config.NotificationGroup)
groupLabels := getGroupLabels(alert, config.NotificationGroup, config.GroupByAll)
if alertmanagertypes.NoDataAlert(alert) {
renotifyInterval = config.Renotify.NoDataInterval
groupLabels[alertmanagertypes.NoDataLabel] = alert.Labels[alertmanagertypes.NoDataLabel] //to create new group key for no data alerts
}
fp := groupLabels.Fingerprint()
@ -299,12 +312,6 @@ func (d *Dispatcher) processAlert(alert *types.Alert, route *dispatch.Route) {
d.logger.ErrorContext(d.ctx, "Too many aggregation groups, cannot create new group for alert", "groups", d.aggrGroupsNum, "limit", limit, "alert", alert.Name())
return
}
renotifyInterval := config.Renotify.RenotifyInterval
if noDataAlert(alert) {
renotifyInterval = config.Renotify.NoDataInterval
groupLabels[noDataLabel] = alert.Labels[noDataLabel]
}
ag = newAggrGroup(d.ctx, groupLabels, route, d.timeout, d.logger, renotifyInterval)
@ -543,21 +550,35 @@ func deepCopyRouteOpts(opts dispatch.RouteOpts, renotify time.Duration) dispatch
return newOpts
}
func getGroupLabels(alert *types.Alert, groups map[model.LabelName]struct{}) model.LabelSet {
func getGroupLabels(alert *types.Alert, groups map[model.LabelName]struct{}, groupByAll bool) model.LabelSet {
groupLabels := model.LabelSet{}
for ln, lv := range alert.Labels {
if _, ok := groups[ln]; ok {
if _, ok := groups[ln]; ok || groupByAll {
groupLabels[ln] = lv
}
}
return groupLabels
}
func noDataAlert(alert *types.Alert) bool {
if _, ok := alert.Labels[noDataLabel]; ok {
return true
} else {
return false
// getOrCreateRoute returns the route cached for the given receiver name,
// building and caching a new one the first time the receiver is seen.
//
// A newly built route targets the receiver with a 30s group wait and a 5m
// group interval, and carries a single equality matcher on the
// "__receiver__" label. The lookup and the insert both happen while holding
// d.mtx.
func (d *Dispatcher) getOrCreateRoute(receiver string) *dispatch.Route {
	d.mtx.Lock()
	defer d.mtx.Unlock()

	cached, found := d.receiverRoutes[receiver]
	if found {
		return cached
	}

	opts := dispatch.RouteOpts{
		Receiver:      receiver,
		GroupWait:     30 * time.Second,
		GroupInterval: 5 * time.Minute,
		GroupByAll:    false,
	}
	receiverMatcher := &labels.Matcher{
		Name:  "__receiver__",
		Value: receiver,
		Type:  labels.MatchEqual,
	}
	created := &dispatch.Route{
		RouteOpts: opts,
		Matchers:  labels.Matchers{receiverMatcher},
	}

	d.receiverRoutes[receiver] = created
	return created
}

File diff suppressed because it is too large Load Diff

View File

@ -2,6 +2,9 @@ package alertmanagerserver
import (
"context"
"fmt"
"github.com/prometheus/alertmanager/types"
"golang.org/x/sync/errgroup"
"log/slog"
"strings"
"sync"
@ -321,39 +324,104 @@ func (server *Server) SetConfig(ctx context.Context, alertmanagerConfig *alertma
}
func (server *Server) TestReceiver(ctx context.Context, receiver alertmanagertypes.Receiver) error {
return alertmanagertypes.TestReceiver(ctx, receiver, alertmanagernotify.NewReceiverIntegrations, server.alertmanagerConfig, server.tmpl, server.logger, alertmanagertypes.NewTestAlert(receiver, time.Now(), time.Now()))
testAlert := alertmanagertypes.NewTestAlert(receiver, time.Now(), time.Now())
return alertmanagertypes.TestReceiver(ctx, receiver, alertmanagernotify.NewReceiverIntegrations, server.alertmanagerConfig, server.tmpl, server.logger, testAlert.Labels, testAlert)
}
func (server *Server) TestAlert(ctx context.Context, postableAlert *alertmanagertypes.PostableAlert, receivers []string) error {
alerts, err := alertmanagertypes.NewAlertsFromPostableAlerts(alertmanagertypes.PostableAlerts{postableAlert}, time.Duration(server.srvConfig.Global.ResolveTimeout), time.Now())
func (server *Server) TestAlert(ctx context.Context, receiversMap map[*alertmanagertypes.PostableAlert][]string, config *alertmanagertypes.NotificationConfig) error {
if len(receiversMap) == 0 {
return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput,
"expected at least 1 alert, got 0")
}
postableAlerts := make(alertmanagertypes.PostableAlerts, 0, len(receiversMap))
for alert := range receiversMap {
postableAlerts = append(postableAlerts, alert)
}
alerts, err := alertmanagertypes.NewAlertsFromPostableAlerts(
postableAlerts,
time.Duration(server.srvConfig.Global.ResolveTimeout),
time.Now(),
)
if err != nil {
return errors.Join(err...)
return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput,
"failed to construct alerts from postable alerts: %v", err)
}
if len(alerts) != 1 {
return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "expected 1 alert, got %d", len(alerts))
type alertGroup struct {
groupLabels model.LabelSet
alerts []*types.Alert
receivers map[string]struct{}
}
ch := make(chan error, len(receivers))
for _, receiverName := range receivers {
go func(receiverName string) {
groupMap := make(map[model.Fingerprint]*alertGroup)
for i, alert := range alerts {
labels := getGroupLabels(alert, config.NotificationGroup, config.GroupByAll)
fp := labels.Fingerprint()
postableAlert := postableAlerts[i]
alertReceivers := receiversMap[postableAlert]
if group, exists := groupMap[fp]; exists {
group.alerts = append(group.alerts, alert)
for _, r := range alertReceivers {
group.receivers[r] = struct{}{}
}
} else {
receiverSet := make(map[string]struct{})
for _, r := range alertReceivers {
receiverSet[r] = struct{}{}
}
groupMap[fp] = &alertGroup{
groupLabels: labels,
alerts: []*types.Alert{alert},
receivers: receiverSet,
}
}
}
var mu sync.Mutex
var errs []error
g, gCtx := errgroup.WithContext(ctx)
for _, group := range groupMap {
for receiverName := range group.receivers {
group := group
receiverName := receiverName
g.Go(func() error {
receiver, err := server.alertmanagerConfig.GetReceiver(receiverName)
if err != nil {
ch <- err
return
}
ch <- alertmanagertypes.TestReceiver(ctx, receiver, alertmanagernotify.NewReceiverIntegrations, server.alertmanagerConfig, server.tmpl, server.logger, alerts[0])
}(receiverName)
mu.Lock()
errs = append(errs, fmt.Errorf("failed to get receiver %q: %w", receiverName, err))
mu.Unlock()
return nil // Return nil to continue processing other goroutines
}
var errs []error
for i := 0; i < len(receivers); i++ {
if err := <-ch; err != nil {
errs = append(errs, err)
err = alertmanagertypes.TestReceiver(
gCtx,
receiver,
alertmanagernotify.NewReceiverIntegrations,
server.alertmanagerConfig,
server.tmpl,
server.logger,
group.groupLabels,
group.alerts...,
)
if err != nil {
mu.Lock()
errs = append(errs, fmt.Errorf("receiver %q test failed: %w", receiverName, err))
mu.Unlock()
}
return nil // Return nil to continue processing other goroutines
})
}
}
_ = g.Wait()
if errs != nil {
if len(errs) > 0 {
return errors.Join(errs...)
}

View File

@ -0,0 +1,223 @@
package alertmanagerserver
import (
"context"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes/alertmanagertypestest"
"github.com/prometheus/alertmanager/dispatch"
"io"
"log/slog"
"net/http"
"testing"
"time"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/rulebasednotification"
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/go-openapi/strfmt"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
)
// TestEndToEndAlertManagerFlow wires a rule-based notification manager (backed
// by a mock route store) into a Server, posts three alerts for the
// "high-cpu-usage" rule and then checks three things: the alerts are stored,
// the notification config can be read back, and the dispatcher produced one
// aggregation group per alert on the "webhook" receiver.
func TestEndToEndAlertManagerFlow(t *testing.T) {
ctx := context.Background()
providerSettings := instrumentationtest.New().ToProviderSettings()
store := nfroutingstoretest.NewMockSQLRouteStore()
store.MatchExpectationsInOrder(false)
notificationManager, err := rulebasednotification.New(ctx, providerSettings, nfmanager.Config{}, store)
require.NoError(t, err)
orgID := "test-org"
// Two rule-based policies named after the rule: one matches critical alerts,
// the other warning alerts. Both send to the "webhook" channel.
routes := []*alertmanagertypes.RoutePolicy{
{
Identifiable: types.Identifiable{
ID: valuer.GenerateUUID(),
},
Expression: `ruleId == "high-cpu-usage" && severity == "critical"`,
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Name: "high-cpu-usage",
Description: "High CPU critical alerts to webhook",
Enabled: true,
OrgID: orgID,
Channels: []string{"webhook"},
},
{
Identifiable: types.Identifiable{
ID: valuer.GenerateUUID(),
},
Expression: `ruleId == "high-cpu-usage" && severity == "warning"`,
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Name: "high-cpu-usage",
Description: "High CPU warning alerts to webhook",
Enabled: true,
OrgID: orgID,
Channels: []string{"webhook"},
},
}
store.ExpectCreateBatch(routes)
err = notificationManager.CreateRoutePolicies(ctx, orgID, routes)
require.NoError(t, err)
// Register two by-name lookups per route (four in total).
// NOTE(review): presumably sized to cover the lookups made while the alerts
// below are matched; the exact number needed is not visible here - confirm.
for range routes {
ruleID := "high-cpu-usage"
store.ExpectGetAllByName(orgID, ruleID, routes)
store.ExpectGetAllByName(orgID, ruleID, routes)
}
// Group by cluster and instance, and renotify every five minutes.
notifConfig := alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{
model.LabelName("cluster"): {},
model.LabelName("instance"): {},
},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 5 * time.Minute,
},
UsePolicy: false,
}
err = notificationManager.SetNotificationConfig(orgID, "high-cpu-usage", &notifConfig)
require.NoError(t, err)
// Build the server with a discarded logger and a fresh metrics registry,
// then apply the default alertmanager config for the org.
srvCfg := NewConfig()
stateStore := alertmanagertypestest.NewStateStore()
registry := prometheus.NewRegistry()
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
server, err := New(context.Background(), logger, registry, srvCfg, orgID, stateStore, notificationManager)
require.NoError(t, err)
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, orgID)
require.NoError(t, err)
err = server.SetConfig(ctx, amConfig)
require.NoError(t, err)
// Create test alerts
// Three alerts for the same rule and cluster on different instances:
// two critical (server-01, server-03) and one warning (server-02).
now := time.Now()
testAlerts := []*alertmanagertypes.PostableAlert{
{
Alert: alertmanagertypes.AlertModel{
Labels: map[string]string{
"ruleId": "high-cpu-usage",
"severity": "critical",
"cluster": "prod-cluster",
"instance": "server-01",
"alertname": "HighCPUUsage",
},
},
Annotations: map[string]string{
"summary": "High CPU usage detected",
"description": "CPU usage is above 90% for 5 minutes",
},
StartsAt: strfmt.DateTime(now.Add(-5 * time.Minute)),
EndsAt: strfmt.DateTime(time.Time{}), // Active alert
},
{
Alert: alertmanagertypes.AlertModel{
Labels: map[string]string{
"ruleId": "high-cpu-usage",
"severity": "warning",
"cluster": "prod-cluster",
"instance": "server-02",
"alertname": "HighCPUUsage",
},
},
Annotations: map[string]string{
"summary": "Moderate CPU usage detected",
"description": "CPU usage is above 70% for 10 minutes",
},
StartsAt: strfmt.DateTime(now.Add(-10 * time.Minute)),
EndsAt: strfmt.DateTime(time.Time{}), // Active alert
},
{
Alert: alertmanagertypes.AlertModel{
Labels: map[string]string{
"ruleId": "high-cpu-usage",
"severity": "critical",
"cluster": "prod-cluster",
"instance": "server-03",
"alertname": "HighCPUUsage",
},
},
Annotations: map[string]string{
"summary": "High CPU usage detected on server-03",
"description": "CPU usage is above 95% for 3 minutes",
},
StartsAt: strfmt.DateTime(now.Add(-3 * time.Minute)),
EndsAt: strfmt.DateTime(time.Time{}), // Active alert
},
}
err = server.PutAlerts(ctx, testAlerts)
require.NoError(t, err)
// Fixed pause before inspecting server state.
// NOTE(review): presumably gives the dispatcher time to consume the alerts - confirm.
time.Sleep(2 * time.Second)
// The server must report all three alerts with their labels intact, split
// into two critical and one warning.
t.Run("verify_alerts_processed", func(t *testing.T) {
dummyRequest, err := http.NewRequest(http.MethodGet, "/alerts", nil)
require.NoError(t, err)
params, err := alertmanagertypes.NewGettableAlertsParams(dummyRequest)
require.NoError(t, err)
alerts, err := server.GetAlerts(context.Background(), params)
require.NoError(t, err)
require.Len(t, alerts, 3, "Expected 3 active alerts")
for _, alert := range alerts {
require.Equal(t, "high-cpu-usage", alert.Alert.Labels["ruleId"])
require.NotEmpty(t, alert.Alert.Labels["severity"])
require.Contains(t, []string{"critical", "warning"}, alert.Alert.Labels["severity"])
require.Equal(t, "prod-cluster", alert.Alert.Labels["cluster"])
require.NotEmpty(t, alert.Alert.Labels["instance"])
}
criticalAlerts := 0
warningAlerts := 0
for _, alert := range alerts {
if alert.Alert.Labels["severity"] == "critical" {
criticalAlerts++
} else if alert.Alert.Labels["severity"] == "warning" {
warningAlerts++
}
}
require.Equal(t, 2, criticalAlerts, "Expected 2 critical alerts")
require.Equal(t, 1, warningAlerts, "Expected 1 warning alert")
})
// The stored notification config must round-trip the renotify interval and
// the grouping labels.
// NOTE(review): "ruleId" is asserted as a grouping label although the config
// set above lists only cluster and instance; presumably the notification
// manager adds it - confirm.
t.Run("verify_notification_routing", func(t *testing.T) {
notifConfig, err := notificationManager.GetNotificationConfig(orgID, "high-cpu-usage")
require.NoError(t, err)
require.NotNil(t, notifConfig)
require.Equal(t, 5*time.Minute, notifConfig.Renotify.RenotifyInterval)
require.Contains(t, notifConfig.NotificationGroup, model.LabelName("ruleId"))
require.Contains(t, notifConfig.NotificationGroup, model.LabelName("cluster"))
require.Contains(t, notifConfig.NotificationGroup, model.LabelName("instance"))
})
// Each alert has a distinct instance label, so the dispatcher is expected to
// hold three groups, all keyed under the "webhook" receiver route.
// NOTE(review): the indexed GroupKey assertions assume Groups returns the
// groups in a stable instance order - confirm.
t.Run("verify_alert_groups_and_stages", func(t *testing.T) {
time.Sleep(2 * time.Second)
alertGroups, _ := server.dispatcher.Groups(
func(route *dispatch.Route) bool { return true }, // Accept all routes
func(alert *alertmanagertypes.Alert, now time.Time) bool { return true }, // Accept all alerts
)
require.Len(t, alertGroups, 3)
require.NotEmpty(t, alertGroups, "Should have alert groups created by dispatcher")
totalAlerts := 0
for _, group := range alertGroups {
totalAlerts += len(group.Alerts)
}
require.Equal(t, 3, totalAlerts, "Should have 3 alerts total across all groups")
require.Equal(t, "{__receiver__=\"webhook\"}:{cluster=\"prod-cluster\", instance=\"server-01\", ruleId=\"high-cpu-usage\"}", alertGroups[0].GroupKey)
require.Equal(t, "{__receiver__=\"webhook\"}:{cluster=\"prod-cluster\", instance=\"server-02\", ruleId=\"high-cpu-usage\"}", alertGroups[1].GroupKey)
require.Equal(t, "{__receiver__=\"webhook\"}:{cluster=\"prod-cluster\", instance=\"server-03\", ruleId=\"high-cpu-usage\"}", alertGroups[2].GroupKey)
})
}

View File

@ -19,6 +19,7 @@ import (
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/client_golang/prometheus"
commoncfg "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -127,3 +128,189 @@ func TestServerPutAlerts(t *testing.T) {
assert.Equal(t, gettableAlerts[0].Alert.Labels["alertname"], "test-alert")
assert.NoError(t, server.Stop(context.Background()))
}
// TestServerTestAlert checks that Server.TestAlert delivers test alerts to
// every receiver named in the receivers map. Two local webhook servers stand
// in for the receivers; alert1 is routed to both, alert2 only to receiver-2.
func TestServerTestAlert(t *testing.T) {
	stateStore := alertmanagertypestest.NewStateStore()
	srvCfg := NewConfig()
	srvCfg.Route.GroupInterval = 1 * time.Second
	notificationManager := nfmanagertest.NewMock()
	server, err := New(context.Background(), slog.New(slog.NewTextHandler(io.Discard, nil)), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
	require.NoError(t, err)
	amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
	require.NoError(t, err)

	webhook1Listener, err := net.Listen("tcp", "localhost:0")
	require.NoError(t, err)
	webhook2Listener, err := net.Listen("tcp", "localhost:0")
	require.NoError(t, err)

	// The handlers run on the HTTP servers' goroutines while the assertions
	// run on the test goroutine. Hits are recorded through buffered channels
	// rather than plain ints so the two sides never touch an unsynchronized
	// variable (a plain counter is a data race under `go test -race`).
	hits1 := make(chan struct{}, 16)
	hits2 := make(chan struct{}, 16)
	recordHit := func(hits chan<- struct{}) http.HandlerFunc {
		return func(w http.ResponseWriter, r *http.Request) {
			select {
			case hits <- struct{}{}:
			default: // buffer full; the test only needs to know at least one hit occurred
			}
			w.WriteHeader(http.StatusOK)
		}
	}

	webhook1Server := &http.Server{Handler: recordHit(hits1)}
	webhook2Server := &http.Server{Handler: recordHit(hits2)}
	go func() {
		_ = webhook1Server.Serve(webhook1Listener)
	}()
	go func() {
		_ = webhook2Server.Serve(webhook2Listener)
	}()

	webhook1URL, err := url.Parse("http://" + webhook1Listener.Addr().String() + "/webhook")
	require.NoError(t, err)
	webhook2URL, err := url.Parse("http://" + webhook2Listener.Addr().String() + "/webhook")
	require.NoError(t, err)

	require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
		Name: "receiver-1",
		WebhookConfigs: []*config.WebhookConfig{
			{
				HTTPConfig: &commoncfg.HTTPClientConfig{},
				URL:        &config.SecretURL{URL: webhook1URL},
			},
		},
	}))
	require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
		Name: "receiver-2",
		WebhookConfigs: []*config.WebhookConfig{
			{
				HTTPConfig: &commoncfg.HTTPClientConfig{},
				URL:        &config.SecretURL{URL: webhook2URL},
			},
		},
	}))
	require.NoError(t, server.SetConfig(context.Background(), amConfig))
	defer func() {
		_ = server.Stop(context.Background())
		_ = webhook1Server.Close()
		_ = webhook2Server.Close()
	}()

	// Test with multiple alerts going to different receivers
	alert1 := &alertmanagertypes.PostableAlert{
		Annotations: models.LabelSet{"alertname": "test-alert-1"},
		StartsAt:    strfmt.DateTime(time.Now()),
		Alert: models.Alert{
			Labels: models.LabelSet{"alertname": "test-alert-1", "severity": "critical"},
		},
	}
	alert2 := &alertmanagertypes.PostableAlert{
		Annotations: models.LabelSet{"alertname": "test-alert-2"},
		StartsAt:    strfmt.DateTime(time.Now()),
		Alert: models.Alert{
			Labels: models.LabelSet{"alertname": "test-alert-2", "severity": "warning"},
		},
	}
	receiversMap := map[*alertmanagertypes.PostableAlert][]string{
		alert1: {"receiver-1", "receiver-2"},
		alert2: {"receiver-2"},
	}

	// Named notifConfig so the local does not shadow the imported config package.
	notifConfig := &alertmanagertypes.NotificationConfig{
		NotificationGroup: make(map[model.LabelName]struct{}),
		GroupByAll:        false,
	}

	err = server.TestAlert(context.Background(), receiversMap, notifConfig)
	require.NoError(t, err)

	time.Sleep(100 * time.Millisecond)
	assert.Greater(t, len(hits1), 0, "receiver-1 should have received at least one request")
	assert.Greater(t, len(hits2), 0, "receiver-2 should have received at least one request")
}
// TestServerTestAlertContinuesOnFailure checks that Server.TestAlert still
// notifies a healthy receiver when another receiver for the same alert fails,
// and that the failure is reported through the returned error.
func TestServerTestAlertContinuesOnFailure(t *testing.T) {
	stateStore := alertmanagertypestest.NewStateStore()
	srvCfg := NewConfig()
	srvCfg.Route.GroupInterval = 1 * time.Second
	notificationManager := nfmanagertest.NewMock()
	server, err := New(context.Background(), slog.New(slog.NewTextHandler(io.Discard, nil)), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
	require.NoError(t, err)
	amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
	require.NoError(t, err)

	// Create one working webhook and one failing receiver. The failing
	// receiver is configured, but its URL points at localhost:1, where no
	// webhook server is started by this test.
	webhookListener, err := net.Listen("tcp", "localhost:0")
	require.NoError(t, err)

	// The handler runs on the HTTP server's goroutine while the assertion runs
	// on the test goroutine. Hits are recorded through a buffered channel
	// rather than a plain int to avoid an unsynchronized read/write pair
	// (a data race under `go test -race`).
	hits := make(chan struct{}, 16)
	webhookServer := &http.Server{
		Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			select {
			case hits <- struct{}{}:
			default: // buffer full; the test only needs to know at least one hit occurred
			}
			w.WriteHeader(http.StatusOK)
		}),
	}
	go func() {
		_ = webhookServer.Serve(webhookListener)
	}()

	webhookURL, err := url.Parse("http://" + webhookListener.Addr().String() + "/webhook")
	require.NoError(t, err)

	require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
		Name: "working-receiver",
		WebhookConfigs: []*config.WebhookConfig{
			{
				HTTPConfig: &commoncfg.HTTPClientConfig{},
				URL:        &config.SecretURL{URL: webhookURL},
			},
		},
	}))
	require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
		Name: "failing-receiver",
		WebhookConfigs: []*config.WebhookConfig{
			{
				HTTPConfig: &commoncfg.HTTPClientConfig{},
				URL:        &config.SecretURL{URL: &url.URL{Scheme: "http", Host: "localhost:1", Path: "/webhook"}},
			},
		},
	}))
	require.NoError(t, server.SetConfig(context.Background(), amConfig))
	defer func() {
		_ = server.Stop(context.Background())
		_ = webhookServer.Close()
	}()

	alert := &alertmanagertypes.PostableAlert{
		Annotations: models.LabelSet{"alertname": "test-alert"},
		StartsAt:    strfmt.DateTime(time.Now()),
		Alert: models.Alert{
			Labels: models.LabelSet{"alertname": "test-alert"},
		},
	}
	receiversMap := map[*alertmanagertypes.PostableAlert][]string{
		alert: {"working-receiver", "failing-receiver"},
	}

	// Named notifConfig so the local does not shadow the imported config package.
	notifConfig := &alertmanagertypes.NotificationConfig{
		NotificationGroup: make(map[model.LabelName]struct{}),
		GroupByAll:        false,
	}

	// The failing receiver must surface as an error...
	err = server.TestAlert(context.Background(), receiversMap, notifConfig)
	assert.Error(t, err)

	// ...while the working receiver is still notified.
	time.Sleep(100 * time.Millisecond)
	assert.Greater(t, len(hits), 0, "working-receiver should have received at least one request even though failing-receiver failed")
}

View File

@ -2,6 +2,7 @@ package alertmanager
import (
"context"
"encoding/json"
"io"
"net/http"
"time"
@ -273,3 +274,128 @@ func (api *API) CreateChannel(rw http.ResponseWriter, req *http.Request) {
render.Success(rw, http.StatusNoContent, nil)
}
// CreateRoutePolicy is the HTTP handler that creates a routing policy.
//
// The JSON request body is decoded into a PostableRoutePolicy, its expression
// kind is set to PolicyBasedExpression, and the payload is validated before
// being passed to the alertmanager. The alertmanager's result is rendered with
// 201 Created; every failure is rendered through render.Error.
func (api *API) CreateRoutePolicy(rw http.ResponseWriter, req *http.Request) {
	// The whole request is bounded to 30 seconds.
	ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
	defer cancel()

	raw, readErr := io.ReadAll(req.Body)
	if readErr != nil {
		render.Error(rw, readErr)
		return
	}
	defer req.Body.Close()

	payload := alertmanagertypes.PostableRoutePolicy{}
	if decodeErr := json.Unmarshal(raw, &payload); decodeErr != nil {
		render.Error(rw, decodeErr)
		return
	}

	// Assigned after decoding, so it overrides any kind present in the body.
	payload.ExpressionKind = alertmanagertypes.PolicyBasedExpression
	if validationErr := payload.Validate(); validationErr != nil {
		render.Error(rw, validationErr)
		return
	}

	created, createErr := api.alertmanager.CreateRoutePolicy(ctx, &payload)
	if createErr != nil {
		render.Error(rw, createErr)
		return
	}
	render.Success(rw, http.StatusCreated, created)
}
// GetAllRoutePolicies is the HTTP handler that lists routing policies.
// It asks the alertmanager for all policies under a 30 second timeout and
// renders them with 200 OK, or renders the error when the lookup fails.
func (api *API) GetAllRoutePolicies(rw http.ResponseWriter, req *http.Request) {
	ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
	defer cancel()

	if policies, err := api.alertmanager.GetAllRoutePolicies(ctx); err != nil {
		render.Error(rw, err)
	} else {
		render.Success(rw, http.StatusOK, policies)
	}
}
// GetRoutePolicyByID is the HTTP handler that fetches one routing policy.
// The policy ID comes from the "id" route variable; an empty ID is rejected
// as invalid input. The policy is rendered with 200 OK on success.
func (api *API) GetRoutePolicyByID(rw http.ResponseWriter, req *http.Request) {
	ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
	defer cancel()

	policyID := mux.Vars(req)["id"]
	if len(policyID) == 0 {
		render.Error(rw, errors.NewInvalidInputf(errors.CodeInvalidInput, "policy ID is required"))
		return
	}

	if policy, err := api.alertmanager.GetRoutePolicyByID(ctx, policyID); err != nil {
		render.Error(rw, err)
	} else {
		render.Success(rw, http.StatusOK, policy)
	}
}
// DeleteRoutePolicyByID is the HTTP handler that removes one routing policy.
// The policy ID comes from the "id" route variable; an empty ID is rejected
// as invalid input. A successful delete is answered with 204 No Content.
func (api *API) DeleteRoutePolicyByID(rw http.ResponseWriter, req *http.Request) {
	ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
	defer cancel()

	policyID := mux.Vars(req)["id"]
	if len(policyID) == 0 {
		render.Error(rw, errors.NewInvalidInputf(errors.CodeInvalidInput, "policy ID is required"))
		return
	}

	if deleteErr := api.alertmanager.DeleteRoutePolicyByID(ctx, policyID); deleteErr != nil {
		render.Error(rw, deleteErr)
		return
	}
	render.Success(rw, http.StatusNoContent, nil)
}
// UpdateRoutePolicy is the HTTP handler that updates a routing policy.
//
// The policy ID comes from the "id" route variable and must be non-empty. The
// JSON body is decoded into a PostableRoutePolicy, its expression kind is set
// to PolicyBasedExpression, and the payload is validated before the
// alertmanager is asked to update the policy. The result is rendered with
// 200 OK; every failure is rendered through render.Error.
func (api *API) UpdateRoutePolicy(rw http.ResponseWriter, req *http.Request) {
	ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
	defer cancel()

	policyID := mux.Vars(req)["id"]
	if len(policyID) == 0 {
		render.Error(rw, errors.NewInvalidInputf(errors.CodeInvalidInput, "policy ID is required"))
		return
	}

	raw, readErr := io.ReadAll(req.Body)
	if readErr != nil {
		render.Error(rw, readErr)
		return
	}
	defer req.Body.Close()

	payload := alertmanagertypes.PostableRoutePolicy{}
	if decodeErr := json.Unmarshal(raw, &payload); decodeErr != nil {
		render.Error(rw, decodeErr)
		return
	}

	// Assigned after decoding, so it overrides any kind present in the body.
	payload.ExpressionKind = alertmanagertypes.PolicyBasedExpression
	if validationErr := payload.Validate(); validationErr != nil {
		render.Error(rw, validationErr)
		return
	}

	updated, updateErr := api.alertmanager.UpdateRoutePolicyByID(ctx, policyID, &payload)
	if updateErr != nil {
		render.Error(rw, updateErr)
		return
	}
	render.Success(rw, http.StatusOK, updated)
}

View File

@ -1,12 +1,19 @@
package nfmanagertest
import (
"context"
"fmt"
"strings"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/prometheus/common/model"
)
// MockNotificationManager is a simple mock implementation of NotificationManager.
// All state is held in plain in-memory maps.
type MockNotificationManager struct {
// configs holds notification configs by composite key.
// NOTE(review): presumably keyed by getKey(orgID, ruleID) — confirm against the setter.
configs map[string]*alertmanagertypes.NotificationConfig
// routes holds route policies keyed by getKey(orgID, route ID).
routes map[string]*alertmanagertypes.RoutePolicy
// routesByName holds route policies grouped under getKey(orgID, route name).
routesByName map[string][]*alertmanagertypes.RoutePolicy
// errors holds errors to be returned by the mock, keyed by getKey(orgID, <operation or ruleID>).
errors map[string]error
}
@ -14,6 +21,8 @@ type MockNotificationManager struct {
// NewMock returns a MockNotificationManager whose internal maps are all
// initialised and empty.
func NewMock() *MockNotificationManager {
	mock := new(MockNotificationManager)
	mock.configs = map[string]*alertmanagertypes.NotificationConfig{}
	mock.routes = map[string]*alertmanagertypes.RoutePolicy{}
	mock.routesByName = map[string][]*alertmanagertypes.RoutePolicy{}
	mock.errors = map[string]error{}
	return mock
}
@ -65,6 +74,8 @@ func (m *MockNotificationManager) SetMockError(orgID, ruleID string, err error)
// ClearMockData discards all stored configs, routes and configured errors by
// replacing every internal map with a fresh empty one.
func (m *MockNotificationManager) ClearMockData() {
	m.configs = map[string]*alertmanagertypes.NotificationConfig{}
	m.routes = map[string]*alertmanagertypes.RoutePolicy{}
	m.routesByName = map[string][]*alertmanagertypes.RoutePolicy{}
	m.errors = map[string]error{}
}
@ -73,3 +84,241 @@ func (m *MockNotificationManager) HasConfig(orgID, ruleID string) bool {
_, exists := m.configs[key]
return exists
}
// Route Policy CRUD
// CreateRoutePolicy stores a route policy in the mock.
//
// An error configured for the "create_route" operation of the org is returned
// first. Otherwise the route must be non-nil and pass Validate; it is then
// indexed both by ID and by name.
func (m *MockNotificationManager) CreateRoutePolicy(ctx context.Context, orgID string, route *alertmanagertypes.RoutePolicy) error {
	if injected := m.errors[getKey(orgID, "create_route")]; injected != nil {
		return injected
	}
	if route == nil {
		return fmt.Errorf("route cannot be nil")
	}
	if invalid := route.Validate(); invalid != nil {
		return invalid
	}

	byID := getKey(orgID, route.ID.StringValue())
	m.routes[byID] = route

	byName := getKey(orgID, route.Name)
	m.routesByName[byName] = append(m.routesByName[byName], route)
	return nil
}
// CreateRoutePolicies stores several route policies in the mock.
//
// An error configured for the "create_routes" operation of the org is returned
// first. The slice must be non-empty, and every entry must be non-nil and pass
// Validate; all entries are checked before any is stored. Validation failures
// are wrapped with %w so callers can inspect the cause with errors.Is and
// errors.As. Storing is delegated to CreateRoutePolicy, whose first error
// aborts the loop.
func (m *MockNotificationManager) CreateRoutePolicies(ctx context.Context, orgID string, routes []*alertmanagertypes.RoutePolicy) error {
	key := getKey(orgID, "create_routes")
	if err := m.errors[key]; err != nil {
		return err
	}

	if len(routes) == 0 {
		return fmt.Errorf("routes cannot be empty")
	}

	// Check everything up front so a bad entry does not leave a partial batch.
	for i, route := range routes {
		if route == nil {
			return fmt.Errorf("route at index %d cannot be nil", i)
		}
		if err := route.Validate(); err != nil {
			return fmt.Errorf("route at index %d: %w", i, err)
		}
	}

	for _, route := range routes {
		if err := m.CreateRoutePolicy(ctx, orgID, route); err != nil {
			return err
		}
	}
	return nil
}
// GetRoutePolicyByID returns the stored route policy with the given ID.
//
// An error configured for the "get_route" operation of the org is returned
// first. An empty routeID and an unknown route both produce an error.
func (m *MockNotificationManager) GetRoutePolicyByID(ctx context.Context, orgID string, routeID string) (*alertmanagertypes.RoutePolicy, error) {
	if injected := m.errors[getKey(orgID, "get_route")]; injected != nil {
		return nil, injected
	}
	if len(routeID) == 0 {
		return nil, fmt.Errorf("routeID cannot be empty")
	}

	if route, found := m.routes[getKey(orgID, routeID)]; found {
		return route, nil
	}
	return nil, fmt.Errorf("route with ID %s not found", routeID)
}
// GetAllRoutePolicies returns every stored route policy whose OrgID field
// equals orgID.
//
// An error configured for the "get_all_routes" operation of the org is
// returned first, and an empty orgID is rejected. The result comes from
// ranging over a map, so its order is not stable between calls.
func (m *MockNotificationManager) GetAllRoutePolicies(ctx context.Context, orgID string) ([]*alertmanagertypes.RoutePolicy, error) {
	key := getKey(orgID, "get_all_routes")
	if err := m.errors[key]; err != nil {
		return nil, err
	}

	if orgID == "" {
		return nil, fmt.Errorf("orgID cannot be empty")
	}

	var routes []*alertmanagertypes.RoutePolicy
	// Filtering uses the route's own OrgID, not the map key.
	for _, route := range m.routes {
		if route.OrgID == orgID {
			routes = append(routes, route)
		}
	}
	return routes, nil
}
// DeleteRoutePolicy removes the route policy with the given ID from the mock.
//
// An error configured for the "delete_route" operation of the org is returned
// first. An empty routeID and an unknown route both produce an error. The
// route is removed from the ID index and from its name group; a name group
// left empty is dropped entirely.
func (m *MockNotificationManager) DeleteRoutePolicy(ctx context.Context, orgID string, routeID string) error {
	if injected := m.errors[getKey(orgID, "delete_route")]; injected != nil {
		return injected
	}
	if len(routeID) == 0 {
		return fmt.Errorf("routeID cannot be empty")
	}

	idKey := getKey(orgID, routeID)
	target, found := m.routes[idKey]
	if !found {
		return fmt.Errorf("route with ID %s not found", routeID)
	}
	delete(m.routes, idKey)

	groupKey := getKey(orgID, target.Name)
	group, tracked := m.routesByName[groupKey]
	if !tracked {
		return nil
	}

	var remaining []*alertmanagertypes.RoutePolicy
	for idx := range group {
		if group[idx].ID.StringValue() == routeID {
			continue
		}
		remaining = append(remaining, group[idx])
	}

	if len(remaining) > 0 {
		m.routesByName[groupKey] = remaining
	} else {
		delete(m.routesByName, groupKey)
	}
	return nil
}
// DeleteAllRoutePoliciesByName removes every route policy stored under the
// given name for the org.
//
// An error configured for the "delete_routes_by_name" operation of the org is
// returned first. Empty orgID or name is rejected. A name with no stored
// routes is not an error.
func (m *MockNotificationManager) DeleteAllRoutePoliciesByName(ctx context.Context, orgID string, name string) error {
	if injected := m.errors[getKey(orgID, "delete_routes_by_name")]; injected != nil {
		return injected
	}

	switch {
	case orgID == "":
		return fmt.Errorf("orgID cannot be empty")
	case name == "":
		return fmt.Errorf("name cannot be empty")
	}

	groupKey := getKey(orgID, name)
	group, tracked := m.routesByName[groupKey]
	if !tracked {
		// Nothing stored under this name.
		return nil
	}

	for idx := range group {
		delete(m.routes, getKey(orgID, group[idx].ID.StringValue()))
	}
	delete(m.routesByName, groupKey)
	return nil
}
// Match returns the channels of all candidate routes whose expression matches
// the label set, as decided by the mock's evaluateExpr.
//
// An error configured under getKey(orgID, ruleID) is returned first. The
// candidates depend on the rule's notification config: with UsePolicy set,
// they are all stored policy-based routes of the org; otherwise they are the
// routes stored under the name ruleID.
func (m *MockNotificationManager) Match(ctx context.Context, orgID string, ruleID string, set model.LabelSet) ([]string, error) {
	if injected := m.errors[getKey(orgID, ruleID)]; injected != nil {
		return nil, injected
	}

	cfg, cfgErr := m.GetNotificationConfig(orgID, ruleID)
	if cfgErr != nil {
		return nil, cfgErr
	}

	var candidates []*alertmanagertypes.RoutePolicy
	if !cfg.UsePolicy {
		// A missing name group yields a nil slice, i.e. no candidates.
		candidates = m.routesByName[getKey(orgID, ruleID)]
	} else {
		for _, stored := range m.routes {
			if stored.OrgID != orgID || stored.ExpressionKind != alertmanagertypes.PolicyBasedExpression {
				continue
			}
			candidates = append(candidates, stored)
		}
	}

	var channels []string
	for _, candidate := range candidates {
		if !m.evaluateExpr(candidate.Expression, set) {
			continue
		}
		channels = append(channels, candidate.Channels...)
	}
	return channels, nil
}
// evaluateExpr is a canned stand-in for real expression evaluation.
//
// It recognises only three hard-coded expression fragments and decides the
// outcome from the "ruleId" label alone. A label set without "ruleId", or an
// expression containing none of the fragments, never matches. The fragments
// are tested in a fixed order and the first one found wins.
func (m *MockNotificationManager) evaluateExpr(expression string, labelSet model.LabelSet) bool {
	id, present := labelSet["ruleId"]
	if !present {
		return false
	}

	switch {
	case strings.Contains(expression, `ruleId in ["ruleId-OtherAlert", "ruleId-TestingAlert"]`):
		return id == "ruleId-OtherAlert" || id == "ruleId-TestingAlert"
	case strings.Contains(expression, `ruleId in ["ruleId-HighLatency", "ruleId-HighErrorRate"]`):
		return id == "ruleId-HighLatency" || id == "ruleId-HighErrorRate"
	case strings.Contains(expression, `ruleId == "ruleId-HighLatency"`):
		return id == "ruleId-HighLatency"
	default:
		return false
	}
}
// Helper methods for testing
// SetMockRoute stores a route directly in the mock, indexing it by ID and by
// name. No validation is performed and no configured error is consulted.
func (m *MockNotificationManager) SetMockRoute(orgID string, route *alertmanagertypes.RoutePolicy) {
	m.routes[getKey(orgID, route.ID.StringValue())] = route

	groupKey := getKey(orgID, route.Name)
	group := m.routesByName[groupKey]
	m.routesByName[groupKey] = append(group, route)
}
// SetMockRouteError registers err under getKey(orgID, operation), the key the
// route methods consult before doing any work.
func (m *MockNotificationManager) SetMockRouteError(orgID, operation string, err error) {
	m.errors[getKey(orgID, operation)] = err
}
// ClearMockRoutes drops every stored route from both the ID index and the
// name index. Configs and configured errors are left untouched.
func (m *MockNotificationManager) ClearMockRoutes() {
	m.routes = map[string]*alertmanagertypes.RoutePolicy{}
	m.routesByName = map[string][]*alertmanagertypes.RoutePolicy{}
}
// GetRouteCount reports how many routes are stored in the ID index, across
// all orgs.
func (m *MockNotificationManager) GetRouteCount() int {
	count := len(m.routes)
	return count
}
// HasRoute reports whether a route is stored under getKey(orgID, routeID).
func (m *MockNotificationManager) HasRoute(orgID, routeID string) bool {
	if _, found := m.routes[getKey(orgID, routeID)]; found {
		return true
	}
	return false
}

View File

@ -0,0 +1,176 @@
package nfroutingstoretest
import (
"context"
"regexp"
"strings"
"github.com/DATA-DOG/go-sqlmock"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/sqlroutingstore"
"github.com/SigNoz/signoz/pkg/sqlstore"
"github.com/SigNoz/signoz/pkg/sqlstore/sqlstoretest"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
)
// MockSQLRouteStore pairs a route store with the sqlmock handle of the
// database it runs against, so tests can queue SQL expectations and then
// exercise the store methods.
type MockSQLRouteStore struct {
// routeStore is the store every RouteStore method delegates to.
routeStore alertmanagertypes.RouteStore
// mock is the sqlmock handle used to register and verify expectations.
mock sqlmock.Sqlmock
}
// NewMockSQLRouteStore builds a MockSQLRouteStore on top of a test SQL store
// configured with the "sqlite" provider and regexp query matching.
func NewMockSQLRouteStore() *MockSQLRouteStore {
	backing := sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherRegexp)

	wrapper := new(MockSQLRouteStore)
	wrapper.routeStore = sqlroutingstore.NewStore(backing)
	wrapper.mock = backing.Mock()
	return wrapper
}
// Mock exposes the underlying sqlmock handle so tests can register custom
// expectations.
func (m *MockSQLRouteStore) Mock() sqlmock.Sqlmock {
	handle := m.mock
	return handle
}
// GetByID forwards the lookup to the wrapped route store.
func (m *MockSQLRouteStore) GetByID(ctx context.Context, orgId string, id string) (*alertmanagertypes.RoutePolicy, error) {
	policy, err := m.routeStore.GetByID(ctx, orgId, id)
	return policy, err
}
// Create forwards the insert to the wrapped route store.
func (m *MockSQLRouteStore) Create(ctx context.Context, route *alertmanagertypes.RoutePolicy) error {
	err := m.routeStore.Create(ctx, route)
	return err
}
// CreateBatch forwards the batch insert to the wrapped route store.
func (m *MockSQLRouteStore) CreateBatch(ctx context.Context, routes []*alertmanagertypes.RoutePolicy) error {
	err := m.routeStore.CreateBatch(ctx, routes)
	return err
}
// Delete forwards the delete-by-ID to the wrapped route store.
func (m *MockSQLRouteStore) Delete(ctx context.Context, orgId string, id string) error {
	err := m.routeStore.Delete(ctx, orgId, id)
	return err
}
// GetAllByKind forwards the by-kind listing to the wrapped route store.
func (m *MockSQLRouteStore) GetAllByKind(ctx context.Context, orgID string, kind alertmanagertypes.ExpressionKind) ([]*alertmanagertypes.RoutePolicy, error) {
	policies, err := m.routeStore.GetAllByKind(ctx, orgID, kind)
	return policies, err
}
// GetAllByName forwards the by-name listing to the wrapped route store.
func (m *MockSQLRouteStore) GetAllByName(ctx context.Context, orgID string, name string) ([]*alertmanagertypes.RoutePolicy, error) {
	policies, err := m.routeStore.GetAllByName(ctx, orgID, name)
	return policies, err
}
// DeleteRouteByName forwards the delete-by-name to the wrapped route store.
func (m *MockSQLRouteStore) DeleteRouteByName(ctx context.Context, orgID string, name string) error {
	err := m.routeStore.DeleteRouteByName(ctx, orgID, name)
	return err
}
// ExpectGetByID queues an expectation for the SELECT issued by GetByID.
//
// The expected query must be called with the arguments (id, orgID). When route
// is non-nil the query returns one row built from it; when route is nil it
// returns an empty result set. Tags are always returned as "[]" and the
// timestamp and audit columns carry fixed placeholder values.
func (m *MockSQLRouteStore) ExpectGetByID(orgID, id string, route *alertmanagertypes.RoutePolicy) {
	rows := sqlmock.NewRows([]string{"id", "org_id", "name", "expression", "kind", "description", "enabled", "tags", "channels", "created_at", "updated_at", "created_by", "updated_by"})

	if route != nil {
		// Joining zero channels would produce [""] (one empty channel), so an
		// empty list is emitted as [] explicitly. Channel names are not
		// JSON-escaped.
		channels := "[]"
		if len(route.Channels) > 0 {
			channels = `["` + strings.Join(route.Channels, `","`) + `"]`
		}
		rows.AddRow(
			route.ID.StringValue(),
			route.OrgID,
			route.Name,
			route.Expression,
			route.ExpressionKind.StringValue(),
			route.Description,
			route.Enabled,
			"[]",                   // tags as JSON
			channels,               // channels as JSON
			"0001-01-01T00:00:00Z", // created_at
			"0001-01-01T00:00:00Z", // updated_at
			"",                     // created_by
			"",                     // updated_by
		)
	}

	m.mock.ExpectQuery(`SELECT (.+) FROM "route_policy" WHERE \(id = \$1\) AND \(org_id = \$2\)`).
		WithArgs(id, orgID).
		WillReturnRows(rows)
}
// ExpectCreate queues an expectation for one INSERT into "route_policy" that
// reports a single affected row. The route argument is not inspected.
func (m *MockSQLRouteStore) ExpectCreate(route *alertmanagertypes.RoutePolicy) {
	const insertPattern = `INSERT INTO "route_policy" \(.+\) VALUES .+`
	outcome := sqlmock.NewResult(1, 1)
	m.mock.ExpectExec(insertPattern).WillReturnResult(outcome)
}
// ExpectCreateBatch queues an expectation for one INSERT into "route_policy"
// that reports len(routes) affected rows. Nothing is queued for an empty
// slice.
func (m *MockSQLRouteStore) ExpectCreateBatch(routes []*alertmanagertypes.RoutePolicy) {
	inserted := len(routes)
	if inserted == 0 {
		return
	}

	// Deliberately loose: matches any INSERT into route_policy.
	const insertPattern = `INSERT INTO "route_policy" \(.+\) VALUES .+`
	outcome := sqlmock.NewResult(1, int64(inserted))
	m.mock.ExpectExec(insertPattern).WillReturnResult(outcome)
}
// ExpectDelete queues an expectation for the DELETE issued by Delete, with
// orgID and id regexp-escaped and embedded as quoted literals. The statement
// reports one affected row.
func (m *MockSQLRouteStore) ExpectDelete(orgID, id string) {
	orgLiteral := regexp.QuoteMeta(orgID)
	idLiteral := regexp.QuoteMeta(id)
	pattern := `DELETE FROM "route_policy" AS "route_policy" WHERE \(org_id = '` + orgLiteral + `'\) AND \(id = '` + idLiteral + `'\)`
	m.mock.ExpectExec(pattern).WillReturnResult(sqlmock.NewResult(0, 1))
}
// ExpectGetAllByKindAndOrgID queues an expectation for the SELECT issued by
// GetAllByKind.
//
// Only the routes whose OrgID and ExpressionKind equal the arguments are
// returned as rows. Tags are always returned as "[]" and the timestamp and
// audit columns carry fixed placeholder values.
func (m *MockSQLRouteStore) ExpectGetAllByKindAndOrgID(orgID string, kind alertmanagertypes.ExpressionKind, routes []*alertmanagertypes.RoutePolicy) {
	rows := sqlmock.NewRows([]string{"id", "org_id", "name", "expression", "kind", "description", "enabled", "tags", "channels", "created_at", "updated_at", "created_by", "updated_by"})

	for _, route := range routes {
		if route.OrgID != orgID || route.ExpressionKind != kind {
			continue
		}
		// Joining zero channels would produce [""] (one empty channel), so an
		// empty list is emitted as [] explicitly. Channel names are not
		// JSON-escaped.
		channels := "[]"
		if len(route.Channels) > 0 {
			channels = `["` + strings.Join(route.Channels, `","`) + `"]`
		}
		rows.AddRow(
			route.ID.StringValue(),
			route.OrgID,
			route.Name,
			route.Expression,
			route.ExpressionKind.StringValue(),
			route.Description,
			route.Enabled,
			"[]",                   // tags as JSON
			channels,               // channels as JSON
			"0001-01-01T00:00:00Z", // created_at
			"0001-01-01T00:00:00Z", // updated_at
			"",                     // created_by
			"",                     // updated_by
		)
	}

	m.mock.ExpectQuery(`SELECT (.+) FROM "route_policy" WHERE \(org_id = '` + regexp.QuoteMeta(orgID) + `'\) AND \(kind = '` + regexp.QuoteMeta(kind.StringValue()) + `'\)`).
		WillReturnRows(rows)
}
// ExpectGetAllByName queues an expectation for the SELECT issued by
// GetAllByName.
//
// Only the routes whose OrgID and Name equal the arguments are returned as
// rows. Tags are always returned as "[]" and the timestamp and audit columns
// carry fixed placeholder values.
func (m *MockSQLRouteStore) ExpectGetAllByName(orgID, name string, routes []*alertmanagertypes.RoutePolicy) {
	rows := sqlmock.NewRows([]string{"id", "org_id", "name", "expression", "kind", "description", "enabled", "tags", "channels", "created_at", "updated_at", "created_by", "updated_by"})

	for _, route := range routes {
		if route.OrgID != orgID || route.Name != name {
			continue
		}
		// Joining zero channels would produce [""] (one empty channel), so an
		// empty list is emitted as [] explicitly. Channel names are not
		// JSON-escaped.
		channels := "[]"
		if len(route.Channels) > 0 {
			channels = `["` + strings.Join(route.Channels, `","`) + `"]`
		}
		rows.AddRow(
			route.ID.StringValue(),
			route.OrgID,
			route.Name,
			route.Expression,
			route.ExpressionKind.StringValue(),
			route.Description,
			route.Enabled,
			"[]",                   // tags as JSON
			channels,               // channels as JSON
			"0001-01-01T00:00:00Z", // created_at
			"0001-01-01T00:00:00Z", // updated_at
			"",                     // created_by
			"",                     // updated_by
		)
	}

	m.mock.ExpectQuery(`SELECT (.+) FROM "route_policy" WHERE \(org_id = '` + regexp.QuoteMeta(orgID) + `'\) AND \(name = '` + regexp.QuoteMeta(name) + `'\)`).
		WillReturnRows(rows)
}
// ExpectDeleteRouteByName queues an expectation for the DELETE issued by
// DeleteRouteByName, with orgID and name regexp-escaped and embedded as quoted
// literals. The statement reports one affected row.
func (m *MockSQLRouteStore) ExpectDeleteRouteByName(orgID, name string) {
	orgLiteral := regexp.QuoteMeta(orgID)
	nameLiteral := regexp.QuoteMeta(name)
	pattern := `DELETE FROM "route_policy" AS "route_policy" WHERE \(org_id = '` + orgLiteral + `'\) AND \(name = '` + nameLiteral + `'\)`
	m.mock.ExpectExec(pattern).WillReturnResult(sqlmock.NewResult(0, 1))
}
// ExpectationsWereMet returns the result of the underlying sqlmock's
// ExpectationsWereMet check.
func (m *MockSQLRouteStore) ExpectationsWereMet() error {
	unmet := m.mock.ExpectationsWereMet()
	return unmet
}
// MatchExpectationsInOrder passes the ordering flag straight through to the
// underlying sqlmock.
func (m *MockSQLRouteStore) MatchExpectationsInOrder(match bool) {
	handle := m.mock
	handle.MatchExpectationsInOrder(match)
}

View File

@ -0,0 +1,93 @@
package sqlroutingstore
import (
"context"
"database/sql"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/sqlstore"
routeTypes "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
)
// store is the SQL-backed implementation of routeTypes.RouteStore returned by NewStore.
type store struct {
// sqlstore provides the bun database handle used by every query.
sqlstore sqlstore.SQLStore
}
// NewStore returns a RouteStore that runs its queries against the given SQL
// store.
func NewStore(sqlstore sqlstore.SQLStore) routeTypes.RouteStore {
	s := new(store)
	s.sqlstore = sqlstore
	return s
}
// GetByID loads the routing policy with the given ID within the org.
// A missing row is reported as a not-found error; any other failure is
// wrapped as an internal error.
func (store *store) GetByID(ctx context.Context, orgId string, id string) (*routeTypes.RoutePolicy, error) {
	policy := new(routeTypes.RoutePolicy)
	query := store.sqlstore.BunDBCtx(ctx).
		NewSelect().
		Model(policy).
		Where("id = ?", id).
		Where("org_id = ?", orgId)

	scanErr := query.Scan(ctx)
	switch {
	case scanErr == nil:
		return policy, nil
	case errors.Is(scanErr, sql.ErrNoRows):
		return nil, store.sqlstore.WrapNotFoundErrf(scanErr, errors.CodeNotFound, "routing policy with ID: %s does not exist", id)
	default:
		return nil, errors.Wrapf(scanErr, errors.TypeInternal, errors.CodeInternal, "unable to fetch routing policy with ID: %s", id)
	}
}
// Create inserts a single routing policy.
// A failed insert is returned as an internal error that wraps the database
// error, matching the other methods of this store, so the cause is not lost.
func (store *store) Create(ctx context.Context, route *routeTypes.RoutePolicy) error {
	_, err := store.sqlstore.BunDBCtx(ctx).NewInsert().Model(route).Exec(ctx)
	if err != nil {
		return errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "error creating routing policy with ID: %s", route.ID)
	}
	return nil
}
// CreateBatch inserts all given routing policies with one INSERT statement.
// A failed insert is returned as an internal error that wraps the database
// error, matching the other methods of this store, instead of flattening the
// cause into the message text.
func (store *store) CreateBatch(ctx context.Context, route []*routeTypes.RoutePolicy) error {
	_, err := store.sqlstore.BunDBCtx(ctx).NewInsert().Model(&route).Exec(ctx)
	if err != nil {
		return errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "error creating routing policies")
	}
	return nil
}
// Delete removes the routing policy with the given ID within the org.
// A failed statement is wrapped as an internal error.
func (store *store) Delete(ctx context.Context, orgId string, id string) error {
	query := store.sqlstore.BunDBCtx(ctx).
		NewDelete().
		Model((*routeTypes.RoutePolicy)(nil)).
		Where("org_id = ?", orgId).
		Where("id = ?", id)

	if _, execErr := query.Exec(ctx); execErr != nil {
		return errors.Wrapf(execErr, errors.TypeInternal, errors.CodeInternal, "unable to delete routing policy with ID: %s", id)
	}
	return nil
}
// GetAllByKind loads every routing policy of the given expression kind within
// the org. sql.ErrNoRows is mapped to a not-found error; any other failure is
// wrapped as an internal error.
func (store *store) GetAllByKind(ctx context.Context, orgID string, kind routeTypes.ExpressionKind) ([]*routeTypes.RoutePolicy, error) {
	var policies []*routeTypes.RoutePolicy
	query := store.sqlstore.BunDBCtx(ctx).
		NewSelect().
		Model(&policies).
		Where("org_id = ?", orgID).
		Where("kind = ?", kind)

	scanErr := query.Scan(ctx)
	switch {
	case scanErr == nil:
		return policies, nil
	case errors.Is(scanErr, sql.ErrNoRows):
		return nil, errors.NewNotFoundf(errors.CodeNotFound, "no routing policies found for orgID: %s", orgID)
	default:
		return nil, errors.Wrapf(scanErr, errors.TypeInternal, errors.CodeInternal, "unable to fetch routing policies for orgID: %s", orgID)
	}
}
// GetAllByName loads every routing policy stored under the given name within
// the org. sql.ErrNoRows is mapped to a not-found error; any other failure is
// wrapped as an internal error. The slice is always nil when an error is
// returned, as in GetAllByKind.
func (store *store) GetAllByName(ctx context.Context, orgID string, name string) ([]*routeTypes.RoutePolicy, error) {
	var routes []*routeTypes.RoutePolicy
	err := store.sqlstore.BunDBCtx(ctx).NewSelect().Model(&routes).Where("org_id = ?", orgID).Where("name = ?", name).Scan(ctx)
	if err != nil {
		if errors.Is(err, sql.ErrNoRows) {
			return nil, errors.NewNotFoundf(errors.CodeNotFound, "no routing policies found for orgID: %s and name: %s", orgID, name)
		}
		return nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to fetch routing policies for orgID: %s and name: %s", orgID, name)
	}
	return routes, nil
}
// DeleteRouteByName removes every routing policy stored under the given name
// within the org. A failed statement is wrapped as an internal error.
func (store *store) DeleteRouteByName(ctx context.Context, orgID string, name string) error {
	query := store.sqlstore.BunDBCtx(ctx).
		NewDelete().
		Model((*routeTypes.RoutePolicy)(nil)).
		Where("org_id = ?", orgID).
		Where("name = ?", name)

	if _, execErr := query.Exec(ctx); execErr != nil {
		return errors.Wrapf(execErr, errors.TypeInternal, errors.CodeInternal, "unable to delete routing policies with name: %s", name)
	}
	return nil
}

View File

@ -2,12 +2,27 @@
package nfmanager
import (
"context"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/prometheus/common/model"
)
// NotificationManager defines how alerts should be grouped and configured for notification,
// and how they are routed to channels. Every method is scoped to an organisation via orgID.
type NotificationManager interface {
// Notification Config CRUD
// GetNotificationConfig returns the notification config for a rule of an org.
GetNotificationConfig(orgID string, ruleID string) (*alertmanagertypes.NotificationConfig, error)
// SetNotificationConfig stores the notification config for a rule of an org.
SetNotificationConfig(orgID string, ruleID string, config *alertmanagertypes.NotificationConfig) error
// DeleteNotificationConfig removes the notification config for a rule of an org.
DeleteNotificationConfig(orgID string, ruleID string) error
// Route Policy CRUD
// CreateRoutePolicy stores a single route policy.
CreateRoutePolicy(ctx context.Context, orgID string, route *alertmanagertypes.RoutePolicy) error
// CreateRoutePolicies stores several route policies in one call.
CreateRoutePolicies(ctx context.Context, orgID string, routes []*alertmanagertypes.RoutePolicy) error
// GetRoutePolicyByID returns the route policy with the given ID.
GetRoutePolicyByID(ctx context.Context, orgID string, routeID string) (*alertmanagertypes.RoutePolicy, error)
// GetAllRoutePolicies returns the route policies of an org.
GetAllRoutePolicies(ctx context.Context, orgID string) ([]*alertmanagertypes.RoutePolicy, error)
// DeleteRoutePolicy removes the route policy with the given ID.
DeleteRoutePolicy(ctx context.Context, orgID string, routeID string) error
// DeleteAllRoutePoliciesByName removes every route policy stored under the given name.
DeleteAllRoutePoliciesByName(ctx context.Context, orgID string, name string) error
// Route matching
// Match returns the notification channels selected for the label set of an alert raised by the given rule.
Match(ctx context.Context, orgID string, ruleID string, set model.LabelSet) ([]string, error)
}

View File

@ -2,11 +2,14 @@ package rulebasednotification
import (
"context"
"strings"
"sync"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/expr-lang/expr"
"github.com/prometheus/common/model"
"github.com/SigNoz/signoz/pkg/factory"
)
@ -14,26 +17,28 @@ import (
// provider is the rule-based NotificationManager implementation built by New.
type provider struct {
// settings are the provider settings scoped to this package.
settings factory.ScopedProviderSettings
// orgToFingerprintToNotificationConfig holds notification configs in a two-level map whose outer key is the org ID.
// NOTE(review): the inner key is presumably the rule ID despite the "Fingerprint" name — confirm against the setter.
orgToFingerprintToNotificationConfig map[string]map[string]alertmanagertypes.NotificationConfig
// routeStore persists and loads route policies.
routeStore alertmanagertypes.RouteStore
// NOTE(review): presumably guards the config map above; confirm in the notification-config methods.
mutex sync.RWMutex
}
// NewFactory creates a new factory for the rule-based grouping strategy.
// The factory is named "rulebased" and builds its provider by calling New.
// NOTE(review): the two func lines and the two return lines below look like the
// before/after sides of a diff rendered without +/- markers; confirm that only
// the routeStore variants remain in the real file.
func NewFactory() factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config] {
func NewFactory(routeStore alertmanagertypes.RouteStore) factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config] {
return factory.NewProviderFactory(
factory.MustNewName("rulebased"),
func(ctx context.Context, settings factory.ProviderSettings, config nfmanager.Config) (nfmanager.NotificationManager, error) {
return New(ctx, settings, config)
return New(ctx, settings, config, routeStore)
},
)
}
// New creates a new rule-based grouping strategy provider.
// The provider starts with an empty notification-config map and uses the given
// route store for route policies.
// NOTE(review): the two func lines below look like the before/after sides of a
// diff rendered without +/- markers; confirm that only the routeStore variant
// remains in the real file.
func New(ctx context.Context, providerSettings factory.ProviderSettings, config nfmanager.Config) (nfmanager.NotificationManager, error) {
func New(ctx context.Context, providerSettings factory.ProviderSettings, config nfmanager.Config, routeStore alertmanagertypes.RouteStore) (nfmanager.NotificationManager, error) {
// Scope the settings to this package's import path.
settings := factory.NewScopedProviderSettings(providerSettings, "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/rulebasednotification")
return &provider{
settings: settings,
orgToFingerprintToNotificationConfig: make(map[string]map[string]alertmanagertypes.NotificationConfig),
routeStore: routeStore,
}, nil
}
@ -58,6 +63,8 @@ func (r *provider) GetNotificationConfig(orgID string, ruleID string) (*alertman
for k, v := range config.NotificationGroup {
notificationConfig.NotificationGroup[k] = v
}
notificationConfig.UsePolicy = config.UsePolicy
notificationConfig.GroupByAll = config.GroupByAll
}
}
@ -101,3 +108,147 @@ func (r *provider) DeleteNotificationConfig(orgID string, ruleID string) error {
return nil
}
// CreateRoutePolicy validates a route policy and persists it through the
// route store. A nil or invalid policy is rejected as invalid input.
func (r *provider) CreateRoutePolicy(ctx context.Context, orgID string, route *alertmanagertypes.RoutePolicy) error {
	if route != nil {
		if validationErr := route.Validate(); validationErr != nil {
			return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid route policy: %v", validationErr)
		}
		return r.routeStore.Create(ctx, route)
	}
	return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policy cannot be nil")
}
// CreateRoutePolicies validates a batch of route policies and persists them
// with a single batch call to the route store. An empty batch, a nil entry or
// an entry failing validation is rejected as invalid input before anything is
// written.
func (r *provider) CreateRoutePolicies(ctx context.Context, orgID string, routes []*alertmanagertypes.RoutePolicy) error {
	if len(routes) == 0 {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policies cannot be empty")
	}

	for idx := 0; idx < len(routes); idx++ {
		policy := routes[idx]
		if policy == nil {
			return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policy cannot be nil")
		}
		validationErr := policy.Validate()
		if validationErr == nil {
			continue
		}
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policy with name %s: %s", policy.Name, validationErr.Error())
	}

	return r.routeStore.CreateBatch(ctx, routes)
}
// GetRoutePolicyByID loads a route policy by ID from the route store.
// An empty routeID is rejected as invalid input.
func (r *provider) GetRoutePolicyByID(ctx context.Context, orgID string, routeID string) (*alertmanagertypes.RoutePolicy, error) {
	if len(routeID) > 0 {
		return r.routeStore.GetByID(ctx, orgID, routeID)
	}
	return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
}
// GetAllRoutePolicies loads the org's route policies of kind
// PolicyBasedExpression from the route store. An empty orgID is rejected as
// invalid input.
func (r *provider) GetAllRoutePolicies(ctx context.Context, orgID string) ([]*alertmanagertypes.RoutePolicy, error) {
	if len(orgID) > 0 {
		return r.routeStore.GetAllByKind(ctx, orgID, alertmanagertypes.PolicyBasedExpression)
	}
	return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "orgID cannot be empty")
}
// DeleteRoutePolicy removes a route policy by ID through the route store.
// An empty routeID is rejected as invalid input.
func (r *provider) DeleteRoutePolicy(ctx context.Context, orgID string, routeID string) error {
	if len(routeID) > 0 {
		return r.routeStore.Delete(ctx, orgID, routeID)
	}
	return errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
}
// DeleteAllRoutePoliciesByName removes every route policy stored under the
// given name through the route store. Empty orgID or name is rejected as
// invalid input, with orgID checked first.
func (r *provider) DeleteAllRoutePoliciesByName(ctx context.Context, orgID string, name string) error {
	switch {
	case orgID == "":
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "orgID cannot be empty")
	case name == "":
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "name cannot be empty")
	default:
		return r.routeStore.DeleteRouteByName(ctx, orgID, name)
	}
}
// Match resolves the notification channels for an alert's label set.
//
// The candidate routes depend on the rule's notification config: with
// UsePolicy set they are all policy-based routes of the org, otherwise they
// are the routes stored under the name ruleID. A label set carrying the
// no-data label, for a rule that does not use policies, selects the channels
// of every candidate without evaluating any expression. In all other cases a
// candidate contributes its channels only when its expression evaluates to
// true; candidates whose expression fails to evaluate are skipped silently.
func (r *provider) Match(ctx context.Context, orgID string, ruleID string, set model.LabelSet) ([]string, error) {
	cfg, cfgErr := r.GetNotificationConfig(orgID, ruleID)
	if cfgErr != nil {
		return nil, errors.NewInternalf(errors.CodeInternal, "error getting notification configuration: %v", cfgErr)
	}

	var (
		candidates []*alertmanagertypes.RoutePolicy
		loadErr    error
	)
	if cfg.UsePolicy {
		candidates, loadErr = r.routeStore.GetAllByKind(ctx, orgID, alertmanagertypes.PolicyBasedExpression)
	} else {
		candidates, loadErr = r.routeStore.GetAllByName(ctx, orgID, ruleID)
	}
	if loadErr != nil {
		return []string{}, errors.NewInternalf(errors.CodeInternal, "error getting route policies: %v", loadErr)
	}

	var channels []string

	_, isNoData := set[alertmanagertypes.NoDataLabel]
	if isNoData && !cfg.UsePolicy {
		for idx := range candidates {
			channels = append(channels, candidates[idx].Channels...)
		}
		return channels, nil
	}

	for _, candidate := range candidates {
		matched, evalErr := r.evaluateExpr(candidate.Expression, set)
		if evalErr != nil || !matched {
			continue
		}
		channels = append(channels, candidate.Channels...)
	}
	return channels, nil
}
// evaluateExpr evaluates a route policy expression against an alert's labels
// and reports whether it matched.
//
// Every label becomes a string variable of the expression environment. A
// label name containing dots (for example "threshold.name") is expanded into
// nested maps so the expression can address it with member access.
//
// When one label name is a dotted prefix of another (for example "a" and
// "a.b"), the nested form always wins: a scalar never replaces a nested map,
// and a scalar standing in the way of a nested path is replaced by a map. This
// keeps the result independent of map iteration order and avoids the panic an
// unchecked type assertion would raise on such label sets.
//
// An error is returned when the expression does not compile, fails at run
// time, or evaluates to something other than a boolean.
func (r *provider) evaluateExpr(expression string, labelSet model.LabelSet) (bool, error) {
	env := make(map[string]interface{}, len(labelSet))

	for k, v := range labelSet {
		// A name without dots yields a single part, so it is stored directly.
		parts := strings.Split(string(k), ".")
		current := env
		for _, part := range parts[:len(parts)-1] {
			next, isMap := current[part].(map[string]interface{})
			if !isMap {
				next = make(map[string]interface{})
				current[part] = next
			}
			current = next
		}

		leaf := parts[len(parts)-1]
		if _, isMap := current[leaf].(map[string]interface{}); !isMap {
			current[leaf] = string(v)
		}
	}

	program, err := expr.Compile(expression, expr.Env(env))
	if err != nil {
		return false, errors.NewInternalf(errors.CodeInternal, "error compiling route policy %s: %v", expression, err)
	}

	output, err := expr.Run(program, env)
	if err != nil {
		return false, errors.NewInternalf(errors.CodeInternal, "error running route policy %s: %v", expression, err)
	}

	if boolVal, ok := output.(bool); ok {
		return boolVal, nil
	}

	// err is nil here, so report the unexpected result type instead of it.
	return false, errors.NewInternalf(errors.CodeInternal, "route policy %s evaluated to %T, expected a boolean", expression, output)
}

View File

@ -2,18 +2,22 @@ package rulebasednotification
import (
"context"
"github.com/prometheus/common/model"
"sync"
"testing"
"time"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest"
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/prometheus/alertmanager/types"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/prometheus/common/model"
)
func createTestProviderSettings() factory.ProviderSettings {
@ -21,7 +25,8 @@ func createTestProviderSettings() factory.ProviderSettings {
}
// TestNewFactory checks that NewFactory returns a non-nil factory whose name is "rulebased".
// NOTE(review): providerFactory is declared twice below; the first declaration looks like
// the removed side of a diff rendered without +/- markers — confirm against the real file.
func TestNewFactory(t *testing.T) {
providerFactory := NewFactory()
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
providerFactory := NewFactory(routeStore)
assert.NotNil(t, providerFactory)
assert.Equal(t, "rulebased", providerFactory.Name().String())
}
@ -31,7 +36,8 @@ func TestNew(t *testing.T) {
providerSettings := createTestProviderSettings()
config := nfmanager.Config{}
provider, err := New(ctx, providerSettings, config)
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
provider, err := New(ctx, providerSettings, config, routeStore)
require.NoError(t, err)
assert.NotNil(t, provider)
@ -44,7 +50,8 @@ func TestProvider_SetNotificationConfig(t *testing.T) {
providerSettings := createTestProviderSettings()
config := nfmanager.Config{}
provider, err := New(ctx, providerSettings, config)
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
provider, err := New(ctx, providerSettings, config, routeStore)
require.NoError(t, err)
tests := []struct {
@ -124,11 +131,12 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
providerSettings := createTestProviderSettings()
config := nfmanager.Config{}
provider, err := New(ctx, providerSettings, config)
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
provider, err := New(ctx, providerSettings, config, routeStore)
require.NoError(t, err)
orgID := "test-org"
ruleID := "rule1"
ruleID := "ruleId"
customConfig := &alertmanagertypes.NotificationConfig{
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 30 * time.Minute,
@ -144,7 +152,6 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
},
}
// Set config for alert1
err = provider.SetNotificationConfig(orgID, ruleID, customConfig)
require.NoError(t, err)
@ -155,7 +162,7 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
name string
orgID string
ruleID string
alert *types.Alert
alert *alertmanagertypes.Alert
expectedConfig *alertmanagertypes.NotificationConfig
shouldFallback bool
}{
@ -165,7 +172,7 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
ruleID: ruleID,
expectedConfig: &alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{
model.LabelName("ruleId"): {},
model.LabelName(ruleID): {},
},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 30 * time.Minute,
@ -182,13 +189,13 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
NotificationGroup: map[model.LabelName]struct{}{
model.LabelName("group1"): {},
model.LabelName("group2"): {},
model.LabelName("ruleId"): {},
model.LabelName(ruleID): {},
},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 4 * time.Hour,
NoDataInterval: 4 * time.Hour,
},
}, // Will get fallback from standardnotification
},
shouldFallback: false,
},
{
@ -231,7 +238,8 @@ func TestProvider_ConcurrentAccess(t *testing.T) {
providerSettings := createTestProviderSettings()
config := nfmanager.Config{}
provider, err := New(ctx, providerSettings, config)
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
provider, err := New(ctx, providerSettings, config, routeStore)
require.NoError(t, err)
orgID := "test-org"
@ -268,3 +276,430 @@ func TestProvider_ConcurrentAccess(t *testing.T) {
// Wait for both goroutines to complete
wg.Wait()
}
// TestProvider_EvaluateExpression runs a table of routing expressions through
// provider.evaluateExpr together with a label set and compares the boolean
// result with the expected value. Every case, matching or not, also expects
// evaluateExpr to return a nil error.
func TestProvider_EvaluateExpression(t *testing.T) {
// evaluateExpr is called on a zero-value provider; no store or settings are wired up.
provider := &provider{}
tests := []struct {
name string
expression string
labelSet model.LabelSet
expected bool
}{
{
name: "simple equality check - match",
expression: `threshold.name == 'auth' && ruleId == 'rule1'`,
labelSet: model.LabelSet{
"threshold.name": "auth",
"ruleId": "rule1",
},
expected: true,
},
{
name: "simple equality check - no match",
expression: `service == "payment"`,
labelSet: model.LabelSet{
"service": "auth",
"env": "production",
},
expected: false,
},
{
name: "multiple conditions with AND - both match",
expression: `service == "auth" && env == "production"`,
labelSet: model.LabelSet{
"service": "auth",
"env": "production",
},
expected: true,
},
{
name: "multiple conditions with AND - one doesn't match",
expression: `service == "auth" && env == "staging"`,
labelSet: model.LabelSet{
"service": "auth",
"env": "production",
},
expected: false,
},
{
name: "multiple conditions with OR - one matches",
expression: `service == "payment" || env == "production"`,
labelSet: model.LabelSet{
"service": "auth",
"env": "production",
},
expected: true,
},
{
name: "multiple conditions with OR - none match",
expression: `service == "payment" || env == "staging"`,
labelSet: model.LabelSet{
"service": "auth",
"env": "production",
},
expected: false,
},
{
name: "in operator - value in list",
expression: `service in ["auth", "payment", "notification"]`,
labelSet: model.LabelSet{
"service": "auth",
},
expected: true,
},
{
name: "in operator - value not in list",
expression: `service in ["payment", "notification"]`,
labelSet: model.LabelSet{
"service": "auth",
},
expected: false,
},
{
name: "contains operator - substring match",
expression: `host contains "prod"`,
labelSet: model.LabelSet{
"host": "prod-server-01",
},
expected: true,
},
{
name: "contains operator - no substring match",
expression: `host contains "staging"`,
labelSet: model.LabelSet{
"host": "prod-server-01",
},
expected: false,
},
{
name: "complex expression with parentheses",
expression: `(service == "auth" && env == "production") || critical == "true"`,
labelSet: model.LabelSet{
"service": "payment",
"env": "staging",
"critical": "true",
},
expected: true,
},
{
// NOTE(review): the left operand is a quoted token, so this presumably
// compares two string literals rather than looking up a missing label —
// confirm against evaluateExpr.
name: "missing label key",
expression: `"missing_key" == "value"`,
labelSet: model.LabelSet{
"service": "auth",
},
expected: false,
},
{
name: "rule-based expression with threshold name and ruleId",
expression: `'threshold.name' == "high-cpu" && ruleId == "rule-123"`,
labelSet: model.LabelSet{
"threshold.name": "high-cpu",
"ruleId": "rule-123",
"service": "auth",
},
expected: false, // presumably because the quoted 'threshold.name' is read as a string literal, not a label lookup — TODO confirm
},
{
name: "alertname and ruleId combination",
expression: `alertname == "HighCPUUsage" && ruleId == "cpu-alert-001"`,
labelSet: model.LabelSet{
"alertname": "HighCPUUsage",
"ruleId": "cpu-alert-001",
"severity": "critical",
},
expected: true,
},
{
name: "kubernetes namespace filtering",
expression: `k8s.namespace.name == "auth" && service in ["auth", "payment"]`,
labelSet: model.LabelSet{
"k8s.namespace.name": "auth",
"service": "auth",
"host": "k8s-node-1",
},
expected: true,
},
{
name: "migration expression format from SQL migration",
expression: `threshold.name == "HighCPUUsage" && ruleId == "rule-uuid-123"`,
labelSet: model.LabelSet{
"threshold.name": "HighCPUUsage",
"ruleId": "rule-uuid-123",
"severity": "warning",
},
expected: true,
},
{
name: "case sensitive matching",
expression: `service == "Auth"`, // capital A
labelSet: model.LabelSet{
"service": "auth", // lowercase a
},
expected: false,
},
{
name: "numeric comparison as strings",
expression: `port == "8080"`,
labelSet: model.LabelSet{
"port": "8080",
},
expected: true,
},
{
name: "quoted string with special characters",
expression: `service == "auth-service-v2"`,
labelSet: model.LabelSet{
"service": "auth-service-v2",
},
expected: true,
},
{
// Only the trailing `critical == "true"` operand matches this label set,
// so the case passes only if && binds tighter than ||.
name: "boolean operators precedence",
expression: `service == "auth" && env == "prod" || critical == "true"`,
labelSet: model.LabelSet{
"service": "payment",
"env": "staging",
"critical": "true",
},
expected: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := provider.evaluateExpr(tt.expression, tt.labelSet)
assert.NoError(t, err)
// The expression is attached to the failure message to identify the failing row.
assert.Equal(t, tt.expected, result, "Expression: %s", tt.expression)
})
}
}
// TestProvider_DeleteRoute exercises DeleteRoutePolicy against a fresh mock
// route store per case. Successful cases register a delete expectation on the
// store and verify it was consumed; failing cases only require an error.
func TestProvider_DeleteRoute(t *testing.T) {
	type testCase struct {
		name    string
		orgID   string
		routeID string
		wantErr bool
	}

	cases := []testCase{
		{name: "valid parameters", orgID: "test-org-123", routeID: "route-uuid-456", wantErr: false},
		{name: "empty routeID", orgID: "test-org-123", routeID: "", wantErr: true},
		{name: "valid orgID with valid routeID", orgID: "another-org", routeID: "another-route-id", wantErr: false},
	}

	ctx := context.Background()
	providerSettings := createTestProviderSettings()
	config := nfmanager.Config{}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			routeStore := nfroutingstoretest.NewMockSQLRouteStore()
			p, err := New(ctx, providerSettings, config, routeStore)
			require.NoError(t, err)

			if tc.wantErr {
				// No store expectation is registered for failing cases.
				assert.Error(t, p.DeleteRoutePolicy(ctx, tc.orgID, tc.routeID))
				return
			}

			routeStore.ExpectDelete(tc.orgID, tc.routeID)
			assert.NoError(t, p.DeleteRoutePolicy(ctx, tc.orgID, tc.routeID))
			assert.NoError(t, routeStore.ExpectationsWereMet())
		})
	}
}
func TestProvider_CreateRoute(t *testing.T) {
ctx := context.Background()
providerSettings := createTestProviderSettings()
config := nfmanager.Config{}
tests := []struct {
name string
orgID string
route *alertmanagertypes.RoutePolicy
wantErr bool
}{
{
name: "valid route",
orgID: "test-org-123",
route: &alertmanagertypes.RoutePolicy{
Identifiable: types.Identifiable{ID: valuer.GenerateUUID()},
Expression: `service == "auth"`,
ExpressionKind: alertmanagertypes.PolicyBasedExpression,
Name: "auth-service-route",
Description: "Route for auth service alerts",
Enabled: true,
OrgID: "test-org-123",
Channels: []string{"slack-channel"},
},
wantErr: false,
},
{
name: "nil route",
orgID: "test-org-123",
route: nil,
wantErr: true,
},
{
name: "invalid route - missing expression",
orgID: "test-org-123",
route: &alertmanagertypes.RoutePolicy{
Expression: "", // empty expression
ExpressionKind: alertmanagertypes.PolicyBasedExpression,
Name: "invalid-route",
OrgID: "test-org-123",
},
wantErr: true,
},
{
name: "invalid route - missing name",
orgID: "test-org-123",
route: &alertmanagertypes.RoutePolicy{
Expression: `service == "auth"`,
ExpressionKind: alertmanagertypes.PolicyBasedExpression,
Name: "", // empty name
OrgID: "test-org-123",
},
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
provider, err := New(ctx, providerSettings, config, routeStore)
require.NoError(t, err)
if !tt.wantErr && tt.route != nil {
routeStore.ExpectCreate(tt.route)
}
err = provider.CreateRoutePolicy(ctx, tt.orgID, tt.route)
if tt.wantErr {
assert.Error(t, err)
} else {
assert.NoError(t, err)
assert.NoError(t, routeStore.ExpectationsWereMet())
}
})
}
}
func TestProvider_CreateRoutes(t *testing.T) {
ctx := context.Background()
providerSettings := createTestProviderSettings()
config := nfmanager.Config{}
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
provider, err := New(ctx, providerSettings, config, routeStore)
require.NoError(t, err)
validRoute1 := &alertmanagertypes.RoutePolicy{
Expression: `service == "auth"`,
ExpressionKind: alertmanagertypes.PolicyBasedExpression,
Name: "auth-route",
Description: "Auth service route",
Enabled: true,
OrgID: "test-org",
Channels: []string{"slack-auth"},
}
validRoute2 := &alertmanagertypes.RoutePolicy{
Expression: `service == "payment"`,
ExpressionKind: alertmanagertypes.PolicyBasedExpression,
Name: "payment-route",
Description: "Payment service route",
Enabled: true,
OrgID: "test-org",
Channels: []string{"slack-payment"},
}
invalidRoute := &alertmanagertypes.RoutePolicy{
Expression: "", // empty expression - invalid
ExpressionKind: alertmanagertypes.PolicyBasedExpression,
Name: "invalid-route",
OrgID: "test-org",
}
tests := []struct {
name string
orgID string
routes []*alertmanagertypes.RoutePolicy
wantErr bool
}{
{
name: "valid routes",
orgID: "test-org",
routes: []*alertmanagertypes.RoutePolicy{validRoute1, validRoute2},
wantErr: false,
},
{
name: "empty routes list",
orgID: "test-org",
routes: []*alertmanagertypes.RoutePolicy{},
wantErr: true,
},
{
name: "nil routes list",
orgID: "test-org",
routes: nil,
wantErr: true,
},
{
name: "routes with nil route",
orgID: "test-org",
routes: []*alertmanagertypes.RoutePolicy{validRoute1, nil},
wantErr: true,
},
{
name: "routes with invalid route",
orgID: "test-org",
routes: []*alertmanagertypes.RoutePolicy{validRoute1, invalidRoute},
wantErr: true,
},
{
name: "single valid route",
orgID: "test-org",
routes: []*alertmanagertypes.RoutePolicy{validRoute1},
wantErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if !tt.wantErr && len(tt.routes) > 0 {
routeStore.ExpectCreateBatch(tt.routes)
}
err := provider.CreateRoutePolicies(ctx, tt.orgID, tt.routes)
if tt.wantErr {
assert.Error(t, err)
} else {
assert.NoError(t, err)
assert.NoError(t, routeStore.ExpectationsWereMet())
}
})
}
}

View File

@ -4,6 +4,9 @@ import (
"context"
"sync"
"github.com/prometheus/alertmanager/featurecontrol"
"github.com/prometheus/alertmanager/matcher/compat"
"github.com/SigNoz/signoz/pkg/alertmanager/alertmanagerserver"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/errors"
@ -61,6 +64,7 @@ func New(
}
func (service *Service) SyncServers(ctx context.Context) error {
compat.InitFromFlags(service.settings.Logger(), featurecontrol.NoopFlags{})
orgs, err := service.orgGetter.ListByOwnedKeyRange(ctx)
if err != nil {
return err
@ -142,7 +146,7 @@ func (service *Service) TestReceiver(ctx context.Context, orgID string, receiver
return server.TestReceiver(ctx, receiver)
}
func (service *Service) TestAlert(ctx context.Context, orgID string, alert *alertmanagertypes.PostableAlert, receivers []string) error {
func (service *Service) TestAlert(ctx context.Context, orgID string, receiversMap map[*alertmanagertypes.PostableAlert][]string, config *alertmanagertypes.NotificationConfig) error {
service.serversMtx.RLock()
defer service.serversMtx.RUnlock()
@ -151,7 +155,7 @@ func (service *Service) TestAlert(ctx context.Context, orgID string, alert *aler
return err
}
return server.TestAlert(ctx, alert, receivers)
return server.TestAlert(ctx, receiversMap, config)
}
func (service *Service) Stop(ctx context.Context) error {

View File

@ -2,8 +2,12 @@ package signozalertmanager
import (
"context"
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
"github.com/prometheus/common/model"
"time"
amConfig "github.com/prometheus/alertmanager/config"
"github.com/SigNoz/signoz/pkg/alertmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/alertmanagerstore/sqlalertmanagerstore"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
@ -11,7 +15,9 @@ import (
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/modules/organization"
"github.com/SigNoz/signoz/pkg/sqlstore"
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/SigNoz/signoz/pkg/types/authtypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
@ -94,8 +100,29 @@ func (provider *provider) TestReceiver(ctx context.Context, orgID string, receiv
return provider.service.TestReceiver(ctx, orgID, receiver)
}
func (provider *provider) TestAlert(ctx context.Context, orgID string, alert *alertmanagertypes.PostableAlert, receivers []string) error {
return provider.service.TestAlert(ctx, orgID, alert, receivers)
// TestAlert sends the given test alerts through the alertmanager service
// using the notification config stored for ruleID.
//
// When that config has UsePolicy set, the receivers of every alert in
// receiversMap are replaced by the channels returned by the notification
// manager's Match for the alert's labels; alerts with no matching channel are
// removed from receiversMap. The map is modified in place.
func (provider *provider) TestAlert(ctx context.Context, orgID string, ruleID string, receiversMap map[*alertmanagertypes.PostableAlert][]string) error {
	config, err := provider.notificationManager.GetNotificationConfig(orgID, ruleID)
	if err != nil {
		return err
	}

	if !config.UsePolicy {
		return provider.service.TestAlert(ctx, orgID, receiversMap, config)
	}

	for alert := range receiversMap {
		labelSet := make(model.LabelSet)
		for name, value := range alert.Labels {
			labelSet[model.LabelName(name)] = model.LabelValue(value)
		}

		// The rule ID passed to Match comes from the alert's own labels, not
		// from the ruleID argument.
		channels, err := provider.notificationManager.Match(ctx, orgID, alert.Labels[labels.AlertRuleIdLabel], labelSet)
		if err != nil {
			return err
		}

		if len(channels) > 0 {
			receiversMap[alert] = channels
			continue
		}
		delete(receiversMap, alert)
	}

	return provider.service.TestAlert(ctx, orgID, receiversMap, config)
}
func (provider *provider) ListChannels(ctx context.Context, orgID string) ([]*alertmanagertypes.Channel, error) {
@ -211,3 +238,316 @@ func (provider *provider) DeleteNotificationConfig(ctx context.Context, orgID va
}
return nil
}
// CreateRoutePolicy validates routeRequest and stores it as a new, enabled
// route policy for the organization named in the auth claims carried by ctx.
//
// The new policy gets a freshly generated ID; the claims' email is recorded as
// both creator and updater, and CreatedAt/UpdatedAt share a single timestamp.
//
// It returns an error when the claims are missing, the claims' org ID is not a
// valid UUID, routeRequest is nil or fails validation, or the notification
// manager rejects the policy.
func (provider *provider) CreateRoutePolicy(ctx context.Context, routeRequest *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error) {
	claims, err := authtypes.ClaimsFromContext(ctx)
	if err != nil {
		return nil, err
	}

	orgID, err := valuer.NewUUID(claims.OrgID)
	if err != nil {
		return nil, err
	}

	// A nil request would be dereferenced below when building the response.
	if routeRequest == nil {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "route cannot be nil")
	}

	if err := routeRequest.Validate(); err != nil {
		return nil, err
	}

	// One timestamp for both audit fields so a fresh record has CreatedAt == UpdatedAt.
	now := time.Now()

	route := alertmanagertypes.RoutePolicy{
		Expression:     routeRequest.Expression,
		ExpressionKind: routeRequest.ExpressionKind,
		Name:           routeRequest.Name,
		Description:    routeRequest.Description,
		Enabled:        true,
		Tags:           routeRequest.Tags,
		Channels:       routeRequest.Channels,
		OrgID:          claims.OrgID,
		Identifiable: types.Identifiable{
			ID: valuer.GenerateUUID(),
		},
		UserAuditable: types.UserAuditable{
			CreatedBy: claims.Email,
			UpdatedBy: claims.Email,
		},
		TimeAuditable: types.TimeAuditable{
			CreatedAt: now,
			UpdatedAt: now,
		},
	}

	err = provider.notificationManager.CreateRoutePolicy(ctx, orgID.String(), &route)
	if err != nil {
		return nil, err
	}

	return &alertmanagertypes.GettableRoutePolicy{
		PostableRoutePolicy: *routeRequest,
		ID:                  route.ID.StringValue(),
		CreatedAt:           &route.CreatedAt,
		UpdatedAt:           &route.UpdatedAt,
		CreatedBy:           &route.CreatedBy,
		UpdatedBy:           &route.UpdatedBy,
	}, nil
}
// CreateRoutePolicies validates every entry of routeRequests and stores them
// in one batch as new, enabled route policies for the organization named in
// the auth claims carried by ctx.
//
// An empty routeRequests returns an empty result without calling the
// notification manager. Validation stops at the first nil or invalid entry and
// nothing is stored in that case. All policies created by one call share a
// single creation timestamp.
func (provider *provider) CreateRoutePolicies(ctx context.Context, routeRequests []*alertmanagertypes.PostableRoutePolicy) ([]*alertmanagertypes.GettableRoutePolicy, error) {
	claims, err := authtypes.ClaimsFromContext(ctx)
	if err != nil {
		return nil, err
	}

	orgID, err := valuer.NewUUID(claims.OrgID)
	if err != nil {
		return nil, err
	}

	if len(routeRequests) == 0 {
		return []*alertmanagertypes.GettableRoutePolicy{}, nil
	}

	// One timestamp for the whole batch; CreatedAt and UpdatedAt start out equal.
	now := time.Now()

	routes := make([]*alertmanagertypes.RoutePolicy, 0, len(routeRequests))
	results := make([]*alertmanagertypes.GettableRoutePolicy, 0, len(routeRequests))

	for _, routeRequest := range routeRequests {
		// A nil entry would be dereferenced below when building the response.
		if routeRequest == nil {
			return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "route cannot be nil")
		}

		if err := routeRequest.Validate(); err != nil {
			return nil, err
		}

		route := &alertmanagertypes.RoutePolicy{
			Expression:     routeRequest.Expression,
			ExpressionKind: routeRequest.ExpressionKind,
			Name:           routeRequest.Name,
			Description:    routeRequest.Description,
			Enabled:        true,
			Tags:           routeRequest.Tags,
			Channels:       routeRequest.Channels,
			OrgID:          claims.OrgID,
			Identifiable: types.Identifiable{
				ID: valuer.GenerateUUID(),
			},
			UserAuditable: types.UserAuditable{
				CreatedBy: claims.Email,
				UpdatedBy: claims.Email,
			},
			TimeAuditable: types.TimeAuditable{
				CreatedAt: now,
				UpdatedAt: now,
			},
		}

		routes = append(routes, route)
		results = append(results, &alertmanagertypes.GettableRoutePolicy{
			PostableRoutePolicy: *routeRequest,
			ID:                  route.ID.StringValue(),
			CreatedAt:           &route.CreatedAt,
			UpdatedAt:           &route.UpdatedAt,
			CreatedBy:           &route.CreatedBy,
			UpdatedBy:           &route.UpdatedBy,
		})
	}

	err = provider.notificationManager.CreateRoutePolicies(ctx, orgID.String(), routes)
	if err != nil {
		return nil, err
	}

	return results, nil
}
// GetRoutePolicyByID looks up the route policy routeID within the organization
// named in the auth claims carried by ctx and returns it in its API shape.
// Errors from claim extraction, org ID parsing, or the notification manager
// are returned unchanged.
func (provider *provider) GetRoutePolicyByID(ctx context.Context, routeID string) (*alertmanagertypes.GettableRoutePolicy, error) {
	claims, err := authtypes.ClaimsFromContext(ctx)
	if err != nil {
		return nil, err
	}

	orgUUID, err := valuer.NewUUID(claims.OrgID)
	if err != nil {
		return nil, err
	}

	policy, err := provider.notificationManager.GetRoutePolicyByID(ctx, orgUUID.String(), routeID)
	if err != nil {
		return nil, err
	}

	// User-editable fields of the stored policy.
	postable := alertmanagertypes.PostableRoutePolicy{
		Expression:     policy.Expression,
		ExpressionKind: policy.ExpressionKind,
		Channels:       policy.Channels,
		Name:           policy.Name,
		Description:    policy.Description,
		Tags:           policy.Tags,
	}

	gettable := &alertmanagertypes.GettableRoutePolicy{
		PostableRoutePolicy: postable,
		ID:                  policy.ID.StringValue(),
		CreatedAt:           &policy.CreatedAt,
		UpdatedAt:           &policy.UpdatedAt,
		CreatedBy:           &policy.CreatedBy,
		UpdatedBy:           &policy.UpdatedBy,
	}
	return gettable, nil
}
// GetAllRoutePolicies returns every route policy of the organization named in
// the auth claims carried by ctx, converted to the API shape. The result is a
// non-nil slice even when the organization has no policies.
func (provider *provider) GetAllRoutePolicies(ctx context.Context) ([]*alertmanagertypes.GettableRoutePolicy, error) {
	claims, err := authtypes.ClaimsFromContext(ctx)
	if err != nil {
		return nil, err
	}

	orgUUID, err := valuer.NewUUID(claims.OrgID)
	if err != nil {
		return nil, err
	}

	routes, err := provider.notificationManager.GetAllRoutePolicies(ctx, orgUUID.String())
	if err != nil {
		return nil, err
	}

	policies := make([]*alertmanagertypes.GettableRoutePolicy, 0, len(routes))
	for _, route := range routes {
		// User-editable fields of the stored policy.
		postable := alertmanagertypes.PostableRoutePolicy{
			Expression:     route.Expression,
			ExpressionKind: route.ExpressionKind,
			Channels:       route.Channels,
			Name:           route.Name,
			Description:    route.Description,
			Tags:           route.Tags,
		}

		gettable := &alertmanagertypes.GettableRoutePolicy{
			PostableRoutePolicy: postable,
			ID:                  route.ID.StringValue(),
			CreatedAt:           &route.CreatedAt,
			UpdatedAt:           &route.UpdatedAt,
			CreatedBy:           &route.CreatedBy,
			UpdatedBy:           &route.UpdatedBy,
		}
		policies = append(policies, gettable)
	}

	return policies, nil
}
// UpdateRoutePolicyByID replaces the editable fields of route policy routeID
// with those in route, for the organization named in the auth claims carried
// by ctx.
//
// The existing policy's ID, creator, creation time and Enabled flag are
// carried over; UpdatedBy is set to the claims' email and UpdatedAt to the
// current time.
//
// The update is performed as delete-then-create on the notification manager.
// NOTE(review): the two calls are not wrapped in a transaction here, so a
// failed create leaves the policy deleted — confirm whether the manager/store
// offers an atomic update.
func (provider *provider) UpdateRoutePolicyByID(ctx context.Context, routeID string, route *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error) {
	claims, err := authtypes.ClaimsFromContext(ctx)
	if err != nil {
		return nil, errors.NewInvalidInputf(errors.CodeUnauthenticated, "invalid claims: %v", err)
	}

	orgID, err := valuer.NewUUID(claims.OrgID)
	if err != nil {
		return nil, err
	}

	if routeID == "" {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
	}

	if route == nil {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "route cannot be nil")
	}

	if err := route.Validate(); err != nil {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid route: %v", err)
	}

	// Use the parsed org ID for the lookup, the same value the delete and
	// create calls below receive.
	existingRoute, err := provider.notificationManager.GetRoutePolicyByID(ctx, orgID.String(), routeID)
	if err != nil {
		return nil, errors.NewInvalidInputf(errors.CodeNotFound, "route not found: %v", err)
	}

	updatedRoute := &alertmanagertypes.RoutePolicy{
		Expression:     route.Expression,
		ExpressionKind: route.ExpressionKind,
		Name:           route.Name,
		Description:    route.Description,
		// The request carries no enabled flag; keep the stored one so that an
		// edit does not reset the policy to the zero value (disabled).
		Enabled:      existingRoute.Enabled,
		Tags:         route.Tags,
		Channels:     route.Channels,
		OrgID:        claims.OrgID,
		Identifiable: existingRoute.Identifiable,
		UserAuditable: types.UserAuditable{
			CreatedBy: existingRoute.CreatedBy,
			UpdatedBy: claims.Email,
		},
		TimeAuditable: types.TimeAuditable{
			CreatedAt: existingRoute.CreatedAt,
			UpdatedAt: time.Now(),
		},
	}

	err = provider.notificationManager.DeleteRoutePolicy(ctx, orgID.String(), routeID)
	if err != nil {
		return nil, errors.NewInvalidInputf(errors.CodeInternal, "error deleting existing route: %v", err)
	}

	err = provider.notificationManager.CreateRoutePolicy(ctx, orgID.String(), updatedRoute)
	if err != nil {
		return nil, err
	}

	return &alertmanagertypes.GettableRoutePolicy{
		PostableRoutePolicy: *route,
		ID:                  updatedRoute.ID.StringValue(),
		CreatedAt:           &updatedRoute.CreatedAt,
		UpdatedAt:           &updatedRoute.UpdatedAt,
		CreatedBy:           &updatedRoute.CreatedBy,
		UpdatedBy:           &updatedRoute.UpdatedBy,
	}, nil
}
// DeleteRoutePolicyByID removes route policy routeID from the organization
// named in the auth claims carried by ctx. It rejects an empty routeID, but
// only after the claims and their org ID have been resolved.
func (provider *provider) DeleteRoutePolicyByID(ctx context.Context, routeID string) error {
	claims, claimsErr := authtypes.ClaimsFromContext(ctx)
	if claimsErr != nil {
		return errors.NewInvalidInputf(errors.CodeUnauthenticated, "invalid claims: %v", claimsErr)
	}

	orgUUID, parseErr := valuer.NewUUID(claims.OrgID)
	if parseErr != nil {
		return parseErr
	}

	if len(routeID) == 0 {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
	}

	return provider.notificationManager.DeleteRoutePolicy(ctx, orgUUID.String(), routeID)
}
// CreateInhibitRules loads the stored alertmanager config of orgID, adds the
// given inhibit rules to it, and writes the config back to the config store.
func (provider *provider) CreateInhibitRules(ctx context.Context, orgID valuer.UUID, rules []amConfig.InhibitRule) error {
	cfg, err := provider.configStore.Get(ctx, orgID.String())
	if err != nil {
		return err
	}

	if err = cfg.AddInhibitRules(rules); err != nil {
		return err
	}

	return provider.configStore.Set(ctx, cfg)
}
// DeleteAllRoutePoliciesByRuleId forwards names to the notification manager's
// DeleteAllRoutePoliciesByName for the organization named in the auth claims
// carried by ctx.
func (provider *provider) DeleteAllRoutePoliciesByRuleId(ctx context.Context, names string) error {
	claims, claimsErr := authtypes.ClaimsFromContext(ctx)
	if claimsErr != nil {
		return errors.NewInvalidInputf(errors.CodeUnauthenticated, "invalid claims: %v", claimsErr)
	}

	orgUUID, parseErr := valuer.NewUUID(claims.OrgID)
	if parseErr != nil {
		return parseErr
	}

	return provider.notificationManager.DeleteAllRoutePoliciesByName(ctx, orgUUID.String(), names)
}
// UpdateAllRoutePoliciesByRuleId replaces the route policies selected by names
// with routes: it first deletes them via DeleteAllRoutePoliciesByRuleId and
// then creates the new set via CreateRoutePolicies. A deletion failure is
// wrapped; a creation failure is returned as-is.
func (provider *provider) UpdateAllRoutePoliciesByRuleId(ctx context.Context, names string, routes []*alertmanagertypes.PostableRoutePolicy) error {
	if deleteErr := provider.DeleteAllRoutePoliciesByRuleId(ctx, names); deleteErr != nil {
		return errors.NewInvalidInputf(errors.CodeInternal, "error deleting the routes: %v", deleteErr)
	}

	_, createErr := provider.CreateRoutePolicies(ctx, routes)
	return createErr
}
// DeleteAllInhibitRulesByRuleId loads the stored alertmanager config of orgID,
// calls DeleteRuleIDInhibitor on it for ruleId, and writes the config back to
// the config store.
func (provider *provider) DeleteAllInhibitRulesByRuleId(ctx context.Context, orgID valuer.UUID, ruleId string) error {
	cfg, err := provider.configStore.Get(ctx, orgID.String())
	if err != nil {
		return err
	}

	if err = cfg.DeleteRuleIDInhibitor(ruleId); err != nil {
		return err
	}

	return provider.configStore.Set(ctx, cfg)
}

View File

@ -10,7 +10,6 @@ import (
"fmt"
"github.com/SigNoz/signoz/pkg/modules/thirdpartyapi"
//qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"io"
"math"
"net/http"
@ -492,6 +491,12 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router, am *middleware.AuthZ) {
router.HandleFunc("/api/v1/channels", am.EditAccess(aH.AlertmanagerAPI.CreateChannel)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/testChannel", am.EditAccess(aH.AlertmanagerAPI.TestReceiver)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/route_policies", am.ViewAccess(aH.AlertmanagerAPI.GetAllRoutePolicies)).Methods(http.MethodGet)
router.HandleFunc("/api/v1/route_policies/{id}", am.ViewAccess(aH.AlertmanagerAPI.GetRoutePolicyByID)).Methods(http.MethodGet)
router.HandleFunc("/api/v1/route_policies", am.AdminAccess(aH.AlertmanagerAPI.CreateRoutePolicy)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/route_policies/{id}", am.AdminAccess(aH.AlertmanagerAPI.DeleteRoutePolicyByID)).Methods(http.MethodDelete)
router.HandleFunc("/api/v1/route_policies/{id}", am.AdminAccess(aH.AlertmanagerAPI.UpdateRoutePolicy)).Methods(http.MethodPut)
router.HandleFunc("/api/v1/alerts", am.ViewAccess(aH.AlertmanagerAPI.GetAlerts)).Methods(http.MethodGet)
router.HandleFunc("/api/v1/rules", am.ViewAccess(aH.listRules)).Methods(http.MethodGet)
@ -616,6 +621,7 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router, am *middleware.AuthZ) {
// Export
router.HandleFunc("/api/v1/export_raw_data", am.ViewAccess(aH.Signoz.Handlers.RawDataExport.ExportRawData)).Methods(http.MethodGet)
}
func (ah *APIHandler) MetricExplorerRoutes(router *mux.Router, am *middleware.AuthZ) {

View File

@ -4,13 +4,11 @@ import (
"context"
"fmt"
"log/slog"
"math"
"net/url"
"sync"
"time"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/query-service/converter"
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
"github.com/SigNoz/signoz/pkg/query-service/model"
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
@ -167,22 +165,6 @@ func NewBaseRule(id string, orgID valuer.UUID, p *ruletypes.PostableRule, reader
return baseRule, nil
}
// targetVal returns the rule's target value converted from the condition's
// target unit to the unit reported by r.Unit(). It returns 0 when the rule has
// no condition or the condition has no target.
func (r *BaseRule) targetVal() float64 {
	if r.ruleCondition == nil || r.ruleCondition.Target == nil {
		return 0
	}

	// Converter for the unit the target was specified in.
	sourceUnit := converter.Unit(r.ruleCondition.TargetUnit)
	unitConverter := converter.FromUnit(sourceUnit)

	// Express the target in the rule's own (y-axis) unit.
	target := converter.Value{
		F: *r.ruleCondition.Target,
		U: converter.Unit(r.ruleCondition.TargetUnit),
	}
	return unitConverter.Convert(target, converter.Unit(r.Unit())).F
}
func (r *BaseRule) matchType() ruletypes.MatchType {
if r.ruleCondition == nil {
return ruletypes.AtleastOnce
@ -221,10 +203,6 @@ func (r *BaseRule) HoldDuration() time.Duration {
return r.holdDuration
}
// TargetVal is the exported accessor for the rule's target value; it returns
// whatever the unexported targetVal computes.
func (r *BaseRule) TargetVal() float64 {
return r.targetVal()
}
func (r *ThresholdRule) hostFromSource() string {
parsedUrl, err := url.Parse(r.source)
if err != nil {
@ -380,232 +358,6 @@ func (r *BaseRule) ForEachActiveAlert(f func(*ruletypes.Alert)) {
}
}
// ShouldAlert evaluates one series against the rule's target value and reports
// whether the rule should fire for it, together with the sample to attach to
// the alert.
//
// The series labels are copied into the returned sample's Metric. Which points
// are inspected depends on r.matchType() (at least once, all the time, on
// average, in total, last) and how they are compared depends on r.compareOp().
// It returns (zero sample, false) when the series has no points after
// removeGroupinSetPoints, or when RequireMinPoints is set and fewer than
// RequiredNumPoints points remain.
func (r *BaseRule) ShouldAlert(series v3.Series) (ruletypes.Sample, bool) {
var alertSmpl ruletypes.Sample
var shouldAlert bool
var lbls qslabels.Labels
for name, value := range series.Labels {
lbls = append(lbls, qslabels.Label{Name: name, Value: value})
}
series.Points = removeGroupinSetPoints(series)
// nothing to evaluate
if len(series.Points) == 0 {
return alertSmpl, false
}
// NOTE(review): r.ruleCondition is dereferenced here without a nil check,
// while targetVal and matchType both guard against a nil condition.
if r.ruleCondition.RequireMinPoints {
if len(series.Points) < r.ruleCondition.RequiredNumPoints {
zap.L().Info("not enough data points to evaluate series, skipping", zap.String("ruleid", r.ID()), zap.Int("numPoints", len(series.Points)), zap.Int("requiredPoints", r.ruleCondition.RequiredNumPoints))
return alertSmpl, false
}
}
switch r.matchType() {
case ruletypes.AtleastOnce:
// If any sample matches the condition, the rule is firing.
// The first matching point becomes the alert sample.
if r.compareOp() == ruletypes.ValueIsAbove {
for _, smpl := range series.Points {
if smpl.Value > r.targetVal() {
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
}
}
} else if r.compareOp() == ruletypes.ValueIsBelow {
for _, smpl := range series.Points {
if smpl.Value < r.targetVal() {
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
}
}
} else if r.compareOp() == ruletypes.ValueIsEq {
for _, smpl := range series.Points {
if smpl.Value == r.targetVal() {
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
}
}
} else if r.compareOp() == ruletypes.ValueIsNotEq {
for _, smpl := range series.Points {
if smpl.Value != r.targetVal() {
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
}
}
} else if r.compareOp() == ruletypes.ValueOutsideBounds {
// Outside bounds: the absolute value reaches or exceeds the target.
for _, smpl := range series.Points {
if math.Abs(smpl.Value) >= r.targetVal() {
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
}
}
}
case ruletypes.AllTheTimes:
// If all samples match the condition, the rule is firing.
// Start from "firing" with the target as sample value; any point that
// violates the condition clears the flag.
shouldAlert = true
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: r.targetVal()}, Metric: lbls}
if r.compareOp() == ruletypes.ValueIsAbove {
for _, smpl := range series.Points {
if smpl.Value <= r.targetVal() {
shouldAlert = false
break
}
}
// use min value from the series
if shouldAlert {
var minValue float64 = math.Inf(1)
for _, smpl := range series.Points {
if smpl.Value < minValue {
minValue = smpl.Value
}
}
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: minValue}, Metric: lbls}
}
} else if r.compareOp() == ruletypes.ValueIsBelow {
for _, smpl := range series.Points {
if smpl.Value >= r.targetVal() {
shouldAlert = false
break
}
}
// use max value from the series
if shouldAlert {
var maxValue float64 = math.Inf(-1)
for _, smpl := range series.Points {
if smpl.Value > maxValue {
maxValue = smpl.Value
}
}
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: maxValue}, Metric: lbls}
}
} else if r.compareOp() == ruletypes.ValueIsEq {
for _, smpl := range series.Points {
if smpl.Value != r.targetVal() {
shouldAlert = false
break
}
}
} else if r.compareOp() == ruletypes.ValueIsNotEq {
for _, smpl := range series.Points {
if smpl.Value == r.targetVal() {
shouldAlert = false
break
}
}
// use any non-inf or nan value from the series
if shouldAlert {
for _, smpl := range series.Points {
if !math.IsInf(smpl.Value, 0) && !math.IsNaN(smpl.Value) {
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
break
}
}
}
} else if r.compareOp() == ruletypes.ValueOutsideBounds {
// The first point inside the bounds clears the flag; that point is also
// stored as the sample even though the rule does not fire.
for _, smpl := range series.Points {
if math.Abs(smpl.Value) < r.targetVal() {
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
shouldAlert = false
break
}
}
}
case ruletypes.OnAverage:
// If the average of all samples matches the condition, the rule is firing.
// NaN and Inf points are left out of the average.
var sum, count float64
for _, smpl := range series.Points {
if math.IsNaN(smpl.Value) || math.IsInf(smpl.Value, 0) {
continue
}
sum += smpl.Value
count++
}
// NOTE(review): when every point is NaN/Inf, count stays 0 and avg is
// 0/0 = NaN; the ValueIsNotEq branch below then fires with a NaN sample.
avg := sum / count
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: avg}, Metric: lbls}
if r.compareOp() == ruletypes.ValueIsAbove {
if avg > r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsBelow {
if avg < r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsEq {
if avg == r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsNotEq {
if avg != r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueOutsideBounds {
if math.Abs(avg) >= r.targetVal() {
shouldAlert = true
}
}
case ruletypes.InTotal:
// If the sum of all samples matches the condition, the rule is firing.
// NaN and Inf points are left out of the sum.
var sum float64
for _, smpl := range series.Points {
if math.IsNaN(smpl.Value) || math.IsInf(smpl.Value, 0) {
continue
}
sum += smpl.Value
}
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: sum}, Metric: lbls}
if r.compareOp() == ruletypes.ValueIsAbove {
if sum > r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsBelow {
if sum < r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsEq {
if sum == r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsNotEq {
if sum != r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueOutsideBounds {
if math.Abs(sum) >= r.targetVal() {
shouldAlert = true
}
}
case ruletypes.Last:
// If the last sample matches the condition, the rule is firing.
// NOTE(review): unlike the other match types, this case has no
// ValueOutsideBounds branch, so that operator never fires here.
shouldAlert = false
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: series.Points[len(series.Points)-1].Value}, Metric: lbls}
if r.compareOp() == ruletypes.ValueIsAbove {
if series.Points[len(series.Points)-1].Value > r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsBelow {
if series.Points[len(series.Points)-1].Value < r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsEq {
if series.Points[len(series.Points)-1].Value == r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsNotEq {
if series.Points[len(series.Points)-1].Value != r.targetVal() {
shouldAlert = true
}
}
}
return alertSmpl, shouldAlert
}
func (r *BaseRule) RecordRuleStateHistory(ctx context.Context, prevState, currentState model.AlertState, itemsToAdd []model.RuleStateHistory) error {
zap.L().Debug("recording rule state history", zap.String("ruleid", r.ID()), zap.Any("prevState", prevState), zap.Any("currentState", currentState), zap.Any("itemsToAdd", itemsToAdd))
revisedItemsToAdd := map[uint64]model.RuleStateHistory{}

View File

@ -1,6 +1,7 @@
package rules
import (
"github.com/stretchr/testify/require"
"testing"
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
@ -22,6 +23,15 @@ func TestBaseRule_RequireMinPoints(t *testing.T) {
RequireMinPoints: true,
RequiredNumPoints: 4,
},
Threshold: ruletypes.BasicRuleThresholds{
{
Name: "test-threshold",
TargetValue: &threshold,
CompareOp: ruletypes.ValueIsAbove,
MatchType: ruletypes.AtleastOnce,
},
},
},
series: &v3.Series{
Points: []v3.Point{
@ -41,6 +51,14 @@ func TestBaseRule_RequireMinPoints(t *testing.T) {
MatchType: ruletypes.AtleastOnce,
Target: &threshold,
},
Threshold: ruletypes.BasicRuleThresholds{
{
Name: "test-threshold",
TargetValue: &threshold,
CompareOp: ruletypes.ValueIsAbove,
MatchType: ruletypes.AtleastOnce,
},
},
},
series: &v3.Series{
Points: []v3.Point{
@ -56,10 +74,9 @@ func TestBaseRule_RequireMinPoints(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
_, shouldAlert := test.rule.ShouldAlert(*test.series)
if shouldAlert != test.shouldAlert {
t.Errorf("expected shouldAlert to be %v, got %v", test.shouldAlert, shouldAlert)
}
_, err := test.rule.Threshold.ShouldAlert(*test.series, "")
require.NoError(t, err)
require.Equal(t, len(test.series.Points) >= test.rule.ruleCondition.RequiredNumPoints, test.shouldAlert)
})
}
}

View File

@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
"log/slog"
"sort"
"strings"
@ -350,40 +351,36 @@ func (m *Manager) EditRule(ctx context.Context, ruleStr string, id valuer.UUID)
existingRule.Data = ruleStr
return m.ruleStore.EditRule(ctx, existingRule, func(ctx context.Context) error {
cfg, err := m.alertmanager.GetConfig(ctx, claims.OrgID)
if err != nil {
return err
}
var preferredChannels []string
if len(parsedRule.PreferredChannels) == 0 {
channels, err := m.alertmanager.ListChannels(ctx, claims.OrgID)
if err != nil {
return err
}
for _, channel := range channels {
preferredChannels = append(preferredChannels, channel.Name)
}
} else {
preferredChannels = parsedRule.PreferredChannels
}
err = cfg.UpdateRuleIDMatcher(id.StringValue(), preferredChannels)
if err != nil {
return err
}
if parsedRule.NotificationSettings != nil {
config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig()
err = m.alertmanager.SetNotificationConfig(ctx, orgID, existingRule.ID.StringValue(), &config)
err = m.alertmanager.SetNotificationConfig(ctx, orgID, id.StringValue(), &config)
if err != nil {
return err
}
if !parsedRule.NotificationSettings.UsePolicy {
request, err := parsedRule.GetRuleRouteRequest(id.StringValue())
if err != nil {
return err
}
err = m.alertmanager.UpdateAllRoutePoliciesByRuleId(ctx, id.StringValue(), request)
if err != nil {
return err
}
err = m.alertmanager.DeleteAllInhibitRulesByRuleId(ctx, orgID, id.StringValue())
if err != nil {
return err
}
err = m.alertmanager.SetConfig(ctx, cfg)
inhibitRules, err := parsedRule.GetInhibitRules(id.StringValue())
if err != nil {
return err
}
err = m.alertmanager.CreateInhibitRules(ctx, orgID, inhibitRules)
if err != nil {
return err
}
}
}
err = m.syncRuleStateWithTask(ctx, orgID, prepareTaskName(existingRule.ID.StringValue()), &parsedRule)
if err != nil {
return err
@ -488,6 +485,19 @@ func (m *Manager) DeleteRule(ctx context.Context, idStr string) error {
}
err = m.alertmanager.DeleteNotificationConfig(ctx, orgID, id.String())
if err != nil {
return err
}
err = m.alertmanager.DeleteAllRoutePoliciesByRuleId(ctx, id.String())
if err != nil {
return err
}
err = m.alertmanager.DeleteAllInhibitRulesByRuleId(ctx, orgID, id.String())
if err != nil {
return err
}
taskName := prepareTaskName(id.StringValue())
m.deleteTask(taskName)
@ -548,41 +558,30 @@ func (m *Manager) CreateRule(ctx context.Context, ruleStr string) (*ruletypes.Ge
}
id, err := m.ruleStore.CreateRule(ctx, storedRule, func(ctx context.Context, id valuer.UUID) error {
cfg, err := m.alertmanager.GetConfig(ctx, claims.OrgID)
if err != nil {
return err
}
var preferredChannels []string
if len(parsedRule.PreferredChannels) == 0 {
channels, err := m.alertmanager.ListChannels(ctx, claims.OrgID)
if err != nil {
return err
}
for _, channel := range channels {
preferredChannels = append(preferredChannels, channel.Name)
}
} else {
preferredChannels = parsedRule.PreferredChannels
}
if parsedRule.NotificationSettings != nil {
config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig()
err = m.alertmanager.SetNotificationConfig(ctx, orgID, storedRule.ID.StringValue(), &config)
err = m.alertmanager.SetNotificationConfig(ctx, orgID, id.StringValue(), &config)
if err != nil {
return err
}
if !parsedRule.NotificationSettings.UsePolicy {
request, err := parsedRule.GetRuleRouteRequest(id.StringValue())
if err != nil {
return err
}
_, err = m.alertmanager.CreateRoutePolicies(ctx, request)
if err != nil {
return err
}
inhibitRules, err := parsedRule.GetInhibitRules(id.StringValue())
if err != nil {
return err
}
err = m.alertmanager.CreateInhibitRules(ctx, orgID, inhibitRules)
if err != nil {
return err
}
}
err = cfg.CreateRuleIDMatcher(id.StringValue(), preferredChannels)
if err != nil {
return err
}
err = m.alertmanager.SetConfig(ctx, cfg)
if err != nil {
return err
}
taskName := prepareTaskName(id.StringValue())
@ -756,8 +755,9 @@ func (m *Manager) prepareTestNotifyFunc() NotifyFunc {
if len(alerts) == 0 {
return
}
alert := alerts[0]
ruleID := alerts[0].Labels.Map()[labels.AlertRuleIdLabel]
receiverMap := make(map[*alertmanagertypes.PostableAlert][]string)
for _, alert := range alerts {
generatorURL := alert.GeneratorURL
a := &alertmanagertypes.PostableAlert{}
@ -772,20 +772,13 @@ func (m *Manager) prepareTestNotifyFunc() NotifyFunc {
} else {
a.EndsAt = strfmt.DateTime(alert.ValidUntil)
}
if len(alert.Receivers) == 0 {
channels, err := m.alertmanager.ListChannels(ctx, orgID)
receiverMap[a] = alert.Receivers
}
err := m.alertmanager.TestAlert(ctx, orgID, ruleID, receiverMap)
if err != nil {
zap.L().Error("failed to list channels while sending test notification", zap.Error(err))
zap.L().Error("failed to send test notification", zap.Error(err))
return
}
for _, channel := range channels {
alert.Receivers = append(alert.Receivers, channel.Name)
}
}
m.alertmanager.TestAlert(ctx, orgID, a, alert.Receivers)
}
}
@ -983,6 +976,17 @@ func (m *Manager) TestNotification(ctx context.Context, orgID valuer.UUID, ruleS
if err != nil {
return 0, model.BadRequest(err)
}
if !parsedRule.NotificationSettings.UsePolicy {
parsedRule.NotificationSettings.GroupBy = append(parsedRule.NotificationSettings.GroupBy, ruletypes.LabelThresholdName)
}
config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig()
err = m.alertmanager.SetNotificationConfig(ctx, orgID, parsedRule.AlertName, &config)
if err != nil {
return 0, &model.ApiError{
Typ: model.ErrorBadData,
Err: err,
}
}
alertCount, apiErr := m.prepareTestRuleFunc(PrepareTestRuleOptions{
Rule: &parsedRule,

View File

@ -2,10 +2,15 @@ package rules
import (
"context"
"fmt"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/rulebasednotification"
"github.com/prometheus/common/model"
"strings"
"testing"
"time"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfmanagertest"
"github.com/stretchr/testify/assert"
"go.uber.org/zap"
@ -32,19 +37,38 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
Email: "test@example.com",
Role: "admin",
}
manager, mockSQLRuleStore, orgId := setupTestManager(t)
manager, mockSQLRuleStore, mockRouteStore, nfmanager, orgId := setupTestManager(t)
claims.OrgID = orgId
testCases := []struct {
name string
originalData string
patchData string
Route []*alertmanagertypes.RoutePolicy
Config *alertmanagertypes.NotificationConfig
expectedResult func(*ruletypes.GettableRule) bool
expectError bool
description string
}{
{
name: "patch complete rule with task sync validation",
Route: []*alertmanagertypes.RoutePolicy{
{
Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"test-alerts"},
Name: "{{.ruleId}}",
Enabled: true,
},
},
Config: &alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 4 * time.Hour,
NoDataInterval: 4 * time.Hour,
},
UsePolicy: false,
},
originalData: `{
"schemaVersion":"v1",
"alert": "test-original-alert",
@ -95,6 +119,23 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
},
{
name: "patch rule to disabled state",
Route: []*alertmanagertypes.RoutePolicy{
{
Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"test-alerts"},
Name: "{{.ruleId}}",
Enabled: true,
},
},
Config: &alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 4 * time.Hour,
NoDataInterval: 4 * time.Hour,
},
UsePolicy: false,
},
originalData: `{
"schemaVersion":"v2",
"alert": "test-disable-alert",
@ -179,6 +220,20 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
OrgID: claims.OrgID,
}
// Update route expectations with actual rule ID
routesWithRuleID := make([]*alertmanagertypes.RoutePolicy, len(tc.Route))
for i, route := range tc.Route {
routesWithRuleID[i] = &alertmanagertypes.RoutePolicy{
Expression: strings.Replace(route.Expression, "{{.ruleId}}", ruleID.String(), -1),
ExpressionKind: route.ExpressionKind,
Channels: route.Channels,
Name: strings.Replace(route.Name, "{{.ruleId}}", ruleID.String(), -1),
Enabled: route.Enabled,
}
}
mockRouteStore.ExpectDeleteRouteByName(existingRule.OrgID, ruleID.String())
mockRouteStore.ExpectCreateBatch(routesWithRuleID)
mockSQLRuleStore.ExpectGetStoredRule(ruleID, existingRule)
mockSQLRuleStore.ExpectEditRule(existingRule)
@ -200,6 +255,12 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
assert.Nil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be removed for disabled rule")
} else {
syncCompleted := waitForTaskSync(manager, taskName, true, 2*time.Second)
// Verify notification config
config, err := nfmanager.GetNotificationConfig(orgId, result.Id)
assert.NoError(t, err)
assert.Equal(t, tc.Config, config)
assert.True(t, syncCompleted, "Task synchronization should complete within timeout")
assert.NotNil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be created/updated for enabled rule")
assert.Greater(t, len(manager.Rules()), 0, "Rules should be updated in manager")
@ -234,7 +295,7 @@ func findTaskByName(tasks []Task, taskName string) Task {
return nil
}
func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore, string) {
func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore, *nfroutingstoretest.MockSQLRouteStore, nfmanager.NotificationManager, string) {
settings := instrumentationtest.New().ToProviderSettings()
testDB := utils.NewQueryServiceDBForTests(t)
@ -266,7 +327,11 @@ func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore,
t.Fatalf("Failed to create noop sharder: %v", err)
}
orgGetter := implorganization.NewGetter(implorganization.NewStore(testDB), noopSharder)
notificationManager := nfmanagertest.NewMock()
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
notificationManager, err := rulebasednotification.New(t.Context(), settings, nfmanager.Config{}, routeStore)
if err != nil {
t.Fatalf("Failed to create alert manager: %v", err)
}
alertManager, err := signozalertmanager.New(context.TODO(), settings, alertmanager.Config{Provider: "signoz", Signoz: alertmanager.Signoz{PollInterval: 10 * time.Second, Config: alertmanagerserver.NewConfig()}}, testDB, orgGetter, notificationManager)
if err != nil {
t.Fatalf("Failed to create alert manager: %v", err)
@ -290,21 +355,40 @@ func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore,
}
close(manager.block)
return manager, mockSQLRuleStore, testOrgID.StringValue()
return manager, mockSQLRuleStore, routeStore, notificationManager, testOrgID.StringValue()
}
func TestCreateRule(t *testing.T) {
claims := &authtypes.Claims{
Email: "test@example.com",
}
manager, mockSQLRuleStore, orgId := setupTestManager(t)
manager, mockSQLRuleStore, mockRouteStore, nfmanager, orgId := setupTestManager(t)
claims.OrgID = orgId
testCases := []struct {
name string
Route []*alertmanagertypes.RoutePolicy
Config *alertmanagertypes.NotificationConfig
ruleStr string
}{
{
name: "validate stored rule data structure",
Route: []*alertmanagertypes.RoutePolicy{
{
Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"test-alerts"},
Name: "{{.ruleId}}",
Enabled: true,
},
},
Config: &alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 4 * time.Hour,
NoDataInterval: 4 * time.Hour,
},
UsePolicy: false,
},
ruleStr: `{
"alert": "cpu usage",
"ruleType": "threshold_rule",
@ -341,6 +425,30 @@ func TestCreateRule(t *testing.T) {
},
{
name: "create complete v2 rule with thresholds",
Route: []*alertmanagertypes.RoutePolicy{
{
Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"critical\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"test-alerts"},
Name: "{{.ruleId}}",
Enabled: true,
},
{
Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"test-alerts"},
Name: "{{.ruleId}}",
Enabled: true,
},
},
Config: &alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("k8s.node.name"): {}, model.LabelName("ruleId"): {}},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 10 * time.Minute,
NoDataInterval: 4 * time.Hour,
},
UsePolicy: false,
},
ruleStr: `{
"schemaVersion":"v2",
"state": "firing",
@ -398,6 +506,18 @@ func TestCreateRule(t *testing.T) {
"evalWindow": "6m",
"frequency": "1m"
}
},
"notificationSettings": {
"GroupBy": [
"k8s.node.name"
],
"renotify": {
"interval": "10m",
"enabled": true,
"alertStates": [
"firing"
]
}
},
"labels": {
"severity": "warning"
@ -429,6 +549,20 @@ func TestCreateRule(t *testing.T) {
},
OrgID: claims.OrgID,
}
// Update route expectations with actual rule ID
routesWithRuleID := make([]*alertmanagertypes.RoutePolicy, len(tc.Route))
for i, route := range tc.Route {
routesWithRuleID[i] = &alertmanagertypes.RoutePolicy{
Expression: strings.Replace(route.Expression, "{{.ruleId}}", rule.ID.String(), -1),
ExpressionKind: route.ExpressionKind,
Channels: route.Channels,
Name: strings.Replace(route.Name, "{{.ruleId}}", rule.ID.String(), -1),
Enabled: route.Enabled,
}
}
mockRouteStore.ExpectCreateBatch(routesWithRuleID)
mockSQLRuleStore.ExpectCreateRule(rule)
ctx := authtypes.NewContextWithClaims(context.Background(), *claims)
@ -441,6 +575,12 @@ func TestCreateRule(t *testing.T) {
// Wait for task creation with proper synchronization
taskName := prepareTaskName(result.Id)
syncCompleted := waitForTaskSync(manager, taskName, true, 2*time.Second)
// Verify notification config
config, err := nfmanager.GetNotificationConfig(orgId, result.Id)
assert.NoError(t, err)
assert.Equal(t, tc.Config, config)
assert.True(t, syncCompleted, "Task creation should complete within timeout")
assert.NotNil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be created with correct name")
assert.Greater(t, len(manager.Rules()), 0, "Rules should be added to manager")
@ -455,14 +595,35 @@ func TestEditRule(t *testing.T) {
claims := &authtypes.Claims{
Email: "test@example.com",
}
manager, mockSQLRuleStore, orgId := setupTestManager(t)
manager, mockSQLRuleStore, mockRouteStore, nfmanager, orgId := setupTestManager(t)
claims.OrgID = orgId
testCases := []struct {
ruleID string
name string
Route []*alertmanagertypes.RoutePolicy
Config *alertmanagertypes.NotificationConfig
ruleStr string
}{
{
ruleID: "12345678-1234-1234-1234-123456789012",
name: "validate edit rule functionality",
Route: []*alertmanagertypes.RoutePolicy{
{
Expression: fmt.Sprintf("ruleId == \"rule1\" && threshold.name == \"critical\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"critical-alerts"},
Name: "12345678-1234-1234-1234-123456789012",
Enabled: true,
},
},
Config: &alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 4 * time.Hour,
NoDataInterval: 4 * time.Hour,
},
UsePolicy: false,
},
ruleStr: `{
"alert": "updated cpu usage",
"ruleType": "threshold_rule",
@ -498,7 +659,32 @@ func TestEditRule(t *testing.T) {
}`,
},
{
ruleID: "12345678-1234-1234-1234-123456789013",
name: "edit complete v2 rule with thresholds",
Route: []*alertmanagertypes.RoutePolicy{
{
Expression: fmt.Sprintf("ruleId == \"rule2\" && threshold.name == \"critical\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"test-alerts"},
Name: "12345678-1234-1234-1234-123456789013",
Enabled: true,
},
{
Expression: fmt.Sprintf("ruleId == \"rule2\" && threshold.name == \"warning\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"test-alerts"},
Name: "12345678-1234-1234-1234-123456789013",
Enabled: true,
},
},
Config: &alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}, model.LabelName("k8s.node.name"): {}},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 10 * time.Minute,
NoDataInterval: 4 * time.Hour,
},
UsePolicy: false,
},
ruleStr: `{
"schemaVersion":"v2",
"state": "firing",
@ -559,6 +745,18 @@ func TestEditRule(t *testing.T) {
},
"labels": {
"severity": "critical"
},
"notificationSettings": {
"GroupBy": [
"k8s.node.name"
],
"renotify": {
"interval": "10m",
"enabled": true,
"alertStates": [
"firing"
]
}
},
"annotations": {
"description": "This alert is fired when memory usage crosses the threshold",
@ -573,11 +771,13 @@ func TestEditRule(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
ruleID := valuer.GenerateUUID()
ruleId, err := valuer.NewUUID(tc.ruleID)
if err != nil {
t.Errorf("error creating ruleId: %s", err)
}
existingRule := &ruletypes.Rule{
Identifiable: types.Identifiable{
ID: ruleID,
ID: ruleId,
},
TimeAuditable: types.TimeAuditable{
CreatedAt: time.Now(),
@ -590,18 +790,24 @@ func TestEditRule(t *testing.T) {
Data: `{"alert": "original cpu usage", "disabled": false}`,
OrgID: claims.OrgID,
}
mockSQLRuleStore.ExpectGetStoredRule(ruleID, existingRule)
mockRouteStore.ExpectDeleteRouteByName(existingRule.OrgID, ruleId.String())
mockRouteStore.ExpectCreateBatch(tc.Route)
mockSQLRuleStore.ExpectGetStoredRule(ruleId, existingRule)
mockSQLRuleStore.ExpectEditRule(existingRule)
ctx := authtypes.NewContextWithClaims(context.Background(), *claims)
err := manager.EditRule(ctx, tc.ruleStr, ruleID)
err = manager.EditRule(ctx, tc.ruleStr, ruleId)
assert.NoError(t, err)
// Wait for task update with proper synchronization
taskName := prepareTaskName(ruleID.StringValue())
taskName := prepareTaskName(ruleId.String())
syncCompleted := waitForTaskSync(manager, taskName, true, 2*time.Second)
config, err := nfmanager.GetNotificationConfig(orgId, ruleId.String())
assert.NoError(t, err)
assert.Equal(t, tc.Config, config)
assert.True(t, syncCompleted, "Task update should complete within timeout")
assert.NotNil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be updated with correct name")
assert.Greater(t, len(manager.Rules()), 0, "Rules should be updated in manager")

View File

@ -147,13 +147,19 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
var alerts = make(map[uint64]*ruletypes.Alert, len(res))
ruleReceivers := r.Threshold.GetRuleReceivers()
ruleReceiverMap := make(map[string][]string)
for _, value := range ruleReceivers {
ruleReceiverMap[value.Name] = value.Channels
}
for _, series := range res {
if len(series.Floats) == 0 {
continue
}
results, err := r.Threshold.ShouldAlert(toCommonSeries(series))
results, err := r.Threshold.ShouldAlert(toCommonSeries(series), r.Unit())
if err != nil {
return nil, err
}
@ -165,7 +171,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
}
r.logger.DebugContext(ctx, "alerting for series", "rule_name", r.Name(), "series", series)
threshold := valueFormatter.Format(r.targetVal(), r.Unit())
threshold := valueFormatter.Format(result.Target, result.TargetUnit)
tmplData := ruletypes.AlertTemplateData(l, valueFormatter.Format(result.V, r.Unit()), threshold)
// Inject some convenience variables that are easier to remember for users
@ -218,7 +224,6 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
r.lastError = err
return nil, err
}
alerts[h] = &ruletypes.Alert{
Labels: lbs,
QueryResultLables: resultLabels,
@ -227,13 +232,12 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
State: model.StatePending,
Value: result.V,
GeneratorURL: r.GeneratorURL(),
Receivers: r.preferredChannels,
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
}
}
}
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
// alerts[h] is ready, add or update active list now
for h, a := range alerts {
// Check whether we already have alerting state for the identifying label set.
@ -241,7 +245,9 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
alert.Value = a.Value
alert.Annotations = a.Annotations
alert.Receivers = r.preferredChannels
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
alert.Receivers = ruleReceiverMap[v]
}
continue
}

View File

@ -696,7 +696,7 @@ func TestPromRuleShouldAlert(t *testing.T) {
assert.NoError(t, err)
}
resultVectors, err := rule.Threshold.ShouldAlert(toCommonSeries(c.values))
resultVectors, err := rule.Threshold.ShouldAlert(toCommonSeries(c.values), rule.Unit())
assert.NoError(t, err)
// Compare full result vector with expected vector

View File

@ -38,7 +38,6 @@ func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError)
if parsedRule.RuleType == ruletypes.RuleTypeThreshold {
// add special labels for test alerts
parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target)
parsedRule.Labels[labels.RuleSourceLabel] = ""
parsedRule.Labels[labels.AlertRuleIdLabel] = ""

View File

@ -488,7 +488,7 @@ func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID,
continue
}
}
resultSeries, err := r.Threshold.ShouldAlert(*series)
resultSeries, err := r.Threshold.ShouldAlert(*series, r.Unit())
if err != nil {
return nil, err
}
@ -565,7 +565,7 @@ func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUI
continue
}
}
resultSeries, err := r.Threshold.ShouldAlert(*series)
resultSeries, err := r.Threshold.ShouldAlert(*series, r.Unit())
if err != nil {
return nil, err
}
@ -602,6 +602,12 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
resultFPs := map[uint64]struct{}{}
var alerts = make(map[uint64]*ruletypes.Alert, len(res))
ruleReceivers := r.Threshold.GetRuleReceivers()
ruleReceiverMap := make(map[string][]string)
for _, value := range ruleReceivers {
ruleReceiverMap[value.Name] = value.Channels
}
for _, smpl := range res {
l := make(map[string]string, len(smpl.Metric))
for _, lbl := range smpl.Metric {
@ -610,7 +616,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
value := valueFormatter.Format(smpl.V, r.Unit())
//todo(aniket): handle different threshold
threshold := valueFormatter.Format(r.targetVal(), r.Unit())
threshold := valueFormatter.Format(smpl.Target, smpl.TargetUnit)
r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold)
tmplData := ruletypes.AlertTemplateData(l, value, threshold)
@ -690,7 +696,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
State: model.StatePending,
Value: smpl.V,
GeneratorURL: r.GeneratorURL(),
Receivers: r.preferredChannels,
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
Missing: smpl.IsMissing,
}
}
@ -705,7 +711,9 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
alert.Value = a.Value
alert.Annotations = a.Annotations
alert.Receivers = r.preferredChannels
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
alert.Receivers = ruleReceiverMap[v]
}
continue
}

View File

@ -824,7 +824,7 @@ func TestThresholdRuleShouldAlert(t *testing.T) {
values.Points[i].Timestamp = time.Now().UnixMilli()
}
resultVectors, err := rule.Threshold.ShouldAlert(c.values)
resultVectors, err := rule.Threshold.ShouldAlert(c.values, rule.Unit())
assert.NoError(t, err, "Test case %d", idx)
// Compare result vectors with expected behavior
@ -1201,7 +1201,7 @@ func TestThresholdRuleLabelNormalization(t *testing.T) {
values.Points[i].Timestamp = time.Now().UnixMilli()
}
vector, err := rule.Threshold.ShouldAlert(c.values)
vector, err := rule.Threshold.ShouldAlert(c.values, rule.Unit())
assert.NoError(t, err)
for name, value := range c.values.Labels {
@ -1211,7 +1211,7 @@ func TestThresholdRuleLabelNormalization(t *testing.T) {
}
// Get result vectors from threshold evaluation
resultVectors, err := rule.Threshold.ShouldAlert(c.values)
resultVectors, err := rule.Threshold.ShouldAlert(c.values, rule.Unit())
assert.NoError(t, err, "Test case %d", idx)
// Compare result vectors with expected behavior
@ -1504,10 +1504,8 @@ func TestThresholdRuleUnitCombinations(t *testing.T) {
Name: postableRule.AlertName,
TargetValue: &c.target,
TargetUnit: c.targetUnit,
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
MatchType: ruletypes.MatchType(c.matchType),
CompareOp: ruletypes.CompareOp(c.compareOp),
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
},
},
}
@ -1614,10 +1612,8 @@ func TestThresholdRuleNoData(t *testing.T) {
{
Name: postableRule.AlertName,
TargetValue: &target,
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
MatchType: ruletypes.AtleastOnce,
CompareOp: ruletypes.ValueIsEq,
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
},
},
}
@ -1737,10 +1733,8 @@ func TestThresholdRuleTracesLink(t *testing.T) {
Name: postableRule.AlertName,
TargetValue: &c.target,
TargetUnit: c.targetUnit,
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
MatchType: ruletypes.MatchType(c.matchType),
CompareOp: ruletypes.CompareOp(c.compareOp),
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
},
},
}
@ -1876,10 +1870,8 @@ func TestThresholdRuleLogsLink(t *testing.T) {
Name: postableRule.AlertName,
TargetValue: &c.target,
TargetUnit: c.targetUnit,
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
MatchType: ruletypes.MatchType(c.matchType),
CompareOp: ruletypes.CompareOp(c.compareOp),
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
},
},
}
@ -2128,19 +2120,15 @@ func TestMultipleThresholdRule(t *testing.T) {
Name: "first_threshold",
TargetValue: &c.target,
TargetUnit: c.targetUnit,
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
MatchType: ruletypes.MatchType(c.matchType),
CompareOp: ruletypes.CompareOp(c.compareOp),
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
},
{
Name: "second_threshold",
TargetValue: &c.secondTarget,
TargetUnit: c.targetUnit,
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
MatchType: ruletypes.MatchType(c.matchType),
CompareOp: ruletypes.CompareOp(c.compareOp),
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
},
},
}

View File

@ -38,6 +38,7 @@ import (
"github.com/SigNoz/signoz/pkg/telemetrystore"
"github.com/SigNoz/signoz/pkg/telemetrystore/clickhousetelemetrystore"
"github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystorehook"
routeTypes "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/SigNoz/signoz/pkg/version"
"github.com/SigNoz/signoz/pkg/web"
"github.com/SigNoz/signoz/pkg/web/noopweb"
@ -133,6 +134,7 @@ func NewSQLMigrationProviderFactories(
sqlmigration.NewQueryBuilderV5MigrationFactory(sqlstore, telemetryStore),
sqlmigration.NewAddMeterQuickFiltersFactory(sqlstore, sqlschema),
sqlmigration.NewUpdateTTLSettingForCustomRetentionFactory(sqlstore, sqlschema),
sqlmigration.NewAddRoutePolicyFactory(sqlstore, sqlschema),
)
}
@ -155,9 +157,9 @@ func NewPrometheusProviderFactories(telemetryStore telemetrystore.TelemetryStore
)
}
func NewNotificationManagerProviderFactories() factory.NamedMap[factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config]] {
func NewNotificationManagerProviderFactories(routeStore routeTypes.RouteStore) factory.NamedMap[factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config]] {
return factory.MustNewNamedMap(
rulebasednotification.NewFactory(),
rulebasednotification.NewFactory(routeStore),
)
}

View File

@ -4,6 +4,7 @@ import (
"context"
"github.com/SigNoz/signoz/pkg/alertmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/sqlroutingstore"
"github.com/SigNoz/signoz/pkg/analytics"
"github.com/SigNoz/signoz/pkg/cache"
"github.com/SigNoz/signoz/pkg/emailing"
@ -230,12 +231,14 @@ func New(
// Initialize user getter
userGetter := impluser.NewGetter(impluser.NewStore(sqlstore, providerSettings))
// will need to create factory for all stores
routeStore := sqlroutingstore.NewStore(sqlstore)
// shared NotificationManager instance for both alertmanager and rules
notificationManager, err := factory.NewProviderFromNamedMap(
ctx,
providerSettings,
nfmanager.Config{},
NewNotificationManagerProviderFactories(),
NewNotificationManagerProviderFactories(routeStore),
"rulebased",
)
if err != nil {

View File

@ -0,0 +1,260 @@
package sqlmigration
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/sqlschema"
"github.com/SigNoz/signoz/pkg/sqlstore"
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/ruletypes"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/uptrace/bun"
"github.com/uptrace/bun/migrate"
"log/slog"
"time"
)
// Shared types for migration

// expressionRoute is the migration-local bun model bound to the
// "route_policy" table. It embeds the shared identifier and audit field sets
// and declares the routing columns named by the bun tags below.
type expressionRoute struct {
bun.BaseModel `bun:"table:route_policy"`
types.Identifiable
types.TimeAuditable
types.UserAuditable
// Expression maps to the "expression" text column.
Expression string `bun:"expression,type:text"`
// ExpressionKind maps to the "kind" column; note the column name differs
// from the field name.
ExpressionKind string `bun:"kind,type:text"`
// Channels is a string slice kept in a single text column.
// NOTE(review): the serialized form is decided by bun — confirm before
// reading this column with raw SQL.
Channels []string `bun:"channels,type:text"`
// Name maps to the "name" text column.
Name string `bun:"name,type:text"`
// Description maps to the "description" text column.
Description string `bun:"description,type:text"`
// Enabled maps to the "enabled" boolean column, tagged with default true.
Enabled bool `bun:"enabled,type:boolean,default:true"`
// Tags is a string slice kept in a single text column (same caveat as
// Channels regarding the serialized form).
Tags []string `bun:"tags,type:text"`
// OrgID maps to the "org_id" text column.
OrgID string `bun:"org_id,type:text"`
}
// rule is the migration-local bun model bound to the "rule" table. It embeds
// the shared identifier and audit field sets.
type rule struct {
bun.BaseModel `bun:"table:rule"`
types.Identifiable
types.TimeAuditable
types.UserAuditable
// Deleted maps to the "deleted" column, tagged with default 0.
Deleted int `bun:"deleted,default:0"`
// Data maps to the "data" text column.
// NOTE(review): presumably the serialized rule definition — confirm
// against the code that reads it.
Data string `bun:"data,type:text"`
// OrgID maps to the "org_id" text column.
OrgID string `bun:"org_id,type:text"`
}
// addRoutePolicies is the migration value whose Up method creates the
// "route_policy" table and then runs migrateRulesToRoutePolicies. It carries
// the dependencies handed to its constructor.
type addRoutePolicies struct {
// sqlstore is the SQL store supplied at construction time.
sqlstore sqlstore.SQLStore
// sqlschema is used to look up existing tables and to generate the
// CREATE TABLE statements.
sqlschema sqlschema.SQLSchema
// logger is the structured logger taken from the provider settings.
logger *slog.Logger
}
// NewAddRoutePolicyFactory returns a provider factory registered under the
// name "add_route_policy". The factory's constructor forwards the provider
// settings and config, together with the given SQL store and schema, to
// newAddRoutePolicy.
func NewAddRoutePolicyFactory(sqlstore sqlstore.SQLStore, sqlschema sqlschema.SQLSchema) factory.ProviderFactory[SQLMigration, Config] {
	name := factory.MustNewName("add_route_policy")

	// The closure captures the store and schema so the factory can build the
	// migration later from settings and config alone.
	build := func(ctx context.Context, providerSettings factory.ProviderSettings, config Config) (SQLMigration, error) {
		return newAddRoutePolicy(ctx, providerSettings, config, sqlstore, sqlschema)
	}

	return factory.NewProviderFactory(name, build)
}
// newAddRoutePolicy builds the addRoutePolicies migration from the supplied
// SQL store and schema, taking its logger from the provider settings. The
// context and config arguments are ignored. It never returns an error.
func newAddRoutePolicy(_ context.Context, settings factory.ProviderSettings, _ Config, sqlstore sqlstore.SQLStore, sqlschema sqlschema.SQLSchema) (SQLMigration, error) {
	m := new(addRoutePolicies)
	m.sqlstore = sqlstore
	m.sqlschema = sqlschema
	m.logger = settings.Logger
	return m, nil
}
// Register adds this migration's Up and Down functions to the given
// migration set and returns whatever error the registration reports.
func (migration *addRoutePolicies) Register(migrations *migrate.Migrations) error {
	return migrations.Register(migration.Up, migration.Down)
}
// Up creates the route_policy table and back-fills it from the existing
// rules, all inside a single transaction.
//
// If the table lookup succeeds the table is taken to exist already and the
// migration is a no-op. Any failure while creating the table or converting
// rules rolls the transaction back through the deferred Rollback.
func (migration *addRoutePolicies) Up(ctx context.Context, db *bun.DB) error {
	// A nil error from GetTable means the table is already present.
	if _, _, lookupErr := migration.sqlschema.GetTable(ctx, sqlschema.TableName("route_policy")); lookupErr == nil {
		return nil
	}

	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		return err
	}
	// Rollback after a successful Commit only reports that the transaction
	// is finished, so its result is intentionally ignored.
	defer func() { _ = tx.Rollback() }()

	columns := []*sqlschema.Column{
		{Name: "id", DataType: sqlschema.DataTypeText, Nullable: false},
		{Name: "created_at", DataType: sqlschema.DataTypeTimestamp, Nullable: false},
		{Name: "updated_at", DataType: sqlschema.DataTypeTimestamp, Nullable: false},
		{Name: "created_by", DataType: sqlschema.DataTypeText, Nullable: false},
		{Name: "updated_by", DataType: sqlschema.DataTypeText, Nullable: false},
		{Name: "expression", DataType: sqlschema.DataTypeText, Nullable: false},
		{Name: "kind", DataType: sqlschema.DataTypeText, Nullable: false},
		{Name: "channels", DataType: sqlschema.DataTypeText, Nullable: false},
		{Name: "name", DataType: sqlschema.DataTypeText, Nullable: false},
		{Name: "description", DataType: sqlschema.DataTypeText, Nullable: true},
		{Name: "enabled", DataType: sqlschema.DataTypeBoolean, Nullable: false, Default: "true"},
		{Name: "tags", DataType: sqlschema.DataTypeText, Nullable: true},
		{Name: "org_id", DataType: sqlschema.DataTypeText, Nullable: false},
	}
	orgForeignKey := &sqlschema.ForeignKeyConstraint{
		ReferencingColumnName: "org_id",
		ReferencedTableName:   "organizations",
		ReferencedColumnName:  "id",
	}
	routePolicyTable := &sqlschema.Table{
		Name:    "route_policy",
		Columns: columns,
		PrimaryKeyConstraint: &sqlschema.PrimaryKeyConstraint{
			ColumnNames: []sqlschema.ColumnName{"id"},
		},
		ForeignKeyConstraints: []*sqlschema.ForeignKeyConstraint{orgForeignKey},
	}

	// Run the generated DDL statements in order.
	for _, ddl := range migration.sqlschema.Operator().CreateTable(routePolicyTable) {
		if _, execErr := tx.ExecContext(ctx, string(ddl)); execErr != nil {
			return execErr
		}
	}

	if migrateErr := migration.migrateRulesToRoutePolicies(ctx, tx); migrateErr != nil {
		return migrateErr
	}

	return tx.Commit()
}
// migrateRulesToRoutePolicies reads every non-deleted rule, converts each one
// into a route_policy row and inserts the rows in one batch on tx.
//
// It returns nil when there is nothing to insert. Every failure is reported
// as an internal error whose message includes the underlying cause, so a
// failed migration can be diagnosed from its log line.
func (migration *addRoutePolicies) migrateRulesToRoutePolicies(ctx context.Context, tx bun.Tx) error {
	var rules []*rule
	err := tx.NewSelect().
		Model(&rules).
		Where("deleted = ?", 0).
		Scan(ctx)
	if err != nil {
		if errors.Is(err, sql.ErrNoRows) {
			return nil // No rules to migrate
		}
		return errors.NewInternalf(errors.CodeInternal, "failed to fetch rules: %v", err)
	}

	channelsByOrg, err := migration.getAllChannels(ctx, tx)
	if err != nil {
		return errors.NewInternalf(errors.CodeInternal, "fetching channels error: %v", err)
	}

	routesToInsert, err := migration.convertRulesToRoutes(rules, channelsByOrg)
	if err != nil {
		return errors.NewInternalf(errors.CodeInternal, "converting rules to routes error: %v", err)
	}

	// Nothing to insert; an empty batch insert is skipped entirely.
	if len(routesToInsert) == 0 {
		return nil
	}

	// Insert all routes in a single batch operation.
	if _, err = tx.NewInsert().Model(&routesToInsert).Exec(ctx); err != nil {
		return errors.NewInternalf(errors.CodeInternal, "failed to insert notification routes: %v", err)
	}
	return nil
}
// convertRulesToRoutes builds one route_policy row per rule.
//
// For each rule the JSON in Data is decoded. When the rule has no preferred
// channels, all channels of the rule's organization are used instead; a rule
// whose organization has no channels is skipped. The generated expression
// matches on the rule's severity label (falling back to "critical") and the
// rule ID.
//
// All rows produced by one call share a single timestamp for CreatedAt and
// UpdatedAt, so a freshly migrated row never looks modified after creation.
//
// A rule whose Data cannot be decoded aborts the conversion with an error.
func (migration *addRoutePolicies) convertRulesToRoutes(rules []*rule, channelsByOrg map[string][]string) ([]*expressionRoute, error) {
	now := time.Now()

	var routes []*expressionRoute
	for _, r := range rules {
		var gettableRule ruletypes.GettableRule
		if err := json.Unmarshal([]byte(r.Data), &gettableRule); err != nil {
			return nil, errors.NewInternalf(errors.CodeInternal, "failed to unmarshal rule data for rule ID %s: %v", r.ID, err)
		}

		channels := gettableRule.PreferredChannels
		if len(channels) == 0 {
			// Fall back to every channel configured for the organization.
			channels = channelsByOrg[r.OrgID]
			if len(channels) == 0 {
				continue
			}
		}

		// NOTE(review): the fallback is lowercase "critical"; confirm it
		// matches the threshold name given to rules without a severity label.
		severity := "critical"
		if v, ok := gettableRule.Labels["severity"]; ok {
			severity = v
		}

		routes = append(routes, &expressionRoute{
			Identifiable: types.Identifiable{
				ID: valuer.GenerateUUID(),
			},
			TimeAuditable: types.TimeAuditable{
				CreatedAt: now,
				UpdatedAt: now,
			},
			UserAuditable: types.UserAuditable{
				CreatedBy: r.CreatedBy,
				UpdatedBy: r.UpdatedBy,
			},
			Expression:     fmt.Sprintf(`%s == "%s" && %s == "%s"`, "threshold.name", severity, "ruleId", r.ID.String()),
			ExpressionKind: "rule",
			Channels:       channels,
			Name:           r.ID.StringValue(),
			Enabled:        true,
			OrgID:          r.OrgID,
		})
	}
	return routes, nil
}
// getAllChannels loads every row of the notification_channel table and
// returns the channel names grouped by organization ID.
//
// The returned map is never nil on success. A query failure is reported as an
// internal error that includes the underlying cause.
func (migration *addRoutePolicies) getAllChannels(ctx context.Context, tx bun.Tx) (map[string][]string, error) {
	// channel is a migration-local model of the notification_channel table.
	type channel struct {
		bun.BaseModel `bun:"table:notification_channel"`
		types.Identifiable
		types.TimeAuditable
		Name  string `json:"name" bun:"name"`
		Type  string `json:"type" bun:"type"`
		Data  string `json:"data" bun:"data"`
		OrgID string `json:"org_id" bun:"org_id"`
	}

	var channels []*channel
	if err := tx.NewSelect().Model(&channels).Scan(ctx); err != nil {
		return nil, errors.NewInternalf(errors.CodeInternal, "failed to fetch all channels: %v", err)
	}

	// Group channel names by org ID.
	channelsByOrg := make(map[string][]string)
	for _, ch := range channels {
		channelsByOrg[ch.OrgID] = append(channelsByOrg[ch.OrgID], ch.Name)
	}
	return channelsByOrg, nil
}
// Down is a no-op: this migration is not reverted.
func (*addRoutePolicies) Down(context.Context, *bun.DB) error {
	return nil
}

View File

@ -27,6 +27,8 @@ type (
// An alias for the Alert type from the alertmanager package.
Alert = types.Alert
AlertSlice = types.AlertSlice
PostableAlert = models.PostableAlert
PostableAlerts = models.PostableAlerts
@ -38,6 +40,10 @@ type (
GettableAlerts = models.GettableAlerts
)
const (
// NoDataLabel is the label name whose presence on an alert marks it as a
// "no data" alert (see NoDataAlert).
NoDataLabel = model.LabelName("nodata")
)
type DeprecatedGettableAlert struct {
*model.Alert
Status types.AlertStatus `json:"status"`
@ -307,3 +313,11 @@ func receiversMatchFilter(receivers []string, filter *regexp.Regexp) bool {
return false
}
// NoDataAlert reports whether alert carries the NoDataLabel label. Only the
// presence of the label is checked, not its value. A nil alert is reported as
// not being a no-data alert.
func NoDataAlert(alert *types.Alert) bool {
	if alert == nil {
		return false
	}
	_, ok := alert.Labels[NoDataLabel]
	return ok
}

View File

@ -21,6 +21,7 @@ import (
// Default names used by the alertmanager configuration.
const (
DefaultReceiverName string = "default-receiver"
DefaultGroupBy string = "ruleId"
// DefaultGroupByAll is the group-by entry that makes NewNotificationConfig
// set GroupByAll on the resulting NotificationConfig.
DefaultGroupByAll string = "__all__"
)
var (
@ -193,6 +194,20 @@ func (c *Config) SetRouteConfig(routeConfig RouteConfig) error {
return nil
}
// AddInhibitRules appends rules to the alertmanager config's inhibit rules
// and refreshes the stored raw config, its hash and its update time.
//
// It returns an invalid-input error when the alertmanager config is nil.
func (c *Config) AddInhibitRules(rules []config.InhibitRule) error {
	amConfig := c.alertmanagerConfig
	if amConfig == nil {
		return errors.New(errors.TypeInvalidInput, ErrCodeAlertmanagerConfigInvalid, "config is nil")
	}

	amConfig.InhibitRules = append(amConfig.InhibitRules, rules...)

	// Re-serialize so the stored copy and its hash reflect the new rules.
	raw := string(newRawFromConfig(amConfig))
	c.storeableConfig.Config = raw
	c.storeableConfig.Hash = fmt.Sprintf("%x", newConfigHash(raw))
	c.storeableConfig.UpdatedAt = time.Now()
	return nil
}
func (c *Config) AlertmanagerConfig() *config.Config {
return c.alertmanagerConfig
}
@ -304,6 +319,27 @@ func (c *Config) CreateRuleIDMatcher(ruleID string, receiverNames []string) erro
return nil
}
// DeleteRuleIDInhibitor removes every inhibit rule whose source or target
// matchers reference ruleID, then refreshes the stored raw config, its hash
// and its update time.
//
// It returns an invalid-input error when the alertmanager config is nil
// (matching AddInhibitRules) and nil when there are no inhibit rules at all.
func (c *Config) DeleteRuleIDInhibitor(ruleID string) error {
	if c.alertmanagerConfig == nil {
		return errors.New(errors.TypeInvalidInput, ErrCodeAlertmanagerConfigInvalid, "config is nil")
	}
	if c.alertmanagerConfig.InhibitRules == nil {
		return nil // already nil
	}

	// Keep only the rules that do not mention ruleID on either side.
	var filteredRules []config.InhibitRule
	for _, inhibitor := range c.alertmanagerConfig.InhibitRules {
		if matcherContainsRuleID(inhibitor.SourceMatchers, ruleID) ||
			matcherContainsRuleID(inhibitor.TargetMatchers, ruleID) {
			continue
		}
		filteredRules = append(filteredRules, inhibitor)
	}
	c.alertmanagerConfig.InhibitRules = filteredRules

	c.storeableConfig.Config = string(newRawFromConfig(c.alertmanagerConfig))
	c.storeableConfig.Hash = fmt.Sprintf("%x", newConfigHash(c.storeableConfig.Config))
	c.storeableConfig.UpdatedAt = time.Now()
	return nil
}
func (c *Config) UpdateRuleIDMatcher(ruleID string, receiverNames []string) error {
err := c.DeleteRuleIDMatcher(ruleID)
if err != nil {
@ -405,6 +441,8 @@ func init() {
// NotificationConfig holds the per-rule notification settings built by
// NewNotificationConfig.
type NotificationConfig struct {
// NotificationGroup is the set of label names to group notifications by.
NotificationGroup map[model.LabelName]struct{}
Renotify ReNotificationConfig
// UsePolicy is copied from the policy argument of NewNotificationConfig.
UsePolicy bool
// GroupByAll is set when one of the requested groups is DefaultGroupByAll.
GroupByAll bool
}
func (nc *NotificationConfig) DeepCopy() NotificationConfig {
@ -415,6 +453,7 @@ func (nc *NotificationConfig) DeepCopy() NotificationConfig {
for k, v := range nc.NotificationGroup {
deepCopy.NotificationGroup[k] = v
}
deepCopy.UsePolicy = nc.UsePolicy
return deepCopy
}
@ -423,7 +462,7 @@ type ReNotificationConfig struct {
RenotifyInterval time.Duration
}
func NewNotificationConfig(groups []string, renotifyInterval time.Duration, noDataRenotifyInterval time.Duration) NotificationConfig {
func NewNotificationConfig(groups []string, renotifyInterval time.Duration, noDataRenotifyInterval time.Duration, policy bool) NotificationConfig {
notificationConfig := GetDefaultNotificationConfig()
if renotifyInterval != 0 {
@ -435,7 +474,12 @@ func NewNotificationConfig(groups []string, renotifyInterval time.Duration, noDa
}
for _, group := range groups {
notificationConfig.NotificationGroup[model.LabelName(group)] = struct{}{}
if group == DefaultGroupByAll {
notificationConfig.GroupByAll = true
}
}
notificationConfig.UsePolicy = policy
return notificationConfig
}

View File

@ -0,0 +1,139 @@
package alertmanagertypes
import (
"context"
"github.com/expr-lang/expr"
"time"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/uptrace/bun"
)
// PostableRoutePolicy is the request payload describing a route policy.
// Validate lists the constraints each field must satisfy.
type PostableRoutePolicy struct {
// Expression is the routing expression; it must compile with expr.Compile.
Expression string `json:"expression"`
// ExpressionKind must be RuleBasedExpression or PolicyBasedExpression.
ExpressionKind ExpressionKind `json:"kind"`
// Channels must contain at least one entry and no empty strings.
Channels []string `json:"channels"`
Name string `json:"name"`
Description string `json:"description"`
Tags []string `json:"tags,omitempty"`
}
// Validate checks that the payload is usable as a route policy.
//
// It returns an invalid-input error when the receiver is nil, when the
// expression or name is empty, when no channel is given or a channel is an
// empty string, when the expression kind is neither policy- nor rule-based,
// or when the expression does not compile. The first failing check wins.
func (p *PostableRoutePolicy) Validate() error {
	// Guard against a nil receiver, as (*RoutePolicy).Validate does.
	if p == nil {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "route_policy cannot be nil")
	}

	if p.Expression == "" {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "expression is required")
	}

	if p.Name == "" {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "name is required")
	}

	if len(p.Channels) == 0 {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "at least one channel is required")
	}

	// Validate channels are not empty
	for i, channel := range p.Channels {
		if channel == "" {
			return errors.NewInvalidInputf(errors.CodeInvalidInput, "channel at index %d cannot be empty", i)
		}
	}

	if p.ExpressionKind != PolicyBasedExpression && p.ExpressionKind != RuleBasedExpression {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported expression kind: %s", p.ExpressionKind.StringValue())
	}

	// Compile only to check syntax; the compiled program is discarded.
	if _, err := expr.Compile(p.Expression); err != nil {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid expression syntax: %v", err)
	}

	return nil
}
// GettableRoutePolicy is the response form of a route policy: the postable
// fields plus the identifier and audit metadata.
type GettableRoutePolicy struct {
PostableRoutePolicy // Embedded
ID string `json:"id"`
// Audit fields
CreatedAt *time.Time `json:"createdAt"`
UpdatedAt *time.Time `json:"updatedAt"`
CreatedBy *string `json:"createdBy"`
UpdatedBy *string `json:"updatedBy"`
}
// ExpressionKind is the kind of a route policy expression, wrapping a
// valuer.String. The accepted values are RuleBasedExpression and
// PolicyBasedExpression.
type ExpressionKind struct {
valuer.String
}
// Expression kinds accepted by the Validate methods in this file.
var (
// RuleBasedExpression is the "rule" kind.
RuleBasedExpression = ExpressionKind{valuer.NewString("rule")}
// PolicyBasedExpression is the "policy" kind.
PolicyBasedExpression = ExpressionKind{valuer.NewString("policy")}
)
// RoutePolicy represents the database model for expression routes. It maps
// to the route_policy table.
type RoutePolicy struct {
bun.BaseModel `bun:"table:route_policy"`
types.Identifiable
types.TimeAuditable
types.UserAuditable
// Expression is the routing expression text; required by Validate.
Expression string `bun:"expression,type:text,notnull" json:"expression"`
// ExpressionKind is stored in the "kind" column.
ExpressionKind ExpressionKind `bun:"kind,type:text" json:"kind"`
// Channels must hold at least one non-empty entry (see Validate).
Channels []string `bun:"channels,type:jsonb" json:"channels"`
Name string `bun:"name,type:text" json:"name"`
Description string `bun:"description,type:text" json:"description"`
Enabled bool `bun:"enabled,type:boolean,default:true" json:"enabled"`
Tags []string `bun:"tags,type:jsonb" json:"tags,omitempty"`
// OrgID is the owning organization; required by Validate.
OrgID string `bun:"org_id,type:text,notnull" json:"orgId"`
}
// Validate checks the stored route policy and returns an invalid-input error
// for the first violated rule: nil receiver, empty expression, empty name,
// empty organization ID, no channels, an empty channel entry, or an
// expression kind that is neither policy- nor rule-based.
func (er *RoutePolicy) Validate() error {
	// The nil case is listed first so later cases can read fields safely.
	switch {
	case er == nil:
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "route_policy cannot be nil")
	case er.Expression == "":
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "expression is required")
	case er.Name == "":
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "name is required")
	case er.OrgID == "":
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "organization ID is required")
	case len(er.Channels) == 0:
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "at least one channel is required")
	}

	// Every channel entry must be a non-empty name.
	for idx := range er.Channels {
		if er.Channels[idx] == "" {
			return errors.NewInvalidInputf(errors.CodeInvalidInput, "channel at index %d cannot be empty", idx)
		}
	}

	knownKind := er.ExpressionKind == PolicyBasedExpression || er.ExpressionKind == RuleBasedExpression
	if !knownKind {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported expression kind: %s", er.ExpressionKind.StringValue())
	}
	return nil
}
// RouteStore is the persistence interface for RoutePolicy records. All
// lookups and deletions take the organization ID as their first argument
// after the context.
//
// NOTE(review): the exact semantics of each method (for example what is
// returned when no record matches) are defined by the implementation, which
// is not visible here.
type RouteStore interface {
	GetByID(ctx context.Context, orgID string, id string) (*RoutePolicy, error)
	Create(ctx context.Context, route *RoutePolicy) error
	CreateBatch(ctx context.Context, routes []*RoutePolicy) error
	Delete(ctx context.Context, orgID string, id string) error
	GetAllByKind(ctx context.Context, orgID string, kind ExpressionKind) ([]*RoutePolicy, error)
	GetAllByName(ctx context.Context, orgID string, name string) ([]*RoutePolicy, error)
	DeleteRouteByName(ctx context.Context, orgID string, name string) error
}

View File

@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"github.com/prometheus/common/model"
"log/slog"
"time"
@ -49,9 +50,9 @@ func NewReceiver(input string) (Receiver, error) {
return receiverWithDefaults, nil
}
func TestReceiver(ctx context.Context, receiver Receiver, receiverIntegrationsFunc ReceiverIntegrationsFunc, config *Config, tmpl *template.Template, logger *slog.Logger, alert *Alert) error {
ctx = notify.WithGroupKey(ctx, fmt.Sprintf("%s-%s-%d", receiver.Name, alert.Labels.Fingerprint(), time.Now().Unix()))
ctx = notify.WithGroupLabels(ctx, alert.Labels)
func TestReceiver(ctx context.Context, receiver Receiver, receiverIntegrationsFunc ReceiverIntegrationsFunc, config *Config, tmpl *template.Template, logger *slog.Logger, lSet model.LabelSet, alert ...*Alert) error {
ctx = notify.WithGroupKey(ctx, fmt.Sprintf("%s-%s-%d", receiver.Name, lSet.Fingerprint(), time.Now().Unix()))
ctx = notify.WithGroupLabels(ctx, lSet)
ctx = notify.WithReceiverName(ctx, receiver.Name)
// We need to create a new config with the same global and route config but empty receivers and routes
@ -80,7 +81,7 @@ func TestReceiver(ctx context.Context, receiver Receiver, receiverIntegrationsFu
return errors.Newf(errors.TypeNotFound, errors.CodeNotFound, "no integrations found for receiver %s", receiver.Name)
}
if _, err = integrations[0].Notify(ctx, alert); err != nil {
if _, err = integrations[0].Notify(ctx, alert...); err != nil {
return err
}

View File

@ -15,6 +15,8 @@ import (
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/prometheus/alertmanager/config"
)
type AlertType string
@ -65,21 +67,95 @@ type PostableRule struct {
}
type NotificationSettings struct {
NotificationGroupBy []string `json:"notificationGroupBy,omitempty"`
ReNotifyInterval Duration `json:"renotify,omitempty"`
GroupBy []string `json:"groupBy,omitempty"`
Renotify Renotify `json:"renotify,omitempty"`
UsePolicy bool `json:"usePolicy,omitempty"`
}
// Renotify configures repeat notifications for a rule.
type Renotify struct {
// Enabled turns re-notification on.
Enabled bool `json:"enabled"`
// ReNotifyInterval is the repeat interval, serialized as "interval".
ReNotifyInterval Duration `json:"interval,omitempty"`
// AlertStates lists the states the interval applies to; unmarshaling
// accepts only model.StateFiring and model.StateNoData.
AlertStates []model.AlertState `json:"alertStates,omitempty"`
}
func (ns *NotificationSettings) GetAlertManagerNotificationConfig() alertmanagertypes.NotificationConfig {
var renotifyInterval Duration
var noDataRenotifyInterval Duration
if slices.Contains(ns.AlertStates, model.StateNoData) {
noDataRenotifyInterval = ns.ReNotifyInterval
var renotifyInterval time.Duration
var noDataRenotifyInterval time.Duration
if ns.Renotify.Enabled {
if slices.Contains(ns.Renotify.AlertStates, model.StateNoData) {
noDataRenotifyInterval = time.Duration(ns.Renotify.ReNotifyInterval)
}
if slices.Contains(ns.AlertStates, model.StateFiring) {
renotifyInterval = ns.ReNotifyInterval
if slices.Contains(ns.Renotify.AlertStates, model.StateFiring) {
renotifyInterval = time.Duration(ns.Renotify.ReNotifyInterval)
}
return alertmanagertypes.NewNotificationConfig(ns.NotificationGroupBy, time.Duration(renotifyInterval), time.Duration(noDataRenotifyInterval))
} else {
renotifyInterval = 8760 * time.Hour //1 year for no renotify substitute
noDataRenotifyInterval = 8760 * time.Hour
}
return alertmanagertypes.NewNotificationConfig(ns.GroupBy, renotifyInterval, noDataRenotifyInterval, ns.UsePolicy)
}
// GetRuleRouteRequest builds one rule-based route policy request per receiver
// of the rule's threshold. Each policy matches alerts by threshold name and
// rule ID, delivers to that receiver's channels, and is named after ruleId.
//
// It returns the error from resolving the rule threshold, if any.
func (r *PostableRule) GetRuleRouteRequest(ruleId string) ([]*alertmanagertypes.PostableRoutePolicy, error) {
	threshold, err := r.RuleCondition.Thresholds.GetRuleThreshold()
	if err != nil {
		return nil, err
	}

	receivers := threshold.GetRuleReceivers()
	policies := make([]*alertmanagertypes.PostableRoutePolicy, 0, len(receivers))
	for i := range receivers {
		policy := &alertmanagertypes.PostableRoutePolicy{
			Expression:     fmt.Sprintf(`%s == "%s" && %s == "%s"`, LabelThresholdName, receivers[i].Name, LabelRuleId, ruleId),
			ExpressionKind: alertmanagertypes.RuleBasedExpression,
			Channels:       receivers[i].Channels,
			Name:           ruleId,
			Description:    fmt.Sprintf("Auto-generated route for rule %s", ruleId),
			Tags:           []string{"auto-generated", "rule-based"},
		}
		policies = append(policies, policy)
	}
	return policies, nil
}
// GetInhibitRules builds the inhibit rules that let each threshold of the
// rule suppress the one that follows it in the receiver order.
//
// For every consecutive pair of receivers returned by GetRuleReceivers, the
// source matchers select the earlier threshold and the target matchers the
// later one, both restricted to ruleId. Equal is set to the rule's
// notification group labels, sorted so the generated configuration is the
// same on every call (map iteration order is otherwise random).
//
// It returns the error from resolving the rule threshold, if any. Fewer than
// two receivers yield no inhibit rules.
func (r *PostableRule) GetInhibitRules(ruleId string) ([]config.InhibitRule, error) {
	threshold, err := r.RuleCondition.Thresholds.GetRuleThreshold()
	if err != nil {
		return nil, err
	}

	var groups []string
	if r.NotificationSettings != nil {
		for k := range r.NotificationSettings.GetAlertManagerNotificationConfig().NotificationGroup {
			groups = append(groups, string(k))
		}
		// Sort for a deterministic Equal list.
		slices.Sort(groups)
	}

	// matchersFor selects alerts of this rule for the given threshold name.
	matchersFor := func(thresholdName string) config.Matchers {
		return config.Matchers{
			{
				Name:  LabelThresholdName,
				Value: thresholdName,
			},
			{
				Name:  LabelRuleId,
				Value: ruleId,
			},
		}
	}

	receivers := threshold.GetRuleReceivers()
	var inhibitRules []config.InhibitRule
	for i := 0; i < len(receivers)-1; i++ {
		inhibitRules = append(inhibitRules, config.InhibitRule{
			SourceMatchers: matchersFor(receivers[i].Name),
			TargetMatchers: matchersFor(receivers[i+1].Name),
			Equal:          groups,
		})
	}
	return inhibitRules, nil
}
func (ns *NotificationSettings) UnmarshalJSON(data []byte) error {
@ -95,7 +171,7 @@ func (ns *NotificationSettings) UnmarshalJSON(data []byte) error {
}
// Validate states after unmarshaling
for _, state := range ns.AlertStates {
for _, state := range ns.Renotify.AlertStates {
if state != model.StateFiring && state != model.StateNoData {
return fmt.Errorf("invalid alert state: %s", state)
}
@ -143,15 +219,25 @@ func (r *PostableRule) processRuleDefaults() error {
Kind: BasicThresholdKind,
Spec: BasicRuleThresholds{{
Name: thresholdName,
RuleUnit: r.RuleCondition.CompositeQuery.Unit,
TargetUnit: r.RuleCondition.TargetUnit,
TargetValue: r.RuleCondition.Target,
MatchType: r.RuleCondition.MatchType,
CompareOp: r.RuleCondition.CompareOp,
Channels: r.PreferredChannels,
}},
}
r.RuleCondition.Thresholds = &thresholdData
r.Evaluation = &EvaluationEnvelope{RollingEvaluation, RollingWindow{EvalWindow: r.EvalWindow, Frequency: r.Frequency}}
r.NotificationSettings = &NotificationSettings{
Renotify: Renotify{
Enabled: true,
ReNotifyInterval: Duration(4 * time.Hour),
AlertStates: []model.AlertState{model.StateFiring},
},
}
if r.RuleCondition.AlertOnAbsent {
r.NotificationSettings.Renotify.AlertStates = append(r.NotificationSettings.Renotify.AlertStates, model.StateNoData)
}
}
}
@ -170,6 +256,7 @@ func (r *PostableRule) MarshalJSON() ([]byte, error) {
}
aux.Evaluation = nil
aux.SchemaVersion = ""
aux.NotificationSettings = nil
return json.Marshal(aux)
default:
copyStruct := *r
@ -192,7 +279,7 @@ func isValidLabelName(ln string) bool {
return false
}
for i, b := range ln {
if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || (b >= '0' && b <= '9' && i > 0)) {
if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == '.' || (b >= '0' && b <= '9' && i > 0)) {
return false
}
}
@ -347,6 +434,7 @@ func (g *GettableRule) MarshalJSON() ([]byte, error) {
}
aux.Evaluation = nil
aux.SchemaVersion = ""
aux.NotificationSettings = nil
return json.Marshal(aux)
default:
copyStruct := *g

View File

@ -2,10 +2,11 @@ package ruletypes
import (
"encoding/json"
"github.com/stretchr/testify/assert"
"testing"
"time"
"github.com/stretchr/testify/assert"
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
)
@ -303,10 +304,6 @@ func TestParseIntoRuleSchemaVersioning(t *testing.T) {
t.Errorf("Expected threshold name 'warning' from severity label, got '%s'", spec.Name)
}
// Verify all fields are copied from RuleCondition
if spec.RuleUnit != "percent" {
t.Errorf("Expected RuleUnit 'percent', got '%s'", spec.RuleUnit)
}
if spec.TargetUnit != "%" {
t.Errorf("Expected TargetUnit '%%', got '%s'", spec.TargetUnit)
}
@ -455,9 +452,6 @@ func TestParseIntoRuleSchemaVersioning(t *testing.T) {
if spec.TargetUnit != "%" {
t.Errorf("Expected TargetUnit '%%' (overwritten), got '%s'", spec.TargetUnit)
}
if spec.RuleUnit != "percent" {
t.Errorf("Expected RuleUnit 'percent' (overwritten), got '%s'", spec.RuleUnit)
}
if rule.Evaluation == nil {
t.Fatal("Expected Evaluation to be populated")
@ -630,9 +624,9 @@ func TestParseIntoRuleThresholdGeneration(t *testing.T) {
vector, err := threshold.ShouldAlert(v3.Series{
Points: []v3.Point{{Value: 0.15, Timestamp: 1000}}, // 150ms in seconds
Labels: map[string]string{"test": "label"},
})
}, "")
if err != nil {
t.Fatalf("Unexpected error in ShouldAlert: %v", err)
t.Fatalf("Unexpected error in shouldAlert: %v", err)
}
if len(vector) == 0 {
@ -707,9 +701,9 @@ func TestParseIntoRuleMultipleThresholds(t *testing.T) {
vector, err := threshold.ShouldAlert(v3.Series{
Points: []v3.Point{{Value: 95.0, Timestamp: 1000}}, // 95% CPU usage
Labels: map[string]string{"service": "test"},
})
}, "")
if err != nil {
t.Fatalf("Unexpected error in ShouldAlert: %v", err)
t.Fatalf("Unexpected error in shouldAlert: %v", err)
}
assert.Equal(t, 2, len(vector))
@ -717,9 +711,9 @@ func TestParseIntoRuleMultipleThresholds(t *testing.T) {
vector, err = threshold.ShouldAlert(v3.Series{
Points: []v3.Point{{Value: 75.0, Timestamp: 1000}}, // 75% CPU usage
Labels: map[string]string{"service": "test"},
})
}, "")
if err != nil {
t.Fatalf("Unexpected error in ShouldAlert: %v", err)
t.Fatalf("Unexpected error in shouldAlert: %v", err)
}
assert.Equal(t, 1, len(vector))

View File

@ -2,3 +2,4 @@ package ruletypes
// Names shared by rule thresholds and the routing expressions built on them.
const (
	// CriticalThresholdName is the name of the critical threshold.
	CriticalThresholdName = "CRITICAL"
	// LabelThresholdName is the label that carries a threshold's name.
	LabelThresholdName = "threshold.name"
	// LabelRuleId is the label that carries a rule's ID.
	LabelRuleId = "ruleId"
)

View File

@ -18,6 +18,10 @@ type Sample struct {
Metric labels.Labels
IsMissing bool
Target float64
TargetUnit string
}
func (s Sample) String() string {

View File

@ -51,23 +51,41 @@ func (r *RuleThresholdData) UnmarshalJSON(data []byte) error {
return nil
}
// RuleReceivers pairs a threshold name with the channels configured for it.
type RuleReceivers struct {
// Channels are the notification channels of the threshold.
Channels []string `json:"channels"`
// Name is the threshold's name.
Name string `json:"name"`
}
type RuleThreshold interface {
ShouldAlert(series v3.Series) (Vector, error)
ShouldAlert(series v3.Series, unit string) (Vector, error)
GetRuleReceivers() []RuleReceivers
}
type BasicRuleThreshold struct {
Name string `json:"name"`
TargetValue *float64 `json:"target"`
TargetUnit string `json:"targetUnit"`
RuleUnit string `json:"ruleUnit"`
RecoveryTarget *float64 `json:"recoveryTarget"`
MatchType MatchType `json:"matchType"`
CompareOp CompareOp `json:"op"`
SelectedQuery string `json:"selectedQuery"`
Channels []string `json:"channels"`
}
// BasicRuleThresholds is a list of BasicRuleThreshold values.
type BasicRuleThresholds []BasicRuleThreshold
// GetRuleReceivers returns one RuleReceivers entry (name and channels) per
// threshold, in the order produced by sortThresholds.
//
// Sorting is done on a private copy, so the receiver's own slice keeps its
// order and is not written to by this getter. An empty receiver yields a nil
// result.
func (r BasicRuleThresholds) GetRuleReceivers() []RuleReceivers {
	if len(r) == 0 {
		return nil
	}

	// Copy first: sorting r directly would reorder the caller's backing array.
	thresholds := make([]BasicRuleThreshold, len(r))
	copy(thresholds, r)
	sortThresholds(thresholds)

	receiverRoutes := make([]RuleReceivers, 0, len(thresholds))
	for _, threshold := range thresholds {
		receiverRoutes = append(receiverRoutes, RuleReceivers{
			Name:     threshold.Name,
			Channels: threshold.Channels,
		})
	}
	return receiverRoutes
}
func (r BasicRuleThresholds) Validate() error {
var errs []error
for _, basicThreshold := range r {
@ -78,13 +96,27 @@ func (r BasicRuleThresholds) Validate() error {
return errors.Join(errs...)
}
func (r BasicRuleThresholds) ShouldAlert(series v3.Series) (Vector, error) {
func (r BasicRuleThresholds) ShouldAlert(series v3.Series, unit string) (Vector, error) {
var resultVector Vector
thresholds := []BasicRuleThreshold(r)
sortThresholds(thresholds)
for _, threshold := range thresholds {
smpl, shouldAlert := threshold.shouldAlert(series, unit)
if shouldAlert {
smpl.Target = threshold.target(unit)
smpl.TargetUnit = threshold.TargetUnit
resultVector = append(resultVector, smpl)
}
}
return resultVector, nil
}
func sortThresholds(thresholds []BasicRuleThreshold) {
sort.Slice(thresholds, func(i, j int) bool {
compareOp := thresholds[i].GetCompareOp()
targetI := thresholds[i].Target()
targetJ := thresholds[j].Target()
compareOp := thresholds[i].getCompareOp()
targetI := thresholds[i].target(thresholds[i].TargetUnit) //for sorting we dont need rule unit
targetJ := thresholds[j].target(thresholds[j].TargetUnit)
switch compareOp {
case ValueIsAbove, ValueAboveOrEq, ValueOutsideBounds:
@ -98,49 +130,22 @@ func (r BasicRuleThresholds) ShouldAlert(series v3.Series) (Vector, error) {
return targetI > targetJ
}
})
for _, threshold := range thresholds {
smpl, shouldAlert := threshold.ShouldAlert(series)
if shouldAlert {
resultVector = append(resultVector, smpl)
}
}
return resultVector, nil
}
func (b BasicRuleThreshold) GetName() string {
return b.Name
}
func (b BasicRuleThreshold) Target() float64 {
func (b BasicRuleThreshold) target(ruleUnit string) float64 {
unitConverter := converter.FromUnit(converter.Unit(b.TargetUnit))
// convert the target value to the y-axis unit
value := unitConverter.Convert(converter.Value{
F: *b.TargetValue,
U: converter.Unit(b.TargetUnit),
}, converter.Unit(b.RuleUnit))
}, converter.Unit(ruleUnit))
return value.F
}
func (b BasicRuleThreshold) GetRecoveryTarget() float64 {
if b.RecoveryTarget == nil {
return 0
} else {
return *b.RecoveryTarget
}
}
func (b BasicRuleThreshold) GetMatchType() MatchType {
return b.MatchType
}
func (b BasicRuleThreshold) GetCompareOp() CompareOp {
func (b BasicRuleThreshold) getCompareOp() CompareOp {
return b.CompareOp
}
func (b BasicRuleThreshold) GetSelectedQuery() string {
return b.SelectedQuery
}
func (b BasicRuleThreshold) Validate() error {
var errs []error
if b.Name == "" {
@ -182,7 +187,7 @@ func removeGroupinSetPoints(series v3.Series) []v3.Point {
return result
}
func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
func (b BasicRuleThreshold) shouldAlert(series v3.Series, ruleUnit string) (Sample, bool) {
var shouldAlert bool
var alertSmpl Sample
var lbls labels.Labels
@ -191,6 +196,8 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
lbls = append(lbls, labels.Label{Name: name, Value: value})
}
target := b.target(ruleUnit)
lbls = append(lbls, labels.Label{Name: LabelThresholdName, Value: b.Name})
series.Points = removeGroupinSetPoints(series)
@ -205,7 +212,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
// If any sample matches the condition, the rule is firing.
if b.CompareOp == ValueIsAbove {
for _, smpl := range series.Points {
if smpl.Value > b.Target() {
if smpl.Value > target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
@ -213,7 +220,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
} else if b.CompareOp == ValueIsBelow {
for _, smpl := range series.Points {
if smpl.Value < b.Target() {
if smpl.Value < target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
@ -221,7 +228,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
} else if b.CompareOp == ValueIsEq {
for _, smpl := range series.Points {
if smpl.Value == b.Target() {
if smpl.Value == target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
@ -229,7 +236,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
} else if b.CompareOp == ValueIsNotEq {
for _, smpl := range series.Points {
if smpl.Value != b.Target() {
if smpl.Value != target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
@ -237,7 +244,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
} else if b.CompareOp == ValueOutsideBounds {
for _, smpl := range series.Points {
if math.Abs(smpl.Value) >= b.Target() {
if math.Abs(smpl.Value) >= target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
@ -247,10 +254,10 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
case AllTheTimes:
// If all samples match the condition, the rule is firing.
shouldAlert = true
alertSmpl = Sample{Point: Point{V: b.Target()}, Metric: lbls}
alertSmpl = Sample{Point: Point{V: target}, Metric: lbls}
if b.CompareOp == ValueIsAbove {
for _, smpl := range series.Points {
if smpl.Value <= b.Target() {
if smpl.Value <= target {
shouldAlert = false
break
}
@ -267,7 +274,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
} else if b.CompareOp == ValueIsBelow {
for _, smpl := range series.Points {
if smpl.Value >= b.Target() {
if smpl.Value >= target {
shouldAlert = false
break
}
@ -283,14 +290,14 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
} else if b.CompareOp == ValueIsEq {
for _, smpl := range series.Points {
if smpl.Value != b.Target() {
if smpl.Value != target {
shouldAlert = false
break
}
}
} else if b.CompareOp == ValueIsNotEq {
for _, smpl := range series.Points {
if smpl.Value == b.Target() {
if smpl.Value == target {
shouldAlert = false
break
}
@ -306,7 +313,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
} else if b.CompareOp == ValueOutsideBounds {
for _, smpl := range series.Points {
if math.Abs(smpl.Value) < b.Target() {
if math.Abs(smpl.Value) < target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = false
break
@ -326,23 +333,23 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
avg := sum / count
alertSmpl = Sample{Point: Point{V: avg}, Metric: lbls}
if b.CompareOp == ValueIsAbove {
if avg > b.Target() {
if avg > target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsBelow {
if avg < b.Target() {
if avg < target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsEq {
if avg == b.Target() {
if avg == target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsNotEq {
if avg != b.Target() {
if avg != target {
shouldAlert = true
}
} else if b.CompareOp == ValueOutsideBounds {
if math.Abs(avg) >= b.Target() {
if math.Abs(avg) >= target {
shouldAlert = true
}
}
@ -358,23 +365,23 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
}
alertSmpl = Sample{Point: Point{V: sum}, Metric: lbls}
if b.CompareOp == ValueIsAbove {
if sum > b.Target() {
if sum > target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsBelow {
if sum < b.Target() {
if sum < target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsEq {
if sum == b.Target() {
if sum == target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsNotEq {
if sum != b.Target() {
if sum != target {
shouldAlert = true
}
} else if b.CompareOp == ValueOutsideBounds {
if math.Abs(sum) >= b.Target() {
if math.Abs(sum) >= target {
shouldAlert = true
}
}
@ -383,19 +390,19 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
shouldAlert = false
alertSmpl = Sample{Point: Point{V: series.Points[len(series.Points)-1].Value}, Metric: lbls}
if b.CompareOp == ValueIsAbove {
if series.Points[len(series.Points)-1].Value > b.Target() {
if series.Points[len(series.Points)-1].Value > target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsBelow {
if series.Points[len(series.Points)-1].Value < b.Target() {
if series.Points[len(series.Points)-1].Value < target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsEq {
if series.Points[len(series.Points)-1].Value == b.Target() {
if series.Points[len(series.Points)-1].Value == target {
shouldAlert = true
}
} else if b.CompareOp == ValueIsNotEq {
if series.Points[len(series.Points)-1].Value != b.Target() {
if series.Points[len(series.Points)-1].Value != target {
shouldAlert = true
}
}