chore: notification routing | added notification routing via expression-based routes (#9195)

* chore: added custom dispatcher

* feat(notification-grouping): added notification grouping

* feat(notification-grouping): added integration test dependency

* feat(notification-grouping): linting and test cases

* feat(notification-grouping): linting and test cases

* feat(notification-grouping): linting and test cases

* feat(notification-grouping): added integration test dependency

* feat(notification-grouping): debug log lines

* feat(notification-grouping): debug log lines

* feat(notification-grouping): debug log lines

* feat(notification-grouping): added integration test dependency

* feat(notification-grouping): added integration test dependency

* feat(notification-grouping): added integration test dependency

* feat(notification-grouping): added structure changes

* feat(notification-grouping): added structure changes

* feat(notification-routing): added notification routing

* chore(notification-grouping): added notification grouping

* Update pkg/alertmanager/nfmanager/rulebasednotification/provider.go

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

* chore(notification-grouping): added renotification interval

* fix(notification-grouping): added fix for renotification

* chore(notification-grouping): added no data renotify

* chore(notification-grouping): added no data renotify

* chore(notification-grouping): added no data renotify

* chore(notification-grouping): added no data renotify interval

* chore(notification-grouping): removed errors package from dispatcher

* chore(notification-grouping): removed errors package from dispatcher

* chore(notification-grouping): removed unwanted tests

* chore(notification-grouping): removed unwanted pkg name

* chore(notification-grouping): added delete notification setting

* chore(notification-grouping): added delete notification setting

* Update pkg/alertmanager/nfmanager/nfmanagertest/provider.go

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

* chore(notification-grouping): removed nfmanager config| notification settings in postable rule

* chore(notification-grouping): removed nfmanager config| notification settings in postable rule

* chore(notification-grouping): added test for dispatcher

* chore(notification-grouping): added test for dispatcher

* chore(notification-grouping): go linting errors

* chore(notification-grouping): added test cases for aggGroupPerRoute

* chore(notification-grouping): added test cases for aggGroupPerRoute

* chore(notification-grouping): corrected get notification config logic

* Update pkg/alertmanager/nfmanager/rulebasednotification/provider_test.go

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

* chore(notification-routing): added notification routing policies

* feat(notification-routing): added test cases for dispatcher

* chore(notification-routing): added notification routing policies

* chore(notification-routing): added notification routing policies

* Apply suggestions from code review

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

* chore(notification-routing): added notification routing policies

* chore(notification-routing): added notification routing policies

* Update pkg/alertmanager/alertmanagerserver/distpatcher_test.go

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

* chore(notification-routing): sorted imports

* chore(notification-routing): minor edit |pr resolve comments

* chore(notification-grouping): corrected dispatcher test cases

* chore(notification-routing): added notification routing policies

* chore(notification-routing): corrected race condition in test

* chore: resolved pr comments

* chore: passing threshold value to template

* chore: completed delete rule functionality

* chore: added grouping disabled functionality

* chore: added grouping disabled functionality

* chore(notification-routing): resolved pr comments

* chore(notification-routing): resolved pr comments

* chore(notification-routing): resolved pr comments

* chore(notification-routing): sorted imports

* chore(notification-routing): fix linting errors

* chore(notification-routing): removed enabled flags

* fix: test rule multiple threshold (#9224)

* chore: corrected linting errors

* chore: corrected linting errors

* chore: corrected linting errors

* chore: corrected linting errors

* chore: corrected migration errors

* chore: corrected migration errors

* chore: corrected migration errors

* chore: corrected migration errors

* Update pkg/sqlmigration/049_add_route_policy.go

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

* chore: added org_id as foreign key

* chore: resolved pr comments

* chore: removed route store unused

---------

Co-authored-by: Srikanth Chekuri <srikanth.chekuri92@gmail.com>
Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
This commit is contained in:
aniketio-ctrl 2025-10-03 19:47:15 +05:30 committed by GitHub
parent d3be2632b6
commit f9a70a3a69
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
40 changed files with 3842 additions and 894 deletions

View File

@ -251,7 +251,7 @@ func (r *AnomalyRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID, t
continue continue
} }
} }
results, err := r.Threshold.ShouldAlert(*series) results, err := r.Threshold.ShouldAlert(*series, r.Unit())
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -301,7 +301,7 @@ func (r *AnomalyRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID,
continue continue
} }
} }
results, err := r.Threshold.ShouldAlert(*series) results, err := r.Threshold.ShouldAlert(*series, r.Unit())
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -336,14 +336,19 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (interface{}, erro
resultFPs := map[uint64]struct{}{} resultFPs := map[uint64]struct{}{}
var alerts = make(map[uint64]*ruletypes.Alert, len(res)) var alerts = make(map[uint64]*ruletypes.Alert, len(res))
ruleReceivers := r.Threshold.GetRuleReceivers()
ruleReceiverMap := make(map[string][]string)
for _, value := range ruleReceivers {
ruleReceiverMap[value.Name] = value.Channels
}
for _, smpl := range res { for _, smpl := range res {
l := make(map[string]string, len(smpl.Metric)) l := make(map[string]string, len(smpl.Metric))
for _, lbl := range smpl.Metric { for _, lbl := range smpl.Metric {
l[lbl.Name] = lbl.Value l[lbl.Name] = lbl.Value
} }
value := valueFormatter.Format(smpl.V, r.Unit()) value := valueFormatter.Format(smpl.V, r.Unit())
threshold := valueFormatter.Format(r.TargetVal(), r.Unit()) threshold := valueFormatter.Format(smpl.Target, smpl.TargetUnit)
r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold) r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold)
tmplData := ruletypes.AlertTemplateData(l, value, threshold) tmplData := ruletypes.AlertTemplateData(l, value, threshold)
@ -408,13 +413,12 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (interface{}, erro
State: model.StatePending, State: model.StatePending,
Value: smpl.V, Value: smpl.V,
GeneratorURL: r.GeneratorURL(), GeneratorURL: r.GeneratorURL(),
Receivers: r.PreferredChannels(), Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
Missing: smpl.IsMissing, Missing: smpl.IsMissing,
} }
} }
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts)) r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
// alerts[h] is ready, add or update active list now // alerts[h] is ready, add or update active list now
for h, a := range alerts { for h, a := range alerts {
// Check whether we already have alerting state for the identifying label set. // Check whether we already have alerting state for the identifying label set.
@ -423,7 +427,9 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (interface{}, erro
alert.Value = a.Value alert.Value = a.Value
alert.Annotations = a.Annotations alert.Annotations = a.Annotations
alert.Receivers = r.PreferredChannels() if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
alert.Receivers = ruleReceiverMap[v]
}
continue continue
} }

View File

@ -126,7 +126,6 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.Ap
if parsedRule.RuleType == ruletypes.RuleTypeThreshold { if parsedRule.RuleType == ruletypes.RuleTypeThreshold {
// add special labels for test alerts // add special labels for test alerts
parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target)
parsedRule.Labels[labels.RuleSourceLabel] = "" parsedRule.Labels[labels.RuleSourceLabel] = ""
parsedRule.Labels[labels.AlertRuleIdLabel] = "" parsedRule.Labels[labels.AlertRuleIdLabel] = ""

2
go.mod
View File

@ -127,7 +127,7 @@ require (
github.com/elastic/lunes v0.1.0 // indirect github.com/elastic/lunes v0.1.0 // indirect
github.com/emirpasic/gods v1.18.1 // indirect github.com/emirpasic/gods v1.18.1 // indirect
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
github.com/expr-lang/expr v1.17.5 // indirect github.com/expr-lang/expr v1.17.5
github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb // indirect github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect

View File

@ -3,6 +3,8 @@ package alertmanager
import ( import (
"context" "context"
amConfig "github.com/prometheus/alertmanager/config"
"github.com/SigNoz/signoz/pkg/errors" "github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/factory" "github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/statsreporter" "github.com/SigNoz/signoz/pkg/statsreporter"
@ -26,7 +28,7 @@ type Alertmanager interface {
TestReceiver(context.Context, string, alertmanagertypes.Receiver) error TestReceiver(context.Context, string, alertmanagertypes.Receiver) error
// TestAlert sends an alert to a list of receivers. // TestAlert sends an alert to a list of receivers.
TestAlert(ctx context.Context, orgID string, alert *alertmanagertypes.PostableAlert, receivers []string) error TestAlert(ctx context.Context, orgID string, ruleID string, receiversMap map[*alertmanagertypes.PostableAlert][]string) error
// ListChannels lists all channels for the organization. // ListChannels lists all channels for the organization.
ListChannels(context.Context, string) ([]*alertmanagertypes.Channel, error) ListChannels(context.Context, string) ([]*alertmanagertypes.Channel, error)
@ -59,6 +61,19 @@ type Alertmanager interface {
DeleteNotificationConfig(ctx context.Context, orgID valuer.UUID, ruleId string) error DeleteNotificationConfig(ctx context.Context, orgID valuer.UUID, ruleId string) error
// Notification Policy CRUD
CreateRoutePolicy(ctx context.Context, route *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error)
CreateRoutePolicies(ctx context.Context, routeRequests []*alertmanagertypes.PostableRoutePolicy) ([]*alertmanagertypes.GettableRoutePolicy, error)
GetRoutePolicyByID(ctx context.Context, routeID string) (*alertmanagertypes.GettableRoutePolicy, error)
GetAllRoutePolicies(ctx context.Context) ([]*alertmanagertypes.GettableRoutePolicy, error)
UpdateRoutePolicyByID(ctx context.Context, routeID string, route *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error)
DeleteRoutePolicyByID(ctx context.Context, routeID string) error
DeleteAllRoutePoliciesByRuleId(ctx context.Context, ruleId string) error
UpdateAllRoutePoliciesByRuleId(ctx context.Context, ruleId string, routes []*alertmanagertypes.PostableRoutePolicy) error
CreateInhibitRules(ctx context.Context, orgID valuer.UUID, rules []amConfig.InhibitRule) error
DeleteAllInhibitRulesByRuleId(ctx context.Context, orgID valuer.UUID, ruleId string) error
// Collects stats for the organization. // Collects stats for the organization.
statsreporter.StatsCollector statsreporter.StatsCollector
} }

View File

@ -10,19 +10,17 @@ import (
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager" "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/errors" "github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/prometheus/alertmanager/dispatch" "github.com/prometheus/alertmanager/dispatch"
"github.com/prometheus/alertmanager/notify" "github.com/prometheus/alertmanager/notify"
"github.com/prometheus/alertmanager/pkg/labels"
"github.com/prometheus/alertmanager/provider" "github.com/prometheus/alertmanager/provider"
"github.com/prometheus/alertmanager/store" "github.com/prometheus/alertmanager/store"
"github.com/prometheus/alertmanager/types" "github.com/prometheus/alertmanager/types"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
) )
const (
noDataLabel = model.LabelName("nodata")
)
// Dispatcher sorts incoming alerts into aggregation groups and // Dispatcher sorts incoming alerts into aggregation groups and
// assigns the correct notifiers to each. // assigns the correct notifiers to each.
type Dispatcher struct { type Dispatcher struct {
@ -46,6 +44,7 @@ type Dispatcher struct {
logger *slog.Logger logger *slog.Logger
notificationManager nfmanager.NotificationManager notificationManager nfmanager.NotificationManager
orgID string orgID string
receiverRoutes map[string]*dispatch.Route
} }
// We use the upstream Limits interface from Prometheus // We use the upstream Limits interface from Prometheus
@ -90,6 +89,7 @@ func (d *Dispatcher) Run() {
d.mtx.Lock() d.mtx.Lock()
d.aggrGroupsPerRoute = map[*dispatch.Route]map[model.Fingerprint]*aggrGroup{} d.aggrGroupsPerRoute = map[*dispatch.Route]map[model.Fingerprint]*aggrGroup{}
d.receiverRoutes = map[string]*dispatch.Route{}
d.aggrGroupsNum = 0 d.aggrGroupsNum = 0
d.metrics.aggrGroups.Set(0) d.metrics.aggrGroups.Set(0)
d.ctx, d.cancel = context.WithCancel(context.Background()) d.ctx, d.cancel = context.WithCancel(context.Background())
@ -125,8 +125,14 @@ func (d *Dispatcher) run(it provider.AlertIterator) {
} }
now := time.Now() now := time.Now()
for _, r := range d.route.Match(alert.Labels) { channels, err := d.notificationManager.Match(d.ctx, d.orgID, getRuleIDFromAlert(alert), alert.Labels)
d.processAlert(alert, r) if err != nil {
d.logger.ErrorContext(d.ctx, "Error on alert match", "err", err)
continue
}
for _, channel := range channels {
route := d.getOrCreateRoute(channel)
d.processAlert(alert, route)
} }
d.metrics.processingDuration.Observe(time.Since(now).Seconds()) d.metrics.processingDuration.Observe(time.Since(now).Seconds())
@ -266,6 +272,7 @@ type notifyFunc func(context.Context, ...*types.Alert) bool
// processAlert determines in which aggregation group the alert falls // processAlert determines in which aggregation group the alert falls
// and inserts it. // and inserts it.
// no data alert will only have ruleId and no data label
func (d *Dispatcher) processAlert(alert *types.Alert, route *dispatch.Route) { func (d *Dispatcher) processAlert(alert *types.Alert, route *dispatch.Route) {
ruleId := getRuleIDFromAlert(alert) ruleId := getRuleIDFromAlert(alert)
config, err := d.notificationManager.GetNotificationConfig(d.orgID, ruleId) config, err := d.notificationManager.GetNotificationConfig(d.orgID, ruleId)
@ -273,8 +280,14 @@ func (d *Dispatcher) processAlert(alert *types.Alert, route *dispatch.Route) {
d.logger.ErrorContext(d.ctx, "error getting alert notification config", "rule_id", ruleId, "error", err) d.logger.ErrorContext(d.ctx, "error getting alert notification config", "rule_id", ruleId, "error", err)
return return
} }
renotifyInterval := config.Renotify.RenotifyInterval
groupLabels := getGroupLabels(alert, config.NotificationGroup) groupLabels := getGroupLabels(alert, config.NotificationGroup, config.GroupByAll)
if alertmanagertypes.NoDataAlert(alert) {
renotifyInterval = config.Renotify.NoDataInterval
groupLabels[alertmanagertypes.NoDataLabel] = alert.Labels[alertmanagertypes.NoDataLabel] //to create new group key for no data alerts
}
fp := groupLabels.Fingerprint() fp := groupLabels.Fingerprint()
@ -299,12 +312,6 @@ func (d *Dispatcher) processAlert(alert *types.Alert, route *dispatch.Route) {
d.logger.ErrorContext(d.ctx, "Too many aggregation groups, cannot create new group for alert", "groups", d.aggrGroupsNum, "limit", limit, "alert", alert.Name()) d.logger.ErrorContext(d.ctx, "Too many aggregation groups, cannot create new group for alert", "groups", d.aggrGroupsNum, "limit", limit, "alert", alert.Name())
return return
} }
renotifyInterval := config.Renotify.RenotifyInterval
if noDataAlert(alert) {
renotifyInterval = config.Renotify.NoDataInterval
groupLabels[noDataLabel] = alert.Labels[noDataLabel]
}
ag = newAggrGroup(d.ctx, groupLabels, route, d.timeout, d.logger, renotifyInterval) ag = newAggrGroup(d.ctx, groupLabels, route, d.timeout, d.logger, renotifyInterval)
@ -543,21 +550,35 @@ func deepCopyRouteOpts(opts dispatch.RouteOpts, renotify time.Duration) dispatch
return newOpts return newOpts
} }
func getGroupLabels(alert *types.Alert, groups map[model.LabelName]struct{}) model.LabelSet { func getGroupLabels(alert *types.Alert, groups map[model.LabelName]struct{}, groupByAll bool) model.LabelSet {
groupLabels := model.LabelSet{} groupLabels := model.LabelSet{}
for ln, lv := range alert.Labels { for ln, lv := range alert.Labels {
if _, ok := groups[ln]; ok { if _, ok := groups[ln]; ok || groupByAll {
groupLabels[ln] = lv groupLabels[ln] = lv
} }
} }
return groupLabels return groupLabels
} }
func noDataAlert(alert *types.Alert) bool { func (d *Dispatcher) getOrCreateRoute(receiver string) *dispatch.Route {
if _, ok := alert.Labels[noDataLabel]; ok { d.mtx.Lock()
return true defer d.mtx.Unlock()
} else { if route, exists := d.receiverRoutes[receiver]; exists {
return false return route
} }
route := &dispatch.Route{
RouteOpts: dispatch.RouteOpts{
Receiver: receiver,
GroupWait: 30 * time.Second,
GroupInterval: 5 * time.Minute,
GroupByAll: false,
},
Matchers: labels.Matchers{{
Name: "__receiver__",
Value: receiver,
Type: labels.MatchEqual,
}},
}
d.receiverRoutes[receiver] = route
return route
} }

File diff suppressed because it is too large Load Diff

View File

@ -2,6 +2,9 @@ package alertmanagerserver
import ( import (
"context" "context"
"fmt"
"github.com/prometheus/alertmanager/types"
"golang.org/x/sync/errgroup"
"log/slog" "log/slog"
"strings" "strings"
"sync" "sync"
@ -321,39 +324,104 @@ func (server *Server) SetConfig(ctx context.Context, alertmanagerConfig *alertma
} }
func (server *Server) TestReceiver(ctx context.Context, receiver alertmanagertypes.Receiver) error { func (server *Server) TestReceiver(ctx context.Context, receiver alertmanagertypes.Receiver) error {
return alertmanagertypes.TestReceiver(ctx, receiver, alertmanagernotify.NewReceiverIntegrations, server.alertmanagerConfig, server.tmpl, server.logger, alertmanagertypes.NewTestAlert(receiver, time.Now(), time.Now())) testAlert := alertmanagertypes.NewTestAlert(receiver, time.Now(), time.Now())
return alertmanagertypes.TestReceiver(ctx, receiver, alertmanagernotify.NewReceiverIntegrations, server.alertmanagerConfig, server.tmpl, server.logger, testAlert.Labels, testAlert)
} }
func (server *Server) TestAlert(ctx context.Context, postableAlert *alertmanagertypes.PostableAlert, receivers []string) error { func (server *Server) TestAlert(ctx context.Context, receiversMap map[*alertmanagertypes.PostableAlert][]string, config *alertmanagertypes.NotificationConfig) error {
alerts, err := alertmanagertypes.NewAlertsFromPostableAlerts(alertmanagertypes.PostableAlerts{postableAlert}, time.Duration(server.srvConfig.Global.ResolveTimeout), time.Now()) if len(receiversMap) == 0 {
return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput,
"expected at least 1 alert, got 0")
}
postableAlerts := make(alertmanagertypes.PostableAlerts, 0, len(receiversMap))
for alert := range receiversMap {
postableAlerts = append(postableAlerts, alert)
}
alerts, err := alertmanagertypes.NewAlertsFromPostableAlerts(
postableAlerts,
time.Duration(server.srvConfig.Global.ResolveTimeout),
time.Now(),
)
if err != nil { if err != nil {
return errors.Join(err...) return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput,
"failed to construct alerts from postable alerts: %v", err)
} }
if len(alerts) != 1 { type alertGroup struct {
return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "expected 1 alert, got %d", len(alerts)) groupLabels model.LabelSet
alerts []*types.Alert
receivers map[string]struct{}
} }
ch := make(chan error, len(receivers)) groupMap := make(map[model.Fingerprint]*alertGroup)
for _, receiverName := range receivers {
go func(receiverName string) { for i, alert := range alerts {
receiver, err := server.alertmanagerConfig.GetReceiver(receiverName) labels := getGroupLabels(alert, config.NotificationGroup, config.GroupByAll)
if err != nil { fp := labels.Fingerprint()
ch <- err
return postableAlert := postableAlerts[i]
alertReceivers := receiversMap[postableAlert]
if group, exists := groupMap[fp]; exists {
group.alerts = append(group.alerts, alert)
for _, r := range alertReceivers {
group.receivers[r] = struct{}{}
}
} else {
receiverSet := make(map[string]struct{})
for _, r := range alertReceivers {
receiverSet[r] = struct{}{}
}
groupMap[fp] = &alertGroup{
groupLabels: labels,
alerts: []*types.Alert{alert},
receivers: receiverSet,
} }
ch <- alertmanagertypes.TestReceiver(ctx, receiver, alertmanagernotify.NewReceiverIntegrations, server.alertmanagerConfig, server.tmpl, server.logger, alerts[0])
}(receiverName)
}
var errs []error
for i := 0; i < len(receivers); i++ {
if err := <-ch; err != nil {
errs = append(errs, err)
} }
} }
if errs != nil { var mu sync.Mutex
var errs []error
g, gCtx := errgroup.WithContext(ctx)
for _, group := range groupMap {
for receiverName := range group.receivers {
group := group
receiverName := receiverName
g.Go(func() error {
receiver, err := server.alertmanagerConfig.GetReceiver(receiverName)
if err != nil {
mu.Lock()
errs = append(errs, fmt.Errorf("failed to get receiver %q: %w", receiverName, err))
mu.Unlock()
return nil // Return nil to continue processing other goroutines
}
err = alertmanagertypes.TestReceiver(
gCtx,
receiver,
alertmanagernotify.NewReceiverIntegrations,
server.alertmanagerConfig,
server.tmpl,
server.logger,
group.groupLabels,
group.alerts...,
)
if err != nil {
mu.Lock()
errs = append(errs, fmt.Errorf("receiver %q test failed: %w", receiverName, err))
mu.Unlock()
}
return nil // Return nil to continue processing other goroutines
})
}
}
_ = g.Wait()
if len(errs) > 0 {
return errors.Join(errs...) return errors.Join(errs...)
} }

View File

@ -0,0 +1,223 @@
package alertmanagerserver
import (
"context"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes/alertmanagertypestest"
"github.com/prometheus/alertmanager/dispatch"
"io"
"log/slog"
"net/http"
"testing"
"time"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/rulebasednotification"
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/go-openapi/strfmt"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
)
// TestEndToEndAlertManagerFlow wires a rule-based notification manager (backed
// by a mock route store) into an alertmanager Server, posts three alerts for
// the "high-cpu-usage" rule and then verifies that:
//   - all three alerts are retrievable through GetAlerts,
//   - the stored notification config is readable for the rule, and
//   - the dispatcher placed the alerts into one aggregation group per instance
//     under the "webhook" receiver.
//
// The test relies on fixed sleeps to let the dispatcher process alerts, so it
// is timing sensitive.
func TestEndToEndAlertManagerFlow(t *testing.T) {
	ctx := context.Background()
	providerSettings := instrumentationtest.New().ToProviderSettings()

	store := nfroutingstoretest.NewMockSQLRouteStore()
	store.MatchExpectationsInOrder(false)
	notificationManager, err := rulebasednotification.New(ctx, providerSettings, nfmanager.Config{}, store)
	require.NoError(t, err)
	orgID := "test-org"

	// Two route policies for the same rule: one for critical and one for
	// warning severity, both delivering to the "webhook" channel.
	routes := []*alertmanagertypes.RoutePolicy{
		{
			Identifiable: types.Identifiable{
				ID: valuer.GenerateUUID(),
			},
			Expression:     `ruleId == "high-cpu-usage" && severity == "critical"`,
			ExpressionKind: alertmanagertypes.RuleBasedExpression,
			Name:           "high-cpu-usage",
			Description:    "High CPU critical alerts to webhook",
			Enabled:        true,
			OrgID:          orgID,
			Channels:       []string{"webhook"},
		},
		{
			Identifiable: types.Identifiable{
				ID: valuer.GenerateUUID(),
			},
			Expression:     `ruleId == "high-cpu-usage" && severity == "warning"`,
			ExpressionKind: alertmanagertypes.RuleBasedExpression,
			Name:           "high-cpu-usage",
			Description:    "High CPU warning alerts to webhook",
			Enabled:        true,
			OrgID:          orgID,
			Channels:       []string{"webhook"},
		},
	}

	store.ExpectCreateBatch(routes)
	err = notificationManager.CreateRoutePolicies(ctx, orgID, routes)
	require.NoError(t, err)

	// Register two by-name lookups per route on the mock store.
	// NOTE(review): presumably these cover the lookups issued while the
	// dispatcher matches alerts — confirm the expected call count.
	for range routes {
		ruleID := "high-cpu-usage"
		store.ExpectGetAllByName(orgID, ruleID, routes)
		store.ExpectGetAllByName(orgID, ruleID, routes)
	}

	notifConfig := alertmanagertypes.NotificationConfig{
		NotificationGroup: map[model.LabelName]struct{}{
			model.LabelName("cluster"):  {},
			model.LabelName("instance"): {},
		},
		Renotify: alertmanagertypes.ReNotificationConfig{
			RenotifyInterval: 5 * time.Minute,
		},
		UsePolicy: false,
	}

	err = notificationManager.SetNotificationConfig(orgID, "high-cpu-usage", &notifConfig)
	require.NoError(t, err)

	srvCfg := NewConfig()
	stateStore := alertmanagertypestest.NewStateStore()
	registry := prometheus.NewRegistry()
	logger := slog.New(slog.NewTextHandler(io.Discard, nil))
	server, err := New(context.Background(), logger, registry, srvCfg, orgID, stateStore, notificationManager)
	require.NoError(t, err)
	amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, orgID)
	require.NoError(t, err)
	err = server.SetConfig(ctx, amConfig)
	require.NoError(t, err)

	// Stop the server once the test and all of its subtests have finished so
	// it does not outlive the test. The error is ignored because this is
	// best-effort teardown.
	t.Cleanup(func() {
		_ = server.Stop(context.Background())
	})

	// Create test alerts
	now := time.Now()
	testAlerts := []*alertmanagertypes.PostableAlert{
		{
			Alert: alertmanagertypes.AlertModel{
				Labels: map[string]string{
					"ruleId":    "high-cpu-usage",
					"severity":  "critical",
					"cluster":   "prod-cluster",
					"instance":  "server-01",
					"alertname": "HighCPUUsage",
				},
			},
			Annotations: map[string]string{
				"summary":     "High CPU usage detected",
				"description": "CPU usage is above 90% for 5 minutes",
			},
			StartsAt: strfmt.DateTime(now.Add(-5 * time.Minute)),
			EndsAt:   strfmt.DateTime(time.Time{}), // Active alert
		},
		{
			Alert: alertmanagertypes.AlertModel{
				Labels: map[string]string{
					"ruleId":    "high-cpu-usage",
					"severity":  "warning",
					"cluster":   "prod-cluster",
					"instance":  "server-02",
					"alertname": "HighCPUUsage",
				},
			},
			Annotations: map[string]string{
				"summary":     "Moderate CPU usage detected",
				"description": "CPU usage is above 70% for 10 minutes",
			},
			StartsAt: strfmt.DateTime(now.Add(-10 * time.Minute)),
			EndsAt:   strfmt.DateTime(time.Time{}), // Active alert
		},
		{
			Alert: alertmanagertypes.AlertModel{
				Labels: map[string]string{
					"ruleId":    "high-cpu-usage",
					"severity":  "critical",
					"cluster":   "prod-cluster",
					"instance":  "server-03",
					"alertname": "HighCPUUsage",
				},
			},
			Annotations: map[string]string{
				"summary":     "High CPU usage detected on server-03",
				"description": "CPU usage is above 95% for 3 minutes",
			},
			StartsAt: strfmt.DateTime(now.Add(-3 * time.Minute)),
			EndsAt:   strfmt.DateTime(time.Time{}), // Active alert
		},
	}

	err = server.PutAlerts(ctx, testAlerts)
	require.NoError(t, err)

	// Give the server time to ingest the alerts before querying them.
	time.Sleep(2 * time.Second)

	t.Run("verify_alerts_processed", func(t *testing.T) {
		dummyRequest, err := http.NewRequest(http.MethodGet, "/alerts", nil)
		require.NoError(t, err)

		params, err := alertmanagertypes.NewGettableAlertsParams(dummyRequest)
		require.NoError(t, err)
		alerts, err := server.GetAlerts(context.Background(), params)
		require.NoError(t, err)
		require.Len(t, alerts, 3, "Expected 3 active alerts")

		for _, alert := range alerts {
			require.Equal(t, "high-cpu-usage", alert.Alert.Labels["ruleId"])
			require.NotEmpty(t, alert.Alert.Labels["severity"])
			require.Contains(t, []string{"critical", "warning"}, alert.Alert.Labels["severity"])
			require.Equal(t, "prod-cluster", alert.Alert.Labels["cluster"])
			require.NotEmpty(t, alert.Alert.Labels["instance"])
		}

		// Tally alerts per severity: the fixture has two critical and one warning.
		criticalAlerts := 0
		warningAlerts := 0
		for _, alert := range alerts {
			if alert.Alert.Labels["severity"] == "critical" {
				criticalAlerts++
			} else if alert.Alert.Labels["severity"] == "warning" {
				warningAlerts++
			}
		}
		require.Equal(t, 2, criticalAlerts, "Expected 2 critical alerts")
		require.Equal(t, 1, warningAlerts, "Expected 1 warning alert")
	})

	t.Run("verify_notification_routing", func(t *testing.T) {
		notifConfig, err := notificationManager.GetNotificationConfig(orgID, "high-cpu-usage")
		require.NoError(t, err)
		require.NotNil(t, notifConfig)
		require.Equal(t, 5*time.Minute, notifConfig.Renotify.RenotifyInterval)
		// "ruleId" was not part of the config stored above.
		// NOTE(review): presumably the notification manager adds it to the
		// group labels on read — confirm against the provider.
		require.Contains(t, notifConfig.NotificationGroup, model.LabelName("ruleId"))
		require.Contains(t, notifConfig.NotificationGroup, model.LabelName("cluster"))
		require.Contains(t, notifConfig.NotificationGroup, model.LabelName("instance"))
	})

	t.Run("verify_alert_groups_and_stages", func(t *testing.T) {
		// Extra wait so the dispatcher has created its aggregation groups.
		time.Sleep(2 * time.Second)

		alertGroups, _ := server.dispatcher.Groups(
			func(route *dispatch.Route) bool { return true }, // Accept all routes
			func(alert *alertmanagertypes.Alert, now time.Time) bool { return true }, // Accept all alerts
		)
		require.Len(t, alertGroups, 3)

		require.NotEmpty(t, alertGroups, "Should have alert groups created by dispatcher")

		totalAlerts := 0
		for _, group := range alertGroups {
			totalAlerts += len(group.Alerts)
		}
		require.Equal(t, 3, totalAlerts, "Should have 3 alerts total across all groups")
		// One group per instance, each keyed by the receiver route plus the
		// cluster/instance/ruleId group labels.
		require.Equal(t, "{__receiver__=\"webhook\"}:{cluster=\"prod-cluster\", instance=\"server-01\", ruleId=\"high-cpu-usage\"}", alertGroups[0].GroupKey)
		require.Equal(t, "{__receiver__=\"webhook\"}:{cluster=\"prod-cluster\", instance=\"server-02\", ruleId=\"high-cpu-usage\"}", alertGroups[1].GroupKey)
		require.Equal(t, "{__receiver__=\"webhook\"}:{cluster=\"prod-cluster\", instance=\"server-03\", ruleId=\"high-cpu-usage\"}", alertGroups[2].GroupKey)
	})
}

View File

@ -19,6 +19,7 @@ import (
"github.com/prometheus/alertmanager/config" "github.com/prometheus/alertmanager/config"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
commoncfg "github.com/prometheus/common/config" commoncfg "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
@ -127,3 +128,189 @@ func TestServerPutAlerts(t *testing.T) {
assert.Equal(t, gettableAlerts[0].Alert.Labels["alertname"], "test-alert") assert.Equal(t, gettableAlerts[0].Alert.Labels["alertname"], "test-alert")
assert.NoError(t, server.Stop(context.Background())) assert.NoError(t, server.Stop(context.Background()))
} }
// TestServerTestAlert checks that Server.TestAlert reaches every receiver
// listed for the supplied alerts. Two local webhook servers back "receiver-1"
// and "receiver-2"; each must observe at least one request.
func TestServerTestAlert(t *testing.T) {
	stateStore := alertmanagertypestest.NewStateStore()
	srvCfg := NewConfig()
	srvCfg.Route.GroupInterval = 1 * time.Second
	notificationManager := nfmanagertest.NewMock()
	server, err := New(context.Background(), slog.New(slog.NewTextHandler(io.Discard, nil)), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
	require.NoError(t, err)
	amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
	require.NoError(t, err)

	webhook1Listener, err := net.Listen("tcp", "localhost:0")
	require.NoError(t, err)
	webhook2Listener, err := net.Listen("tcp", "localhost:0")
	require.NoError(t, err)

	// Requests are recorded on buffered channels instead of plain ints: the
	// handlers run on the HTTP server's goroutines while the assertions run on
	// the test goroutine, so unsynchronised counters would be a data race.
	hits1 := make(chan struct{}, 64)
	hits2 := make(chan struct{}, 64)
	recordHit := func(hits chan<- struct{}) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			select {
			case hits <- struct{}{}:
			default: // buffer full: enough requests were already recorded
			}
			w.WriteHeader(http.StatusOK)
		})
	}
	webhook1Server := &http.Server{Handler: recordHit(hits1)}
	webhook2Server := &http.Server{Handler: recordHit(hits2)}
	go func() {
		_ = webhook1Server.Serve(webhook1Listener)
	}()
	go func() {
		_ = webhook2Server.Serve(webhook2Listener)
	}()

	webhook1URL, err := url.Parse("http://" + webhook1Listener.Addr().String() + "/webhook")
	require.NoError(t, err)
	webhook2URL, err := url.Parse("http://" + webhook2Listener.Addr().String() + "/webhook")
	require.NoError(t, err)

	require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
		Name: "receiver-1",
		WebhookConfigs: []*config.WebhookConfig{
			{
				HTTPConfig: &commoncfg.HTTPClientConfig{},
				URL:        &config.SecretURL{URL: webhook1URL},
			},
		},
	}))
	require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
		Name: "receiver-2",
		WebhookConfigs: []*config.WebhookConfig{
			{
				HTTPConfig: &commoncfg.HTTPClientConfig{},
				URL:        &config.SecretURL{URL: webhook2URL},
			},
		},
	}))
	require.NoError(t, server.SetConfig(context.Background(), amConfig))
	defer func() {
		_ = server.Stop(context.Background())
		_ = webhook1Server.Close()
		_ = webhook2Server.Close()
	}()

	// Test with multiple alerts going to different receivers
	alert1 := &alertmanagertypes.PostableAlert{
		Annotations: models.LabelSet{"alertname": "test-alert-1"},
		StartsAt:    strfmt.DateTime(time.Now()),
		Alert: models.Alert{
			Labels: models.LabelSet{"alertname": "test-alert-1", "severity": "critical"},
		},
	}
	alert2 := &alertmanagertypes.PostableAlert{
		Annotations: models.LabelSet{"alertname": "test-alert-2"},
		StartsAt:    strfmt.DateTime(time.Now()),
		Alert: models.Alert{
			Labels: models.LabelSet{"alertname": "test-alert-2", "severity": "warning"},
		},
	}
	receiversMap := map[*alertmanagertypes.PostableAlert][]string{
		alert1: {"receiver-1", "receiver-2"},
		alert2: {"receiver-2"},
	}
	// Named notifConfig so the imported alertmanager `config` package is not shadowed.
	notifConfig := &alertmanagertypes.NotificationConfig{
		NotificationGroup: make(map[model.LabelName]struct{}),
		GroupByAll:        false,
	}

	err = server.TestAlert(context.Background(), receiversMap, notifConfig)
	require.NoError(t, err)

	// Short grace period for in-flight webhook deliveries before counting.
	time.Sleep(100 * time.Millisecond)
	assert.Greater(t, len(hits1), 0, "receiver-1 should have received at least one request")
	assert.Greater(t, len(hits2), 0, "receiver-2 should have received at least one request")
}
// TestServerTestAlertContinuesOnFailure checks that Server.TestAlert still
// delivers to a healthy receiver when another receiver for the same alert
// fails, and that the failure is reported through the returned error.
func TestServerTestAlertContinuesOnFailure(t *testing.T) {
	stateStore := alertmanagertypestest.NewStateStore()
	srvCfg := NewConfig()
	srvCfg.Route.GroupInterval = 1 * time.Second
	notificationManager := nfmanagertest.NewMock()
	server, err := New(context.Background(), slog.New(slog.NewTextHandler(io.Discard, nil)), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
	require.NoError(t, err)
	amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
	require.NoError(t, err)

	// Create one working webhook and one failing receiver (non-existent)
	webhookListener, err := net.Listen("tcp", "localhost:0")
	require.NoError(t, err)

	// Requests are recorded on a buffered channel instead of a plain int: the
	// handler runs on the HTTP server's goroutine while the assertion runs on
	// the test goroutine, so an unsynchronised counter would be a data race.
	hits := make(chan struct{}, 64)
	webhookServer := &http.Server{
		Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			select {
			case hits <- struct{}{}:
			default: // buffer full: enough requests were already recorded
			}
			w.WriteHeader(http.StatusOK)
		}),
	}
	go func() {
		_ = webhookServer.Serve(webhookListener)
	}()

	webhookURL, err := url.Parse("http://" + webhookListener.Addr().String() + "/webhook")
	require.NoError(t, err)

	require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
		Name: "working-receiver",
		WebhookConfigs: []*config.WebhookConfig{
			{
				HTTPConfig: &commoncfg.HTTPClientConfig{},
				URL:        &config.SecretURL{URL: webhookURL},
			},
		},
	}))
	// NOTE(review): assumes nothing is listening on localhost:1 so delivery fails.
	require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
		Name: "failing-receiver",
		WebhookConfigs: []*config.WebhookConfig{
			{
				HTTPConfig: &commoncfg.HTTPClientConfig{},
				URL:        &config.SecretURL{URL: &url.URL{Scheme: "http", Host: "localhost:1", Path: "/webhook"}},
			},
		},
	}))
	require.NoError(t, server.SetConfig(context.Background(), amConfig))
	defer func() {
		_ = server.Stop(context.Background())
		_ = webhookServer.Close()
	}()

	alert := &alertmanagertypes.PostableAlert{
		Annotations: models.LabelSet{"alertname": "test-alert"},
		StartsAt:    strfmt.DateTime(time.Now()),
		Alert: models.Alert{
			Labels: models.LabelSet{"alertname": "test-alert"},
		},
	}
	receiversMap := map[*alertmanagertypes.PostableAlert][]string{
		alert: {"working-receiver", "failing-receiver"},
	}
	// Named notifConfig so the imported alertmanager `config` package is not shadowed.
	notifConfig := &alertmanagertypes.NotificationConfig{
		NotificationGroup: make(map[model.LabelName]struct{}),
		GroupByAll:        false,
	}

	err = server.TestAlert(context.Background(), receiversMap, notifConfig)
	assert.Error(t, err)

	// Short grace period for in-flight webhook deliveries before counting.
	time.Sleep(100 * time.Millisecond)
	assert.Greater(t, len(hits), 0, "working-receiver should have received at least one request even though failing-receiver failed")
}

View File

@ -2,6 +2,7 @@ package alertmanager
import ( import (
"context" "context"
"encoding/json"
"io" "io"
"net/http" "net/http"
"time" "time"
@ -273,3 +274,128 @@ func (api *API) CreateChannel(rw http.ResponseWriter, req *http.Request) {
render.Success(rw, http.StatusNoContent, nil) render.Success(rw, http.StatusNoContent, nil)
} }
// CreateRoutePolicy decodes a PostableRoutePolicy from the request body,
// forces its expression kind to PolicyBasedExpression, validates it and asks
// the alertmanager to create it. The created policy is rendered with
// http.StatusCreated; any failure is rendered through render.Error.
func (api *API) CreateRoutePolicy(rw http.ResponseWriter, req *http.Request) {
	ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
	defer cancel()

	// Register the close before reading so the body is released on every path,
	// including a failed read.
	defer req.Body.Close()
	body, err := io.ReadAll(req.Body)
	if err != nil {
		render.Error(rw, err)
		return
	}

	var policy alertmanagertypes.PostableRoutePolicy
	if err := json.Unmarshal(body, &policy); err != nil {
		// Malformed JSON is a caller mistake: classify it as invalid input
		// instead of passing the raw decoder error through.
		render.Error(rw, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid request body: %v", err))
		return
	}

	// Policies created through this endpoint are always policy-based,
	// whatever kind the payload carried.
	policy.ExpressionKind = alertmanagertypes.PolicyBasedExpression

	// Validate the postable route
	if err := policy.Validate(); err != nil {
		render.Error(rw, err)
		return
	}

	result, err := api.alertmanager.CreateRoutePolicy(ctx, &policy)
	if err != nil {
		render.Error(rw, err)
		return
	}
	render.Success(rw, http.StatusCreated, result)
}
// GetAllRoutePolicies renders every route policy returned by the alertmanager
// with http.StatusOK, or the lookup error. The call is bounded by a 30-second
// timeout derived from the request context.
func (api *API) GetAllRoutePolicies(rw http.ResponseWriter, req *http.Request) {
	ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
	defer cancel()

	if all, err := api.alertmanager.GetAllRoutePolicies(ctx); err != nil {
		render.Error(rw, err)
	} else {
		render.Success(rw, http.StatusOK, all)
	}
}
// GetRoutePolicyByID renders the route policy identified by the "id" path
// variable with http.StatusOK. A missing or empty id is reported as invalid
// input; lookup failures are rendered through render.Error.
func (api *API) GetRoutePolicyByID(rw http.ResponseWriter, req *http.Request) {
	ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
	defer cancel()

	id := mux.Vars(req)["id"]
	if id == "" {
		render.Error(rw, errors.NewInvalidInputf(errors.CodeInvalidInput, "policy ID is required"))
		return
	}

	if found, err := api.alertmanager.GetRoutePolicyByID(ctx, id); err != nil {
		render.Error(rw, err)
	} else {
		render.Success(rw, http.StatusOK, found)
	}
}
// DeleteRoutePolicyByID deletes the route policy identified by the "id" path
// variable and answers with http.StatusNoContent. A missing or empty id is
// reported as invalid input; deletion failures go through render.Error.
func (api *API) DeleteRoutePolicyByID(rw http.ResponseWriter, req *http.Request) {
	ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
	defer cancel()

	id := mux.Vars(req)["id"]
	if id == "" {
		render.Error(rw, errors.NewInvalidInputf(errors.CodeInvalidInput, "policy ID is required"))
		return
	}

	if err := api.alertmanager.DeleteRoutePolicyByID(ctx, id); err != nil {
		render.Error(rw, err)
		return
	}
	render.Success(rw, http.StatusNoContent, nil)
}
// UpdateRoutePolicy replaces the route policy identified by the "id" path
// variable with the PostableRoutePolicy decoded from the request body. The
// expression kind is forced to PolicyBasedExpression and the payload is
// validated before the alertmanager is called. The updated policy is rendered
// with http.StatusOK; any failure is rendered through render.Error.
func (api *API) UpdateRoutePolicy(rw http.ResponseWriter, req *http.Request) {
	ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
	defer cancel()

	vars := mux.Vars(req)
	policyID := vars["id"]
	if policyID == "" {
		render.Error(rw, errors.NewInvalidInputf(errors.CodeInvalidInput, "policy ID is required"))
		return
	}

	// Register the close before reading so the body is released on every path,
	// including a failed read.
	defer req.Body.Close()
	body, err := io.ReadAll(req.Body)
	if err != nil {
		render.Error(rw, err)
		return
	}

	var policy alertmanagertypes.PostableRoutePolicy
	if err := json.Unmarshal(body, &policy); err != nil {
		// Malformed JSON is a caller mistake: classify it as invalid input
		// instead of passing the raw decoder error through.
		render.Error(rw, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid request body: %v", err))
		return
	}

	// Policies managed through this endpoint are always policy-based,
	// whatever kind the payload carried.
	policy.ExpressionKind = alertmanagertypes.PolicyBasedExpression

	// Validate the postable route
	if err := policy.Validate(); err != nil {
		render.Error(rw, err)
		return
	}

	result, err := api.alertmanager.UpdateRoutePolicyByID(ctx, policyID, &policy)
	if err != nil {
		render.Error(rw, err)
		return
	}
	render.Success(rw, http.StatusOK, result)
}

View File

@ -1,20 +1,29 @@
package nfmanagertest package nfmanagertest
import ( import (
"context"
"fmt"
"strings"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes" "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/prometheus/common/model"
) )
// MockNotificationManager is a simple mock implementation of NotificationManager // MockNotificationManager is a simple mock implementation of NotificationManager
type MockNotificationManager struct { type MockNotificationManager struct {
configs map[string]*alertmanagertypes.NotificationConfig configs map[string]*alertmanagertypes.NotificationConfig
errors map[string]error routes map[string]*alertmanagertypes.RoutePolicy
routesByName map[string][]*alertmanagertypes.RoutePolicy
errors map[string]error
} }
// NewMock creates a new mock notification manager // NewMock creates a new mock notification manager
func NewMock() *MockNotificationManager { func NewMock() *MockNotificationManager {
return &MockNotificationManager{ return &MockNotificationManager{
configs: make(map[string]*alertmanagertypes.NotificationConfig), configs: make(map[string]*alertmanagertypes.NotificationConfig),
errors: make(map[string]error), routes: make(map[string]*alertmanagertypes.RoutePolicy),
routesByName: make(map[string][]*alertmanagertypes.RoutePolicy),
errors: make(map[string]error),
} }
} }
@ -65,6 +74,8 @@ func (m *MockNotificationManager) SetMockError(orgID, ruleID string, err error)
func (m *MockNotificationManager) ClearMockData() { func (m *MockNotificationManager) ClearMockData() {
m.configs = make(map[string]*alertmanagertypes.NotificationConfig) m.configs = make(map[string]*alertmanagertypes.NotificationConfig)
m.routes = make(map[string]*alertmanagertypes.RoutePolicy)
m.routesByName = make(map[string][]*alertmanagertypes.RoutePolicy)
m.errors = make(map[string]error) m.errors = make(map[string]error)
} }
@ -73,3 +84,241 @@ func (m *MockNotificationManager) HasConfig(orgID, ruleID string) bool {
_, exists := m.configs[key] _, exists := m.configs[key]
return exists return exists
} }
// Route Policy CRUD
// CreateRoutePolicy stores route in the mock, indexed both by ID and by name
// within orgID. It returns the error injected under the "create_route"
// operation if one is set, rejects a nil route, and returns any error from
// route.Validate.
func (m *MockNotificationManager) CreateRoutePolicy(ctx context.Context, orgID string, route *alertmanagertypes.RoutePolicy) error {
	if injected := m.errors[getKey(orgID, "create_route")]; injected != nil {
		return injected
	}
	if route == nil {
		return fmt.Errorf("route cannot be nil")
	}
	if invalid := route.Validate(); invalid != nil {
		return invalid
	}

	byID := getKey(orgID, route.ID.StringValue())
	byName := getKey(orgID, route.Name)
	m.routes[byID] = route
	m.routesByName[byName] = append(m.routesByName[byName], route)
	return nil
}
// CreateRoutePolicies stores every route in routes via CreateRoutePolicy.
// It returns the error injected under the "create_routes" operation if one is
// set and rejects an empty slice. All routes are checked for nil and validated
// before any of them is stored, so a bad entry leaves the mock untouched.
// Validation failures are wrapped with the offending index and keep the
// original error in the chain.
func (m *MockNotificationManager) CreateRoutePolicies(ctx context.Context, orgID string, routes []*alertmanagertypes.RoutePolicy) error {
	key := getKey(orgID, "create_routes")
	if err := m.errors[key]; err != nil {
		return err
	}
	if len(routes) == 0 {
		return fmt.Errorf("routes cannot be empty")
	}

	// First pass: validate everything up front.
	for i, route := range routes {
		if route == nil {
			return fmt.Errorf("route at index %d cannot be nil", i)
		}
		if err := route.Validate(); err != nil {
			// %w keeps the message identical while preserving the error chain
			// for errors.Is / errors.As.
			return fmt.Errorf("route at index %d: %w", i, err)
		}
	}

	// Second pass: store.
	for _, route := range routes {
		if err := m.CreateRoutePolicy(ctx, orgID, route); err != nil {
			return err
		}
	}
	return nil
}
// GetRoutePolicyByID returns the route stored under routeID for orgID. It
// returns the error injected under the "get_route" operation if one is set,
// and an error when routeID is empty or no such route is stored.
func (m *MockNotificationManager) GetRoutePolicyByID(ctx context.Context, orgID string, routeID string) (*alertmanagertypes.RoutePolicy, error) {
	if injected := m.errors[getKey(orgID, "get_route")]; injected != nil {
		return nil, injected
	}
	if routeID == "" {
		return nil, fmt.Errorf("routeID cannot be empty")
	}
	if found, ok := m.routes[getKey(orgID, routeID)]; ok {
		return found, nil
	}
	return nil, fmt.Errorf("route with ID %s not found", routeID)
}
// GetAllRoutePolicies returns every stored route whose OrgID equals orgID.
// It returns the error injected under the "get_all_routes" operation if one is
// set, and an error when orgID is empty. The result follows map iteration
// order, so callers must not rely on its ordering.
func (m *MockNotificationManager) GetAllRoutePolicies(ctx context.Context, orgID string) ([]*alertmanagertypes.RoutePolicy, error) {
	key := getKey(orgID, "get_all_routes")
	if err := m.errors[key]; err != nil {
		return nil, err
	}
	if orgID == "" {
		return nil, fmt.Errorf("orgID cannot be empty")
	}

	var routes []*alertmanagertypes.RoutePolicy
	// The map key is not needed: ownership is decided by the route's OrgID.
	for _, route := range m.routes {
		if route.OrgID == orgID {
			routes = append(routes, route)
		}
	}
	return routes, nil
}
// DeleteRoutePolicy removes the route stored under routeID for orgID from the
// ID index and from the name index. It returns the error injected under the
// "delete_route" operation if one is set, and an error when routeID is empty
// or no such route is stored.
func (m *MockNotificationManager) DeleteRoutePolicy(ctx context.Context, orgID string, routeID string) error {
	if injected := m.errors[getKey(orgID, "delete_route")]; injected != nil {
		return injected
	}
	if routeID == "" {
		return fmt.Errorf("routeID cannot be empty")
	}

	idKey := getKey(orgID, routeID)
	target, found := m.routes[idKey]
	if !found {
		return fmt.Errorf("route with ID %s not found", routeID)
	}
	delete(m.routes, idKey)

	nameKey := getKey(orgID, target.Name)
	sameName, indexed := m.routesByName[nameKey]
	if !indexed {
		return nil
	}

	// Keep every same-named route except the one being deleted.
	var remaining []*alertmanagertypes.RoutePolicy
	for i := range sameName {
		if sameName[i].ID.StringValue() == routeID {
			continue
		}
		remaining = append(remaining, sameName[i])
	}
	if len(remaining) > 0 {
		m.routesByName[nameKey] = remaining
	} else {
		// Drop the name entry entirely once nothing is left under it.
		delete(m.routesByName, nameKey)
	}
	return nil
}
// DeleteAllRoutePoliciesByName removes every route indexed under name for
// orgID from both indexes. It returns the error injected under the
// "delete_routes_by_name" operation if one is set, and an error when orgID or
// name is empty. Deleting a name with no routes is not an error.
func (m *MockNotificationManager) DeleteAllRoutePoliciesByName(ctx context.Context, orgID string, name string) error {
	if injected := m.errors[getKey(orgID, "delete_routes_by_name")]; injected != nil {
		return injected
	}
	switch {
	case orgID == "":
		return fmt.Errorf("orgID cannot be empty")
	case name == "":
		return fmt.Errorf("name cannot be empty")
	}

	nameKey := getKey(orgID, name)
	named, ok := m.routesByName[nameKey]
	if !ok {
		return nil // No routes to delete
	}
	for i := range named {
		delete(m.routes, getKey(orgID, named[i].ID.StringValue()))
	}
	delete(m.routesByName, nameKey)
	return nil
}
// Match returns the channels of every candidate route whose expression
// evaluates to true for set. It returns the error stored for (orgID, ruleID)
// if one is set, and any error from GetNotificationConfig. When the rule's
// config has UsePolicy set, the candidates are all of the org's
// PolicyBasedExpression routes; otherwise they are the routes indexed under
// the name ruleID.
func (m *MockNotificationManager) Match(ctx context.Context, orgID string, ruleID string, set model.LabelSet) ([]string, error) {
	ruleKey := getKey(orgID, ruleID)
	if injected := m.errors[ruleKey]; injected != nil {
		return nil, injected
	}
	config, err := m.GetNotificationConfig(orgID, ruleID)
	if err != nil {
		return nil, err
	}

	var candidates []*alertmanagertypes.RoutePolicy
	if !config.UsePolicy {
		// Rule-specific routes are stored under the rule ID as their name;
		// a missing entry leaves candidates nil.
		candidates = m.routesByName[ruleKey]
	} else {
		for _, route := range m.routes {
			if route.OrgID != orgID || route.ExpressionKind != alertmanagertypes.PolicyBasedExpression {
				continue
			}
			candidates = append(candidates, route)
		}
	}

	var channels []string
	for _, route := range candidates {
		if !m.evaluateExpr(route.Expression, set) {
			continue
		}
		channels = append(channels, route.Channels...)
	}
	return channels, nil
}
// evaluateExpr is a canned stand-in for expression evaluation: it recognises
// three fixed expression fragments and compares the "ruleId" label against the
// values each fragment names. A label set without "ruleId", or an expression
// containing none of the fragments, evaluates to false.
func (m *MockNotificationManager) evaluateExpr(expression string, labelSet model.LabelSet) bool {
	id, present := labelSet["ruleId"]
	if !present {
		return false
	}

	// Case order matters: the first fragment found in the expression wins.
	switch {
	case strings.Contains(expression, `ruleId in ["ruleId-OtherAlert", "ruleId-TestingAlert"]`):
		return id == "ruleId-OtherAlert" || id == "ruleId-TestingAlert"
	case strings.Contains(expression, `ruleId in ["ruleId-HighLatency", "ruleId-HighErrorRate"]`):
		return id == "ruleId-HighLatency" || id == "ruleId-HighErrorRate"
	case strings.Contains(expression, `ruleId == "ruleId-HighLatency"`):
		return id == "ruleId-HighLatency"
	default:
		return false
	}
}
// Helper methods for testing
// SetMockRoute stores route under orgID in both the ID index and the name
// index without validating it or consulting injected errors.
func (m *MockNotificationManager) SetMockRoute(orgID string, route *alertmanagertypes.RoutePolicy) {
	m.routes[getKey(orgID, route.ID.StringValue())] = route

	byName := getKey(orgID, route.Name)
	m.routesByName[byName] = append(m.routesByName[byName], route)
}
// SetMockRouteError registers err under the (orgID, operation) key, where the
// mock's methods look up injected errors.
func (m *MockNotificationManager) SetMockRouteError(orgID, operation string, err error) {
	m.errors[getKey(orgID, operation)] = err
}
// ClearMockRoutes discards all stored routes by replacing both route indexes
// with fresh empty maps.
func (m *MockNotificationManager) ClearMockRoutes() {
	m.routes, m.routesByName = make(map[string]*alertmanagertypes.RoutePolicy), make(map[string][]*alertmanagertypes.RoutePolicy)
}
// GetRouteCount returns the number of routes in the ID index, across all orgs.
func (m *MockNotificationManager) GetRouteCount() int {
	return len(m.routes)
}
// HasRoute reports whether a route is stored under routeID for orgID.
func (m *MockNotificationManager) HasRoute(orgID, routeID string) bool {
	_, ok := m.routes[getKey(orgID, routeID)]
	return ok
}

View File

@ -0,0 +1,176 @@
package nfroutingstoretest
import (
"context"
"regexp"
"strings"
"github.com/DATA-DOG/go-sqlmock"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/sqlroutingstore"
"github.com/SigNoz/signoz/pkg/sqlstore"
"github.com/SigNoz/signoz/pkg/sqlstore/sqlstoretest"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
)
// MockSQLRouteStore pairs a RouteStore with the sqlmock handle used to script
// and verify the SQL it issues.
type MockSQLRouteStore struct {
	// routeStore is the store every RouteStore method delegates to.
	routeStore alertmanagertypes.RouteStore
	// mock records expectations for the queries routeStore runs.
	mock sqlmock.Sqlmock
}
// NewMockSQLRouteStore builds a MockSQLRouteStore whose SQL route store runs
// against a sqlmock-backed test SQL store configured with the "sqlite"
// provider and regexp query matching.
func NewMockSQLRouteStore() *MockSQLRouteStore {
	mockedSQL := sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherRegexp)
	return &MockSQLRouteStore{
		routeStore: sqlroutingstore.NewStore(mockedSQL),
		mock:       mockedSQL.Mock(),
	}
}
// Mock returns the underlying sqlmock handle so tests can add expectations
// that the Expect* helpers do not cover.
func (m *MockSQLRouteStore) Mock() sqlmock.Sqlmock {
	return m.mock
}
// GetByID delegates to the wrapped route store's GetByID.
func (m *MockSQLRouteStore) GetByID(ctx context.Context, orgId string, id string) (*alertmanagertypes.RoutePolicy, error) {
	return m.routeStore.GetByID(ctx, orgId, id)
}
// Create delegates to the wrapped route store's Create.
func (m *MockSQLRouteStore) Create(ctx context.Context, route *alertmanagertypes.RoutePolicy) error {
	return m.routeStore.Create(ctx, route)
}
// CreateBatch delegates to the wrapped route store's CreateBatch.
func (m *MockSQLRouteStore) CreateBatch(ctx context.Context, routes []*alertmanagertypes.RoutePolicy) error {
	return m.routeStore.CreateBatch(ctx, routes)
}
// Delete delegates to the wrapped route store's Delete.
func (m *MockSQLRouteStore) Delete(ctx context.Context, orgId string, id string) error {
	return m.routeStore.Delete(ctx, orgId, id)
}
// GetAllByKind delegates to the wrapped route store's GetAllByKind.
func (m *MockSQLRouteStore) GetAllByKind(ctx context.Context, orgID string, kind alertmanagertypes.ExpressionKind) ([]*alertmanagertypes.RoutePolicy, error) {
	return m.routeStore.GetAllByKind(ctx, orgID, kind)
}
// GetAllByName delegates to the wrapped route store's GetAllByName.
func (m *MockSQLRouteStore) GetAllByName(ctx context.Context, orgID string, name string) ([]*alertmanagertypes.RoutePolicy, error) {
	return m.routeStore.GetAllByName(ctx, orgID, name)
}
// DeleteRouteByName delegates to the wrapped route store's DeleteRouteByName.
func (m *MockSQLRouteStore) DeleteRouteByName(ctx context.Context, orgID string, name string) error {
	return m.routeStore.DeleteRouteByName(ctx, orgID, name)
}
// ExpectGetByID registers an expectation for the SELECT that fetches a route
// policy by id and org. When route is non-nil the query returns one row built
// from it; when route is nil the query returns an empty result set.
//
// NOTE(review): this pattern uses $1/$2 placeholders with WithArgs, while the
// other Expect* helpers in this file match literal quoted values — confirm
// which form the store actually emits.
func (m *MockSQLRouteStore) ExpectGetByID(orgID, id string, route *alertmanagertypes.RoutePolicy) {
	rows := sqlmock.NewRows([]string{"id", "org_id", "name", "expression", "kind", "description", "enabled", "tags", "channels", "created_at", "updated_at", "created_by", "updated_by"})
	if route != nil {
		// Joining an empty slice would produce `[""]`, i.e. one channel with an
		// empty name, so an empty channel list is emitted as `[]` explicitly.
		channelsJSON := "[]"
		if len(route.Channels) > 0 {
			channelsJSON = `["` + strings.Join(route.Channels, `","`) + `"]`
		}
		rows.AddRow(
			route.ID.StringValue(),
			route.OrgID,
			route.Name,
			route.Expression,
			route.ExpressionKind.StringValue(),
			route.Description,
			route.Enabled,
			"[]",                   // tags as JSON
			channelsJSON,           // channels as JSON
			"0001-01-01T00:00:00Z", // created_at
			"0001-01-01T00:00:00Z", // updated_at
			"",                     // created_by
			"",                     // updated_by
		)
	}
	m.mock.ExpectQuery(`SELECT (.+) FROM "route_policy" WHERE \(id = \$1\) AND \(org_id = \$2\)`).
		WithArgs(id, orgID).
		WillReturnRows(rows)
}
// ExpectCreate registers an expectation for a single INSERT into
// "route_policy" that reports one affected row. The route argument is not
// inspected; any matching INSERT satisfies the expectation.
func (m *MockSQLRouteStore) ExpectCreate(route *alertmanagertypes.RoutePolicy) {
	inserted := sqlmock.NewResult(1, 1)
	m.mock.ExpectExec(`INSERT INTO "route_policy" \(.+\) VALUES .+`).WillReturnResult(inserted)
}
// ExpectCreateBatch registers an expectation for one INSERT into
// "route_policy" that reports len(routes) affected rows. An empty slice
// registers nothing.
func (m *MockSQLRouteStore) ExpectCreateBatch(routes []*alertmanagertypes.RoutePolicy) {
	count := len(routes)
	if count == 0 {
		return
	}
	// Simplified pattern that should match any INSERT into route_policy
	m.mock.
		ExpectExec(`INSERT INTO "route_policy" \(.+\) VALUES .+`).
		WillReturnResult(sqlmock.NewResult(1, int64(count)))
}
// ExpectDelete registers an expectation for the DELETE that removes the route
// policy with the given org and id, reporting one affected row. Both values
// are regexp-quoted before being embedded in the pattern.
func (m *MockSQLRouteStore) ExpectDelete(orgID, id string) {
	quotedOrg, quotedID := regexp.QuoteMeta(orgID), regexp.QuoteMeta(id)
	pattern := `DELETE FROM "route_policy" AS "route_policy" WHERE \(org_id = '` + quotedOrg + `'\) AND \(id = '` + quotedID + `'\)`
	m.mock.ExpectExec(pattern).WillReturnResult(sqlmock.NewResult(0, 1))
}
// ExpectGetAllByKindAndOrgID registers an expectation for the SELECT that
// lists route policies by org and kind. Only the entries of routes whose OrgID
// and ExpressionKind match the arguments are returned as rows.
func (m *MockSQLRouteStore) ExpectGetAllByKindAndOrgID(orgID string, kind alertmanagertypes.ExpressionKind, routes []*alertmanagertypes.RoutePolicy) {
	rows := sqlmock.NewRows([]string{"id", "org_id", "name", "expression", "kind", "description", "enabled", "tags", "channels", "created_at", "updated_at", "created_by", "updated_by"})
	for _, route := range routes {
		if route.OrgID != orgID || route.ExpressionKind != kind {
			continue
		}
		// Joining an empty slice would produce `[""]`, i.e. one channel with an
		// empty name, so an empty channel list is emitted as `[]` explicitly.
		channelsJSON := "[]"
		if len(route.Channels) > 0 {
			channelsJSON = `["` + strings.Join(route.Channels, `","`) + `"]`
		}
		rows.AddRow(
			route.ID.StringValue(),
			route.OrgID,
			route.Name,
			route.Expression,
			route.ExpressionKind.StringValue(),
			route.Description,
			route.Enabled,
			"[]",                   // tags as JSON
			channelsJSON,           // channels as JSON
			"0001-01-01T00:00:00Z", // created_at
			"0001-01-01T00:00:00Z", // updated_at
			"",                     // created_by
			"",                     // updated_by
		)
	}
	m.mock.ExpectQuery(`SELECT (.+) FROM "route_policy" WHERE \(org_id = '` + regexp.QuoteMeta(orgID) + `'\) AND \(kind = '` + regexp.QuoteMeta(kind.StringValue()) + `'\)`).
		WillReturnRows(rows)
}
// ExpectGetAllByName registers an expectation for the SELECT that lists route
// policies by org and name. Only the entries of routes whose OrgID and Name
// match the arguments are returned as rows.
func (m *MockSQLRouteStore) ExpectGetAllByName(orgID, name string, routes []*alertmanagertypes.RoutePolicy) {
	rows := sqlmock.NewRows([]string{"id", "org_id", "name", "expression", "kind", "description", "enabled", "tags", "channels", "created_at", "updated_at", "created_by", "updated_by"})
	for _, route := range routes {
		if route.OrgID != orgID || route.Name != name {
			continue
		}
		// Joining an empty slice would produce `[""]`, i.e. one channel with an
		// empty name, so an empty channel list is emitted as `[]` explicitly.
		channelsJSON := "[]"
		if len(route.Channels) > 0 {
			channelsJSON = `["` + strings.Join(route.Channels, `","`) + `"]`
		}
		rows.AddRow(
			route.ID.StringValue(),
			route.OrgID,
			route.Name,
			route.Expression,
			route.ExpressionKind.StringValue(),
			route.Description,
			route.Enabled,
			"[]",                   // tags as JSON
			channelsJSON,           // channels as JSON
			"0001-01-01T00:00:00Z", // created_at
			"0001-01-01T00:00:00Z", // updated_at
			"",                     // created_by
			"",                     // updated_by
		)
	}
	m.mock.ExpectQuery(`SELECT (.+) FROM "route_policy" WHERE \(org_id = '` + regexp.QuoteMeta(orgID) + `'\) AND \(name = '` + regexp.QuoteMeta(name) + `'\)`).
		WillReturnRows(rows)
}
// ExpectDeleteRouteByName registers an expectation for the DELETE that removes
// route policies with the given org and name, reporting one affected row.
// Both values are regexp-quoted before being embedded in the pattern.
func (m *MockSQLRouteStore) ExpectDeleteRouteByName(orgID, name string) {
	quotedOrg, quotedName := regexp.QuoteMeta(orgID), regexp.QuoteMeta(name)
	pattern := `DELETE FROM "route_policy" AS "route_policy" WHERE \(org_id = '` + quotedOrg + `'\) AND \(name = '` + quotedName + `'\)`
	m.mock.ExpectExec(pattern).WillReturnResult(sqlmock.NewResult(0, 1))
}
// ExpectationsWereMet forwards to sqlmock's ExpectationsWereMet and returns
// its result.
func (m *MockSQLRouteStore) ExpectationsWereMet() error {
	return m.mock.ExpectationsWereMet()
}
// MatchExpectationsInOrder forwards match to sqlmock's
// MatchExpectationsInOrder.
func (m *MockSQLRouteStore) MatchExpectationsInOrder(match bool) {
	m.mock.MatchExpectationsInOrder(match)
}

View File

@ -0,0 +1,93 @@
package sqlroutingstore
import (
"context"
"database/sql"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/sqlstore"
routeTypes "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
)
// store is the SQL-backed implementation returned by NewStore.
type store struct {
	// sqlstore provides the bun database handle used by every query.
	sqlstore sqlstore.SQLStore
}
// NewStore returns a RouteStore that runs its queries through sqlstore.
func NewStore(sqlstore sqlstore.SQLStore) routeTypes.RouteStore {
	return &store{
		sqlstore: sqlstore,
	}
}
// GetByID loads the routing policy with the given id that belongs to orgId.
// A missing row is reported through the SQL store's not-found wrapper; any
// other failure is wrapped as an internal error.
func (store *store) GetByID(ctx context.Context, orgId string, id string) (*routeTypes.RoutePolicy, error) {
	var route routeTypes.RoutePolicy
	err := store.sqlstore.BunDBCtx(ctx).NewSelect().
		Model(&route).
		Where("id = ?", id).
		Where("org_id = ?", orgId).
		Scan(ctx)

	switch {
	case err == nil:
		return &route, nil
	case errors.Is(err, sql.ErrNoRows):
		return nil, store.sqlstore.WrapNotFoundErrf(err, errors.CodeNotFound, "routing policy with ID: %s does not exist", id)
	default:
		return nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to fetch routing policy with ID: %s", id)
	}
}
// Create inserts route as a new routing policy row. A failed insert is
// returned as an internal error that wraps the underlying database error, in
// the same way as the other methods of this store.
func (store *store) Create(ctx context.Context, route *routeTypes.RoutePolicy) error {
	_, err := store.sqlstore.BunDBCtx(ctx).NewInsert().Model(route).Exec(ctx)
	if err != nil {
		// Wrap instead of replacing the error so the database cause is not lost.
		return errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "error creating routing policy with ID: %s", route.ID.StringValue())
	}
	return nil
}
// CreateBatch inserts all given routing policies with a single insert query.
// A failed insert is returned as an internal error whose message includes the
// underlying error text.
func (store *store) CreateBatch(ctx context.Context, route []*routeTypes.RoutePolicy) error {
	insert := store.sqlstore.BunDBCtx(ctx).NewInsert().Model(&route)
	if _, err := insert.Exec(ctx); err != nil {
		return errors.NewInternalf(errors.CodeInternal, "error creating routing policies: %v", err)
	}
	return nil
}
// Delete removes the routing policy with the given id that belongs to orgId.
// A failed delete is wrapped as an internal error.
func (store *store) Delete(ctx context.Context, orgId string, id string) error {
	query := store.sqlstore.BunDBCtx(ctx).NewDelete().
		Model((*routeTypes.RoutePolicy)(nil)).
		Where("org_id = ?", orgId).
		Where("id = ?", id)
	if _, err := query.Exec(ctx); err != nil {
		return errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to delete routing policy with ID: %s", id)
	}
	return nil
}
// GetAllByKind lists the routing policies of orgID that have the given
// expression kind. sql.ErrNoRows is reported as a not-found error; any other
// failure is wrapped as an internal error.
func (store *store) GetAllByKind(ctx context.Context, orgID string, kind routeTypes.ExpressionKind) ([]*routeTypes.RoutePolicy, error) {
	var routes []*routeTypes.RoutePolicy
	err := store.sqlstore.BunDBCtx(ctx).NewSelect().
		Model(&routes).
		Where("org_id = ?", orgID).
		Where("kind = ?", kind).
		Scan(ctx)

	switch {
	case err == nil:
		return routes, nil
	case errors.Is(err, sql.ErrNoRows):
		return nil, errors.NewNotFoundf(errors.CodeNotFound, "no routing policies found for orgID: %s", orgID)
	default:
		return nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to fetch routing policies for orgID: %s", orgID)
	}
}
// GetAllByName lists the routing policies of orgID that have the given name.
// sql.ErrNoRows is reported as a not-found error (returned together with the
// scanned slice); any other failure is wrapped as an internal error.
func (store *store) GetAllByName(ctx context.Context, orgID string, name string) ([]*routeTypes.RoutePolicy, error) {
	var routes []*routeTypes.RoutePolicy
	err := store.sqlstore.BunDBCtx(ctx).NewSelect().
		Model(&routes).
		Where("org_id = ?", orgID).
		Where("name = ?", name).
		Scan(ctx)

	switch {
	case err == nil:
		return routes, nil
	case errors.Is(err, sql.ErrNoRows):
		return routes, errors.NewNotFoundf(errors.CodeNotFound, "no routing policies found for orgID: %s and name: %s", orgID, name)
	default:
		return nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to fetch routing policies for orgID: %s and name: %s", orgID, name)
	}
}
// DeleteRouteByName removes every routing policy of orgID that has the given
// name. A failed delete is wrapped as an internal error.
func (store *store) DeleteRouteByName(ctx context.Context, orgID string, name string) error {
	query := store.sqlstore.BunDBCtx(ctx).NewDelete().
		Model((*routeTypes.RoutePolicy)(nil)).
		Where("org_id = ?", orgID).
		Where("name = ?", name)
	if _, err := query.Exec(ctx); err != nil {
		return errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to delete routing policies with name: %s", name)
	}
	return nil
}

View File

@ -2,12 +2,27 @@
package nfmanager package nfmanager
import ( import (
"context"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes" "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/prometheus/common/model"
) )
// NotificationManager defines how alerts should be grouped and configured for notification with multi-tenancy support. // NotificationManager defines how alerts should be grouped and configured for notification.
type NotificationManager interface { type NotificationManager interface {
// Notification Config CRUD
GetNotificationConfig(orgID string, ruleID string) (*alertmanagertypes.NotificationConfig, error) GetNotificationConfig(orgID string, ruleID string) (*alertmanagertypes.NotificationConfig, error)
SetNotificationConfig(orgID string, ruleID string, config *alertmanagertypes.NotificationConfig) error SetNotificationConfig(orgID string, ruleID string, config *alertmanagertypes.NotificationConfig) error
DeleteNotificationConfig(orgID string, ruleID string) error DeleteNotificationConfig(orgID string, ruleID string) error
// Route Policy CRUD
CreateRoutePolicy(ctx context.Context, orgID string, route *alertmanagertypes.RoutePolicy) error
CreateRoutePolicies(ctx context.Context, orgID string, routes []*alertmanagertypes.RoutePolicy) error
GetRoutePolicyByID(ctx context.Context, orgID string, routeID string) (*alertmanagertypes.RoutePolicy, error)
GetAllRoutePolicies(ctx context.Context, orgID string) ([]*alertmanagertypes.RoutePolicy, error)
DeleteRoutePolicy(ctx context.Context, orgID string, routeID string) error
DeleteAllRoutePoliciesByName(ctx context.Context, orgID string, name string) error
// Route matching
Match(ctx context.Context, orgID string, ruleID string, set model.LabelSet) ([]string, error)
} }

View File

@ -2,11 +2,14 @@ package rulebasednotification
import ( import (
"context" "context"
"strings"
"sync" "sync"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager" "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/errors" "github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes" "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/expr-lang/expr"
"github.com/prometheus/common/model"
"github.com/SigNoz/signoz/pkg/factory" "github.com/SigNoz/signoz/pkg/factory"
) )
@ -14,26 +17,28 @@ import (
type provider struct { type provider struct {
settings factory.ScopedProviderSettings settings factory.ScopedProviderSettings
orgToFingerprintToNotificationConfig map[string]map[string]alertmanagertypes.NotificationConfig orgToFingerprintToNotificationConfig map[string]map[string]alertmanagertypes.NotificationConfig
routeStore alertmanagertypes.RouteStore
mutex sync.RWMutex mutex sync.RWMutex
} }
// NewFactory creates a new factory for the rule-based grouping strategy. // NewFactory creates a new factory for the rule-based grouping strategy.
func NewFactory() factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config] { func NewFactory(routeStore alertmanagertypes.RouteStore) factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config] {
return factory.NewProviderFactory( return factory.NewProviderFactory(
factory.MustNewName("rulebased"), factory.MustNewName("rulebased"),
func(ctx context.Context, settings factory.ProviderSettings, config nfmanager.Config) (nfmanager.NotificationManager, error) { func(ctx context.Context, settings factory.ProviderSettings, config nfmanager.Config) (nfmanager.NotificationManager, error) {
return New(ctx, settings, config) return New(ctx, settings, config, routeStore)
}, },
) )
} }
// New creates a new rule-based grouping strategy provider. // New creates a new rule-based grouping strategy provider.
func New(ctx context.Context, providerSettings factory.ProviderSettings, config nfmanager.Config) (nfmanager.NotificationManager, error) { func New(ctx context.Context, providerSettings factory.ProviderSettings, config nfmanager.Config, routeStore alertmanagertypes.RouteStore) (nfmanager.NotificationManager, error) {
settings := factory.NewScopedProviderSettings(providerSettings, "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/rulebasednotification") settings := factory.NewScopedProviderSettings(providerSettings, "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/rulebasednotification")
return &provider{ return &provider{
settings: settings, settings: settings,
orgToFingerprintToNotificationConfig: make(map[string]map[string]alertmanagertypes.NotificationConfig), orgToFingerprintToNotificationConfig: make(map[string]map[string]alertmanagertypes.NotificationConfig),
routeStore: routeStore,
}, nil }, nil
} }
@ -58,6 +63,8 @@ func (r *provider) GetNotificationConfig(orgID string, ruleID string) (*alertman
for k, v := range config.NotificationGroup { for k, v := range config.NotificationGroup {
notificationConfig.NotificationGroup[k] = v notificationConfig.NotificationGroup[k] = v
} }
notificationConfig.UsePolicy = config.UsePolicy
notificationConfig.GroupByAll = config.GroupByAll
} }
} }
@ -101,3 +108,147 @@ func (r *provider) DeleteNotificationConfig(orgID string, ruleID string) error {
return nil return nil
} }
// CreateRoutePolicy validates route and persists it through the route store.
// A nil route or a failed validation is reported as an invalid-input error;
// store errors are returned unchanged.
func (r *provider) CreateRoutePolicy(ctx context.Context, orgID string, route *alertmanagertypes.RoutePolicy) error {
	if route == nil {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policy cannot be nil")
	}
	if invalid := route.Validate(); invalid != nil {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid route policy: %v", invalid)
	}
	return r.routeStore.Create(ctx, route)
}
// CreateRoutePolicies validates a batch of route policies and stores them with
// a single CreateBatch call.
//
// The whole batch is rejected as invalid input when it is empty, contains a nil
// entry, or contains an entry that fails Validate; nothing is written in that
// case. orgID is not used by this method.
func (r *provider) CreateRoutePolicies(ctx context.Context, orgID string, routes []*alertmanagertypes.RoutePolicy) error {
	if len(routes) == 0 {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policies cannot be empty")
	}

	// Validate everything up front so the store never sees a partially valid batch.
	for idx := range routes {
		candidate := routes[idx]
		if candidate == nil {
			return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policy cannot be nil")
		}

		validationErr := candidate.Validate()
		if validationErr == nil {
			continue
		}
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policy with name %s: %s", candidate.Name, validationErr.Error())
	}

	return r.routeStore.CreateBatch(ctx, routes)
}
// GetRoutePolicyByID looks up one route policy in the route store by org and
// route ID. An empty routeID is rejected as invalid input; orgID is passed
// through to the store unchecked.
func (r *provider) GetRoutePolicyByID(ctx context.Context, orgID string, routeID string) (*alertmanagertypes.RoutePolicy, error) {
	if len(routeID) == 0 {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
	}

	policy, err := r.routeStore.GetByID(ctx, orgID, routeID)
	return policy, err
}
// GetAllRoutePolicies returns the org's route policies of kind
// PolicyBasedExpression from the route store. An empty orgID is rejected as
// invalid input.
func (r *provider) GetAllRoutePolicies(ctx context.Context, orgID string) ([]*alertmanagertypes.RoutePolicy, error) {
	if len(orgID) == 0 {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "orgID cannot be empty")
	}

	policies, err := r.routeStore.GetAllByKind(ctx, orgID, alertmanagertypes.PolicyBasedExpression)
	return policies, err
}
// DeleteRoutePolicy removes one route policy from the route store by org and
// route ID. An empty routeID is rejected as invalid input; orgID is passed
// through to the store unchecked.
func (r *provider) DeleteRoutePolicy(ctx context.Context, orgID string, routeID string) error {
	if len(routeID) == 0 {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
	}

	deleteErr := r.routeStore.Delete(ctx, orgID, routeID)
	return deleteErr
}
// DeleteAllRoutePoliciesByName asks the route store to delete the org's route
// policies stored under the given name. Both orgID and name must be non-empty;
// orgID is checked first.
func (r *provider) DeleteAllRoutePoliciesByName(ctx context.Context, orgID string, name string) error {
	switch {
	case orgID == "":
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "orgID cannot be empty")
	case name == "":
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "name cannot be empty")
	}

	return r.routeStore.DeleteRouteByName(ctx, orgID, name)
}
// Match returns the notification channels that an alert with the given label
// set should be delivered to for a rule.
//
// The candidate routes depend on the rule's notification config:
//   - UsePolicy set: every route of kind PolicyBasedExpression in the org.
//   - UsePolicy unset: the routes stored under the rule's ID as their name.
//
// When the label set carries NoDataLabel and the rule does not use policies,
// every candidate route contributes its channels without its expression being
// evaluated. Otherwise a route contributes its channels only when its
// expression evaluates to true; a route whose expression cannot be evaluated
// against this label set is treated as not matching and is skipped silently.
//
// Channels are returned in route order and are not de-duplicated. The result
// is a nil slice when nothing matched.
func (r *provider) Match(ctx context.Context, orgID string, ruleID string, set model.LabelSet) ([]string, error) {
	notificationConfig, err := r.GetNotificationConfig(orgID, ruleID)
	if err != nil {
		return nil, errors.NewInternalf(errors.CodeInternal, "error getting notification configuration: %v", err)
	}

	var (
		candidates []*alertmanagertypes.RoutePolicy
		lookupErr  error
	)
	if notificationConfig.UsePolicy {
		candidates, lookupErr = r.routeStore.GetAllByKind(ctx, orgID, alertmanagertypes.PolicyBasedExpression)
	} else {
		candidates, lookupErr = r.routeStore.GetAllByName(ctx, orgID, ruleID)
	}
	if lookupErr != nil {
		return []string{}, errors.NewInternalf(errors.CodeInternal, "error getting route policies: %v", lookupErr)
	}

	// No-data alerts of rule-scoped routing bypass expression evaluation.
	_, isNoData := set[alertmanagertypes.NoDataLabel]
	bypassExpressions := isNoData && !notificationConfig.UsePolicy

	var channels []string
	for _, candidate := range candidates {
		if !bypassExpressions {
			matched, evalErr := r.evaluateExpr(candidate.Expression, set)
			if evalErr != nil || !matched {
				continue
			}
		}
		channels = append(channels, candidate.Channels...)
	}

	return channels, nil
}
// evaluateExpr evaluates a route policy expression against an alert's labels
// and reports whether the expression holds.
//
// Every label becomes a variable in the expression environment. A label whose
// name contains dots (for example "threshold.name") is expanded into nested
// maps, so the expression can reach it with member access (threshold.name).
//
// If one label name is also a dotted prefix of another (for example "a" and
// "a.b"), the nested form wins and the scalar value is dropped. This keeps the
// environment independent of map iteration order and avoids a failed type
// assertion on the conflicting entry.
//
// It returns an internal error when the expression fails to compile, fails to
// run, or produces a non-boolean result.
func (r *provider) evaluateExpr(expression string, labelSet model.LabelSet) (bool, error) {
	env := make(map[string]interface{}, len(labelSet))
	for name, labelValue := range labelSet {
		// Split yields a single segment for names without a dot, so flat and
		// dotted labels share one code path.
		parts := strings.Split(string(name), ".")

		current := env
		for _, part := range parts[:len(parts)-1] {
			child, isMap := current[part].(map[string]interface{})
			if !isMap {
				// Missing, or occupied by a scalar label: the nested form wins.
				child = make(map[string]interface{})
				current[part] = child
			}
			current = child
		}

		leaf := parts[len(parts)-1]
		if _, isMap := current[leaf].(map[string]interface{}); isMap {
			// A longer dotted label already created a subtree here; keep it.
			continue
		}
		current[leaf] = string(labelValue)
	}

	program, err := expr.Compile(expression, expr.Env(env))
	if err != nil {
		return false, errors.NewInternalf(errors.CodeInternal, "error compiling route policy %s: %v", expression, err)
	}

	output, err := expr.Run(program, env)
	if err != nil {
		return false, errors.NewInternalf(errors.CodeInternal, "error running route policy %s: %v", expression, err)
	}

	matched, isBool := output.(bool)
	if !isBool {
		// err is nil here, so report the offending result type instead.
		return false, errors.NewInternalf(errors.CodeInternal, "error in evaluating route policy %s: expected a boolean result, got %T", expression, output)
	}
	return matched, nil
}

View File

@ -2,18 +2,22 @@ package rulebasednotification
import ( import (
"context" "context"
"github.com/prometheus/common/model"
"sync" "sync"
"testing" "testing"
"time" "time"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager" "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest"
"github.com/SigNoz/signoz/pkg/factory" "github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest" "github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes" "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/prometheus/alertmanager/types" "github.com/SigNoz/signoz/pkg/valuer"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/prometheus/common/model"
) )
func createTestProviderSettings() factory.ProviderSettings { func createTestProviderSettings() factory.ProviderSettings {
@ -21,7 +25,8 @@ func createTestProviderSettings() factory.ProviderSettings {
} }
func TestNewFactory(t *testing.T) { func TestNewFactory(t *testing.T) {
providerFactory := NewFactory() routeStore := nfroutingstoretest.NewMockSQLRouteStore()
providerFactory := NewFactory(routeStore)
assert.NotNil(t, providerFactory) assert.NotNil(t, providerFactory)
assert.Equal(t, "rulebased", providerFactory.Name().String()) assert.Equal(t, "rulebased", providerFactory.Name().String())
} }
@ -31,7 +36,8 @@ func TestNew(t *testing.T) {
providerSettings := createTestProviderSettings() providerSettings := createTestProviderSettings()
config := nfmanager.Config{} config := nfmanager.Config{}
provider, err := New(ctx, providerSettings, config) routeStore := nfroutingstoretest.NewMockSQLRouteStore()
provider, err := New(ctx, providerSettings, config, routeStore)
require.NoError(t, err) require.NoError(t, err)
assert.NotNil(t, provider) assert.NotNil(t, provider)
@ -44,7 +50,8 @@ func TestProvider_SetNotificationConfig(t *testing.T) {
providerSettings := createTestProviderSettings() providerSettings := createTestProviderSettings()
config := nfmanager.Config{} config := nfmanager.Config{}
provider, err := New(ctx, providerSettings, config) routeStore := nfroutingstoretest.NewMockSQLRouteStore()
provider, err := New(ctx, providerSettings, config, routeStore)
require.NoError(t, err) require.NoError(t, err)
tests := []struct { tests := []struct {
@ -124,11 +131,12 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
providerSettings := createTestProviderSettings() providerSettings := createTestProviderSettings()
config := nfmanager.Config{} config := nfmanager.Config{}
provider, err := New(ctx, providerSettings, config) routeStore := nfroutingstoretest.NewMockSQLRouteStore()
provider, err := New(ctx, providerSettings, config, routeStore)
require.NoError(t, err) require.NoError(t, err)
orgID := "test-org" orgID := "test-org"
ruleID := "rule1" ruleID := "ruleId"
customConfig := &alertmanagertypes.NotificationConfig{ customConfig := &alertmanagertypes.NotificationConfig{
Renotify: alertmanagertypes.ReNotificationConfig{ Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 30 * time.Minute, RenotifyInterval: 30 * time.Minute,
@ -144,7 +152,6 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
}, },
} }
// Set config for alert1
err = provider.SetNotificationConfig(orgID, ruleID, customConfig) err = provider.SetNotificationConfig(orgID, ruleID, customConfig)
require.NoError(t, err) require.NoError(t, err)
@ -155,7 +162,7 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
name string name string
orgID string orgID string
ruleID string ruleID string
alert *types.Alert alert *alertmanagertypes.Alert
expectedConfig *alertmanagertypes.NotificationConfig expectedConfig *alertmanagertypes.NotificationConfig
shouldFallback bool shouldFallback bool
}{ }{
@ -165,7 +172,7 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
ruleID: ruleID, ruleID: ruleID,
expectedConfig: &alertmanagertypes.NotificationConfig{ expectedConfig: &alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{ NotificationGroup: map[model.LabelName]struct{}{
model.LabelName("ruleId"): {}, model.LabelName(ruleID): {},
}, },
Renotify: alertmanagertypes.ReNotificationConfig{ Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 30 * time.Minute, RenotifyInterval: 30 * time.Minute,
@ -182,13 +189,13 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
NotificationGroup: map[model.LabelName]struct{}{ NotificationGroup: map[model.LabelName]struct{}{
model.LabelName("group1"): {}, model.LabelName("group1"): {},
model.LabelName("group2"): {}, model.LabelName("group2"): {},
model.LabelName("ruleId"): {}, model.LabelName(ruleID): {},
}, },
Renotify: alertmanagertypes.ReNotificationConfig{ Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 4 * time.Hour, RenotifyInterval: 4 * time.Hour,
NoDataInterval: 4 * time.Hour, NoDataInterval: 4 * time.Hour,
}, },
}, // Will get fallback from standardnotification },
shouldFallback: false, shouldFallback: false,
}, },
{ {
@ -231,7 +238,8 @@ func TestProvider_ConcurrentAccess(t *testing.T) {
providerSettings := createTestProviderSettings() providerSettings := createTestProviderSettings()
config := nfmanager.Config{} config := nfmanager.Config{}
provider, err := New(ctx, providerSettings, config) routeStore := nfroutingstoretest.NewMockSQLRouteStore()
provider, err := New(ctx, providerSettings, config, routeStore)
require.NoError(t, err) require.NoError(t, err)
orgID := "test-org" orgID := "test-org"
@ -268,3 +276,430 @@ func TestProvider_ConcurrentAccess(t *testing.T) {
// Wait for both goroutines to complete // Wait for both goroutines to complete
wg.Wait() wg.Wait()
} }
// TestProvider_EvaluateExpression drives evaluateExpr through a table of
// expressions and label sets. Every case is expected to evaluate without
// error; only the boolean outcome varies.
func TestProvider_EvaluateExpression(t *testing.T) {
	p := &provider{}

	// Label sets shared by several cases; evaluateExpr only reads them.
	authInProduction := model.LabelSet{
		"service": "auth",
		"env":     "production",
	}
	criticalPaymentInStaging := model.LabelSet{
		"service":  "payment",
		"env":      "staging",
		"critical": "true",
	}
	authOnly := model.LabelSet{
		"service": "auth",
	}
	prodHost := model.LabelSet{
		"host": "prod-server-01",
	}

	cases := []struct {
		name       string
		expression string
		labelSet   model.LabelSet
		expected   bool
	}{
		{
			name:       "simple equality check - match",
			expression: `threshold.name == 'auth' && ruleId == 'rule1'`,
			labelSet: model.LabelSet{
				"threshold.name": "auth",
				"ruleId":         "rule1",
			},
			expected: true,
		},
		{
			name:       "simple equality check - no match",
			expression: `service == "payment"`,
			labelSet:   authInProduction,
			expected:   false,
		},
		{
			name:       "multiple conditions with AND - both match",
			expression: `service == "auth" && env == "production"`,
			labelSet:   authInProduction,
			expected:   true,
		},
		{
			name:       "multiple conditions with AND - one doesn't match",
			expression: `service == "auth" && env == "staging"`,
			labelSet:   authInProduction,
			expected:   false,
		},
		{
			name:       "multiple conditions with OR - one matches",
			expression: `service == "payment" || env == "production"`,
			labelSet:   authInProduction,
			expected:   true,
		},
		{
			name:       "multiple conditions with OR - none match",
			expression: `service == "payment" || env == "staging"`,
			labelSet:   authInProduction,
			expected:   false,
		},
		{
			name:       "in operator - value in list",
			expression: `service in ["auth", "payment", "notification"]`,
			labelSet:   authOnly,
			expected:   true,
		},
		{
			name:       "in operator - value not in list",
			expression: `service in ["payment", "notification"]`,
			labelSet:   authOnly,
			expected:   false,
		},
		{
			name:       "contains operator - substring match",
			expression: `host contains "prod"`,
			labelSet:   prodHost,
			expected:   true,
		},
		{
			name:       "contains operator - no substring match",
			expression: `host contains "staging"`,
			labelSet:   prodHost,
			expected:   false,
		},
		{
			name:       "complex expression with parentheses",
			expression: `(service == "auth" && env == "production") || critical == "true"`,
			labelSet:   criticalPaymentInStaging,
			expected:   true,
		},
		{
			// Both operands are string literals, so no label is looked up.
			name:       "missing label key",
			expression: `"missing_key" == "value"`,
			labelSet:   authOnly,
			expected:   false,
		},
		{
			// The quoted 'threshold.name' is a string literal rather than a
			// label reference, so the first comparison is false.
			name:       "rule-based expression with threshold name and ruleId",
			expression: `'threshold.name' == "high-cpu" && ruleId == "rule-123"`,
			labelSet: model.LabelSet{
				"threshold.name": "high-cpu",
				"ruleId":         "rule-123",
				"service":        "auth",
			},
			expected: false,
		},
		{
			name:       "alertname and ruleId combination",
			expression: `alertname == "HighCPUUsage" && ruleId == "cpu-alert-001"`,
			labelSet: model.LabelSet{
				"alertname": "HighCPUUsage",
				"ruleId":    "cpu-alert-001",
				"severity":  "critical",
			},
			expected: true,
		},
		{
			name:       "kubernetes namespace filtering",
			expression: `k8s.namespace.name == "auth" && service in ["auth", "payment"]`,
			labelSet: model.LabelSet{
				"k8s.namespace.name": "auth",
				"service":            "auth",
				"host":               "k8s-node-1",
			},
			expected: true,
		},
		{
			name:       "migration expression format from SQL migration",
			expression: `threshold.name == "HighCPUUsage" && ruleId == "rule-uuid-123"`,
			labelSet: model.LabelSet{
				"threshold.name": "HighCPUUsage",
				"ruleId":         "rule-uuid-123",
				"severity":       "warning",
			},
			expected: true,
		},
		{
			// Expression uses a capital A; the label value is lowercase.
			name:       "case sensitive matching",
			expression: `service == "Auth"`,
			labelSet:   authOnly,
			expected:   false,
		},
		{
			name:       "numeric comparison as strings",
			expression: `port == "8080"`,
			labelSet: model.LabelSet{
				"port": "8080",
			},
			expected: true,
		},
		{
			name:       "quoted string with special characters",
			expression: `service == "auth-service-v2"`,
			labelSet: model.LabelSet{
				"service": "auth-service-v2",
			},
			expected: true,
		},
		{
			name:       "boolean operators precedence",
			expression: `service == "auth" && env == "prod" || critical == "true"`,
			labelSet:   criticalPaymentInStaging,
			expected:   true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := p.evaluateExpr(tc.expression, tc.labelSet)
			assert.NoError(t, err)
			assert.Equal(t, tc.expected, got, "Expression: %s", tc.expression)
		})
	}
}
// TestProvider_DeleteRoute checks that DeleteRoutePolicy rejects an empty
// routeID and otherwise issues exactly the expected delete against the mock
// route store. Each case gets its own store and provider.
func TestProvider_DeleteRoute(t *testing.T) {
	ctx := context.Background()
	settings := createTestProviderSettings()
	cfg := nfmanager.Config{}

	cases := []struct {
		name    string
		orgID   string
		routeID string
		wantErr bool
	}{
		{name: "valid parameters", orgID: "test-org-123", routeID: "route-uuid-456", wantErr: false},
		{name: "empty routeID", orgID: "test-org-123", routeID: "", wantErr: true},
		{name: "valid orgID with valid routeID", orgID: "another-org", routeID: "another-route-id", wantErr: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			store := nfroutingstoretest.NewMockSQLRouteStore()
			p, err := New(ctx, settings, cfg, store)
			require.NoError(t, err)

			if tc.wantErr {
				// No store expectation: the call must fail before reaching it.
				assert.Error(t, p.DeleteRoutePolicy(ctx, tc.orgID, tc.routeID))
				return
			}

			store.ExpectDelete(tc.orgID, tc.routeID)
			assert.NoError(t, p.DeleteRoutePolicy(ctx, tc.orgID, tc.routeID))
			assert.NoError(t, store.ExpectationsWereMet())
		})
	}
}
// TestProvider_CreateRoute checks that CreateRoutePolicy stores a valid policy
// and rejects nil or invalid ones (empty expression, empty name). Each case
// gets its own mock store and provider.
func TestProvider_CreateRoute(t *testing.T) {
	ctx := context.Background()
	settings := createTestProviderSettings()
	cfg := nfmanager.Config{}

	const orgID = "test-org-123"

	cases := []struct {
		name    string
		orgID   string
		route   *alertmanagertypes.RoutePolicy
		wantErr bool
	}{
		{
			name:  "valid route",
			orgID: orgID,
			route: &alertmanagertypes.RoutePolicy{
				Identifiable:   types.Identifiable{ID: valuer.GenerateUUID()},
				Expression:     `service == "auth"`,
				ExpressionKind: alertmanagertypes.PolicyBasedExpression,
				Name:           "auth-service-route",
				Description:    "Route for auth service alerts",
				Enabled:        true,
				OrgID:          orgID,
				Channels:       []string{"slack-channel"},
			},
			wantErr: false,
		},
		{
			name:    "nil route",
			orgID:   orgID,
			route:   nil,
			wantErr: true,
		},
		{
			name:  "invalid route - missing expression",
			orgID: orgID,
			route: &alertmanagertypes.RoutePolicy{
				Expression:     "", // deliberately empty
				ExpressionKind: alertmanagertypes.PolicyBasedExpression,
				Name:           "invalid-route",
				OrgID:          orgID,
			},
			wantErr: true,
		},
		{
			name:  "invalid route - missing name",
			orgID: orgID,
			route: &alertmanagertypes.RoutePolicy{
				Expression:     `service == "auth"`,
				ExpressionKind: alertmanagertypes.PolicyBasedExpression,
				Name:           "", // deliberately empty
				OrgID:          orgID,
			},
			wantErr: true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			store := nfroutingstoretest.NewMockSQLRouteStore()
			p, err := New(ctx, settings, cfg, store)
			require.NoError(t, err)

			if tc.wantErr {
				// No store expectation: the call must fail before reaching it.
				assert.Error(t, p.CreateRoutePolicy(ctx, tc.orgID, tc.route))
				return
			}

			if tc.route != nil {
				store.ExpectCreate(tc.route)
			}
			assert.NoError(t, p.CreateRoutePolicy(ctx, tc.orgID, tc.route))
			assert.NoError(t, store.ExpectationsWereMet())
		})
	}
}
// TestProvider_CreateRoutes checks that CreateRoutePolicies stores a fully
// valid batch and rejects empty, nil, or partially invalid batches. All cases
// share one mock store and one provider.
func TestProvider_CreateRoutes(t *testing.T) {
	ctx := context.Background()
	settings := createTestProviderSettings()
	cfg := nfmanager.Config{}

	store := nfroutingstoretest.NewMockSQLRouteStore()
	p, err := New(ctx, settings, cfg, store)
	require.NoError(t, err)

	const orgID = "test-org"

	authRoute := &alertmanagertypes.RoutePolicy{
		Expression:     `service == "auth"`,
		ExpressionKind: alertmanagertypes.PolicyBasedExpression,
		Name:           "auth-route",
		Description:    "Auth service route",
		Enabled:        true,
		OrgID:          orgID,
		Channels:       []string{"slack-auth"},
	}
	paymentRoute := &alertmanagertypes.RoutePolicy{
		Expression:     `service == "payment"`,
		ExpressionKind: alertmanagertypes.PolicyBasedExpression,
		Name:           "payment-route",
		Description:    "Payment service route",
		Enabled:        true,
		OrgID:          orgID,
		Channels:       []string{"slack-payment"},
	}
	// The empty expression makes this route fail validation.
	brokenRoute := &alertmanagertypes.RoutePolicy{
		Expression:     "",
		ExpressionKind: alertmanagertypes.PolicyBasedExpression,
		Name:           "invalid-route",
		OrgID:          orgID,
	}

	cases := []struct {
		name    string
		orgID   string
		routes  []*alertmanagertypes.RoutePolicy
		wantErr bool
	}{
		{name: "valid routes", orgID: orgID, routes: []*alertmanagertypes.RoutePolicy{authRoute, paymentRoute}, wantErr: false},
		{name: "empty routes list", orgID: orgID, routes: []*alertmanagertypes.RoutePolicy{}, wantErr: true},
		{name: "nil routes list", orgID: orgID, routes: nil, wantErr: true},
		{name: "routes with nil route", orgID: orgID, routes: []*alertmanagertypes.RoutePolicy{authRoute, nil}, wantErr: true},
		{name: "routes with invalid route", orgID: orgID, routes: []*alertmanagertypes.RoutePolicy{authRoute, brokenRoute}, wantErr: true},
		{name: "single valid route", orgID: orgID, routes: []*alertmanagertypes.RoutePolicy{authRoute}, wantErr: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if tc.wantErr {
				// No store expectation: the call must fail before reaching it.
				assert.Error(t, p.CreateRoutePolicies(ctx, tc.orgID, tc.routes))
				return
			}

			if len(tc.routes) > 0 {
				store.ExpectCreateBatch(tc.routes)
			}
			assert.NoError(t, p.CreateRoutePolicies(ctx, tc.orgID, tc.routes))
			assert.NoError(t, store.ExpectationsWereMet())
		})
	}
}

View File

@ -4,6 +4,9 @@ import (
"context" "context"
"sync" "sync"
"github.com/prometheus/alertmanager/featurecontrol"
"github.com/prometheus/alertmanager/matcher/compat"
"github.com/SigNoz/signoz/pkg/alertmanager/alertmanagerserver" "github.com/SigNoz/signoz/pkg/alertmanager/alertmanagerserver"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager" "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/errors" "github.com/SigNoz/signoz/pkg/errors"
@ -61,6 +64,7 @@ func New(
} }
func (service *Service) SyncServers(ctx context.Context) error { func (service *Service) SyncServers(ctx context.Context) error {
compat.InitFromFlags(service.settings.Logger(), featurecontrol.NoopFlags{})
orgs, err := service.orgGetter.ListByOwnedKeyRange(ctx) orgs, err := service.orgGetter.ListByOwnedKeyRange(ctx)
if err != nil { if err != nil {
return err return err
@ -142,7 +146,7 @@ func (service *Service) TestReceiver(ctx context.Context, orgID string, receiver
return server.TestReceiver(ctx, receiver) return server.TestReceiver(ctx, receiver)
} }
func (service *Service) TestAlert(ctx context.Context, orgID string, alert *alertmanagertypes.PostableAlert, receivers []string) error { func (service *Service) TestAlert(ctx context.Context, orgID string, receiversMap map[*alertmanagertypes.PostableAlert][]string, config *alertmanagertypes.NotificationConfig) error {
service.serversMtx.RLock() service.serversMtx.RLock()
defer service.serversMtx.RUnlock() defer service.serversMtx.RUnlock()
@ -151,7 +155,7 @@ func (service *Service) TestAlert(ctx context.Context, orgID string, alert *aler
return err return err
} }
return server.TestAlert(ctx, alert, receivers) return server.TestAlert(ctx, receiversMap, config)
} }
func (service *Service) Stop(ctx context.Context) error { func (service *Service) Stop(ctx context.Context) error {

View File

@ -2,8 +2,12 @@ package signozalertmanager
import ( import (
"context" "context"
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
"github.com/prometheus/common/model"
"time" "time"
amConfig "github.com/prometheus/alertmanager/config"
"github.com/SigNoz/signoz/pkg/alertmanager" "github.com/SigNoz/signoz/pkg/alertmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/alertmanagerstore/sqlalertmanagerstore" "github.com/SigNoz/signoz/pkg/alertmanager/alertmanagerstore/sqlalertmanagerstore"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager" "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
@ -11,7 +15,9 @@ import (
"github.com/SigNoz/signoz/pkg/factory" "github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/modules/organization" "github.com/SigNoz/signoz/pkg/modules/organization"
"github.com/SigNoz/signoz/pkg/sqlstore" "github.com/SigNoz/signoz/pkg/sqlstore"
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes" "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/SigNoz/signoz/pkg/types/authtypes"
"github.com/SigNoz/signoz/pkg/valuer" "github.com/SigNoz/signoz/pkg/valuer"
) )
@ -94,8 +100,29 @@ func (provider *provider) TestReceiver(ctx context.Context, orgID string, receiv
return provider.service.TestReceiver(ctx, orgID, receiver) return provider.service.TestReceiver(ctx, orgID, receiver)
} }
func (provider *provider) TestAlert(ctx context.Context, orgID string, alert *alertmanagertypes.PostableAlert, receivers []string) error { func (provider *provider) TestAlert(ctx context.Context, orgID string, ruleID string, receiversMap map[*alertmanagertypes.PostableAlert][]string) error {
return provider.service.TestAlert(ctx, orgID, alert, receivers) config, err := provider.notificationManager.GetNotificationConfig(orgID, ruleID)
if err != nil {
return err
}
if config.UsePolicy {
for alert := range receiversMap {
set := make(model.LabelSet)
for k, v := range alert.Labels {
set[model.LabelName(k)] = model.LabelValue(v)
}
match, err := provider.notificationManager.Match(ctx, orgID, alert.Labels[labels.AlertRuleIdLabel], set)
if err != nil {
return err
}
if len(match) == 0 {
delete(receiversMap, alert)
} else {
receiversMap[alert] = match
}
}
}
return provider.service.TestAlert(ctx, orgID, receiversMap, config)
} }
func (provider *provider) ListChannels(ctx context.Context, orgID string) ([]*alertmanagertypes.Channel, error) { func (provider *provider) ListChannels(ctx context.Context, orgID string) ([]*alertmanagertypes.Channel, error) {
@ -211,3 +238,316 @@ func (provider *provider) DeleteNotificationConfig(ctx context.Context, orgID va
} }
return nil return nil
} }
// CreateRoutePolicy creates a route policy for the organization found in the
// request context's auth claims and returns its stored representation.
//
// The org ID and the creating user's email come from the claims. The new
// policy gets a generated ID, is always created enabled, and carries a single
// timestamp for both CreatedAt and UpdatedAt.
//
// It returns an error when the claims are missing, the claimed org ID is not a
// valid UUID, the request is nil or fails validation, or the notification
// manager rejects the policy.
func (provider *provider) CreateRoutePolicy(ctx context.Context, routeRequest *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error) {
	claims, err := authtypes.ClaimsFromContext(ctx)
	if err != nil {
		return nil, err
	}

	orgID, err := valuer.NewUUID(claims.OrgID)
	if err != nil {
		return nil, err
	}

	// Guard before Validate and the *routeRequest copy below dereference it.
	if routeRequest == nil {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "route policy cannot be nil")
	}
	if err := routeRequest.Validate(); err != nil {
		return nil, err
	}

	// One clock reading so a fresh policy has CreatedAt == UpdatedAt.
	now := time.Now()
	route := alertmanagertypes.RoutePolicy{
		Expression:     routeRequest.Expression,
		ExpressionKind: routeRequest.ExpressionKind,
		Name:           routeRequest.Name,
		Description:    routeRequest.Description,
		Enabled:        true,
		Tags:           routeRequest.Tags,
		Channels:       routeRequest.Channels,
		OrgID:          claims.OrgID,
		Identifiable: types.Identifiable{
			ID: valuer.GenerateUUID(),
		},
		UserAuditable: types.UserAuditable{
			CreatedBy: claims.Email,
			UpdatedBy: claims.Email,
		},
		TimeAuditable: types.TimeAuditable{
			CreatedAt: now,
			UpdatedAt: now,
		},
	}

	if err := provider.notificationManager.CreateRoutePolicy(ctx, orgID.String(), &route); err != nil {
		return nil, err
	}

	return &alertmanagertypes.GettableRoutePolicy{
		PostableRoutePolicy: *routeRequest,
		ID:                  route.ID.StringValue(),
		CreatedAt:           &route.CreatedAt,
		UpdatedAt:           &route.UpdatedAt,
		CreatedBy:           &route.CreatedBy,
		UpdatedBy:           &route.UpdatedBy,
	}, nil
}
// CreateRoutePolicies creates a batch of route policies for the organization
// found in the request context's auth claims and returns their stored
// representations in request order.
//
// An empty batch is a no-op that returns an empty, non-nil slice. Every policy
// gets a generated ID, is created enabled, and shares one timestamp for
// CreatedAt and UpdatedAt across the batch. Nothing is passed to the
// notification manager unless every request is non-nil and valid.
//
// It returns an error when the claims are missing, the claimed org ID is not a
// valid UUID, any request is nil or fails validation, or the notification
// manager rejects the batch.
func (provider *provider) CreateRoutePolicies(ctx context.Context, routeRequests []*alertmanagertypes.PostableRoutePolicy) ([]*alertmanagertypes.GettableRoutePolicy, error) {
	claims, err := authtypes.ClaimsFromContext(ctx)
	if err != nil {
		return nil, err
	}

	orgID, err := valuer.NewUUID(claims.OrgID)
	if err != nil {
		return nil, err
	}

	if len(routeRequests) == 0 {
		return []*alertmanagertypes.GettableRoutePolicy{}, nil
	}

	// One clock reading for the whole batch; each policy has CreatedAt == UpdatedAt.
	now := time.Now()

	routes := make([]*alertmanagertypes.RoutePolicy, 0, len(routeRequests))
	results := make([]*alertmanagertypes.GettableRoutePolicy, 0, len(routeRequests))

	for _, routeRequest := range routeRequests {
		// Guard before Validate and the *routeRequest copy below dereference it.
		if routeRequest == nil {
			return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "route policy cannot be nil")
		}
		if err := routeRequest.Validate(); err != nil {
			return nil, err
		}

		route := &alertmanagertypes.RoutePolicy{
			Expression:     routeRequest.Expression,
			ExpressionKind: routeRequest.ExpressionKind,
			Name:           routeRequest.Name,
			Description:    routeRequest.Description,
			Enabled:        true,
			Tags:           routeRequest.Tags,
			Channels:       routeRequest.Channels,
			OrgID:          claims.OrgID,
			Identifiable: types.Identifiable{
				ID: valuer.GenerateUUID(),
			},
			UserAuditable: types.UserAuditable{
				CreatedBy: claims.Email,
				UpdatedBy: claims.Email,
			},
			TimeAuditable: types.TimeAuditable{
				CreatedAt: now,
				UpdatedAt: now,
			},
		}

		routes = append(routes, route)
		results = append(results, &alertmanagertypes.GettableRoutePolicy{
			PostableRoutePolicy: *routeRequest,
			ID:                  route.ID.StringValue(),
			CreatedAt:           &route.CreatedAt,
			UpdatedAt:           &route.UpdatedAt,
			CreatedBy:           &route.CreatedBy,
			UpdatedBy:           &route.UpdatedBy,
		})
	}

	if err := provider.notificationManager.CreateRoutePolicies(ctx, orgID.String(), routes); err != nil {
		return nil, err
	}

	return results, nil
}
// GetRoutePolicyByID fetches one route policy of the organization found in the
// request context's auth claims and converts it to its API representation.
//
// Errors from reading the claims, parsing the claimed org ID as a UUID, or the
// notification manager lookup are returned unchanged.
func (provider *provider) GetRoutePolicyByID(ctx context.Context, routeID string) (*alertmanagertypes.GettableRoutePolicy, error) {
	claims, claimsErr := authtypes.ClaimsFromContext(ctx)
	if claimsErr != nil {
		return nil, claimsErr
	}

	orgID, parseErr := valuer.NewUUID(claims.OrgID)
	if parseErr != nil {
		return nil, parseErr
	}

	stored, lookupErr := provider.notificationManager.GetRoutePolicyByID(ctx, orgID.String(), routeID)
	if lookupErr != nil {
		return nil, lookupErr
	}

	// User-editable fields first, then the identity and audit metadata.
	payload := alertmanagertypes.PostableRoutePolicy{
		Expression:     stored.Expression,
		ExpressionKind: stored.ExpressionKind,
		Channels:       stored.Channels,
		Name:           stored.Name,
		Description:    stored.Description,
		Tags:           stored.Tags,
	}
	response := &alertmanagertypes.GettableRoutePolicy{
		PostableRoutePolicy: payload,
		ID:                  stored.ID.StringValue(),
		CreatedAt:           &stored.CreatedAt,
		UpdatedAt:           &stored.UpdatedAt,
		CreatedBy:           &stored.CreatedBy,
		UpdatedBy:           &stored.UpdatedBy,
	}
	return response, nil
}
// GetAllRoutePolicies lists the route policies that the notification manager
// returns for the organization found in the request context's auth claims,
// converted to their API representation in the same order.
//
// Errors from reading the claims, parsing the claimed org ID as a UUID, or the
// notification manager lookup are returned unchanged.
func (provider *provider) GetAllRoutePolicies(ctx context.Context) ([]*alertmanagertypes.GettableRoutePolicy, error) {
	claims, claimsErr := authtypes.ClaimsFromContext(ctx)
	if claimsErr != nil {
		return nil, claimsErr
	}

	orgID, parseErr := valuer.NewUUID(claims.OrgID)
	if parseErr != nil {
		return nil, parseErr
	}

	stored, listErr := provider.notificationManager.GetAllRoutePolicies(ctx, orgID.String())
	if listErr != nil {
		return nil, listErr
	}

	// One output slot per stored policy, filled by index.
	policies := make([]*alertmanagertypes.GettableRoutePolicy, len(stored))
	for i := range stored {
		current := stored[i]
		policies[i] = &alertmanagertypes.GettableRoutePolicy{
			PostableRoutePolicy: alertmanagertypes.PostableRoutePolicy{
				Expression:     current.Expression,
				ExpressionKind: current.ExpressionKind,
				Channels:       current.Channels,
				Name:           current.Name,
				Description:    current.Description,
				Tags:           current.Tags,
			},
			ID:        current.ID.StringValue(),
			CreatedAt: &current.CreatedAt,
			UpdatedAt: &current.UpdatedAt,
			CreatedBy: &current.CreatedBy,
			UpdatedBy: &current.UpdatedBy,
		}
	}

	return policies, nil
}
// UpdateRoutePolicyByID replaces the editable fields of an existing route
// policy in the organization found in the request context's auth claims.
//
// The update is a delete followed by a create of a rebuilt policy. The rebuilt
// policy keeps the existing ID, Enabled flag, CreatedBy and CreatedAt; it takes
// UpdatedBy from the claims and stamps UpdatedAt with the current time.
//
// NOTE: the delete and create are two separate calls. If the create fails
// after the delete succeeded, the original policy is gone.
//
// It returns an error when the claims are missing, the claimed org ID is not a
// valid UUID, routeID is empty, route is nil or invalid, the existing policy
// cannot be loaded, or either the delete or the create fails.
func (provider *provider) UpdateRoutePolicyByID(ctx context.Context, routeID string, route *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error) {
	claims, err := authtypes.ClaimsFromContext(ctx)
	if err != nil {
		return nil, errors.NewInvalidInputf(errors.CodeUnauthenticated, "invalid claims: %v", err)
	}

	orgID, err := valuer.NewUUID(claims.OrgID)
	if err != nil {
		return nil, err
	}

	if routeID == "" {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
	}
	if route == nil {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "route cannot be nil")
	}
	if err := route.Validate(); err != nil {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid route: %v", err)
	}

	// Use the parsed org ID here too, so lookup, delete and create all address
	// the policy with the same identifier.
	existingRoute, err := provider.notificationManager.GetRoutePolicyByID(ctx, orgID.String(), routeID)
	if err != nil {
		return nil, errors.NewInvalidInputf(errors.CodeNotFound, "route not found: %v", err)
	}

	updatedRoute := &alertmanagertypes.RoutePolicy{
		Expression:     route.Expression,
		ExpressionKind: route.ExpressionKind,
		Name:           route.Name,
		Description:    route.Description,
		// The request carries no enabled flag; keep the stored one so an edit
		// does not silently reset it to false.
		Enabled:      existingRoute.Enabled,
		Tags:         route.Tags,
		Channels:     route.Channels,
		OrgID:        claims.OrgID,
		Identifiable: existingRoute.Identifiable,
		UserAuditable: types.UserAuditable{
			CreatedBy: existingRoute.CreatedBy,
			UpdatedBy: claims.Email,
		},
		TimeAuditable: types.TimeAuditable{
			CreatedAt: existingRoute.CreatedAt,
			UpdatedAt: time.Now(),
		},
	}

	err = provider.notificationManager.DeleteRoutePolicy(ctx, orgID.String(), routeID)
	if err != nil {
		return nil, errors.NewInvalidInputf(errors.CodeInternal, "error deleting existing route: %v", err)
	}

	err = provider.notificationManager.CreateRoutePolicy(ctx, orgID.String(), updatedRoute)
	if err != nil {
		return nil, err
	}

	return &alertmanagertypes.GettableRoutePolicy{
		PostableRoutePolicy: *route,
		ID:                  updatedRoute.ID.StringValue(),
		CreatedAt:           &updatedRoute.CreatedAt,
		UpdatedAt:           &updatedRoute.UpdatedAt,
		CreatedBy:           &updatedRoute.CreatedBy,
		UpdatedBy:           &updatedRoute.UpdatedBy,
	}, nil
}
// DeleteRoutePolicyByID deletes one route policy of the organization found in
// the request context's auth claims.
//
// A claims failure is reported as invalid input with an unauthenticated code.
// A claimed org ID that is not a valid UUID and an empty routeID are rejected
// before the notification manager is called.
func (provider *provider) DeleteRoutePolicyByID(ctx context.Context, routeID string) error {
	claims, claimsErr := authtypes.ClaimsFromContext(ctx)
	if claimsErr != nil {
		return errors.NewInvalidInputf(errors.CodeUnauthenticated, "invalid claims: %v", claimsErr)
	}

	orgID, parseErr := valuer.NewUUID(claims.OrgID)
	if parseErr != nil {
		return parseErr
	}

	if len(routeID) == 0 {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
	}

	return provider.notificationManager.DeleteRoutePolicy(ctx, orgID.String(), routeID)
}
// CreateInhibitRules loads the organization's alertmanager config from the
// config store, adds the given inhibit rules to it, and writes the config
// back. The config is not written if loading it or adding the rules fails.
func (provider *provider) CreateInhibitRules(ctx context.Context, orgID valuer.UUID, rules []amConfig.InhibitRule) error {
	orgConfig, loadErr := provider.configStore.Get(ctx, orgID.String())
	if loadErr != nil {
		return loadErr
	}

	addErr := orgConfig.AddInhibitRules(rules)
	if addErr != nil {
		return addErr
	}

	return provider.configStore.Set(ctx, orgConfig)
}
// DeleteAllRoutePoliciesByRuleId asks the notification manager to delete the
// route policies stored under the given name, for the organization found in
// the request context's auth claims.
//
// A claims failure is reported as invalid input with an unauthenticated code;
// an org ID that is not a valid UUID is returned as the parse error.
func (provider *provider) DeleteAllRoutePoliciesByRuleId(ctx context.Context, names string) error {
	claims, claimsErr := authtypes.ClaimsFromContext(ctx)
	if claimsErr != nil {
		return errors.NewInvalidInputf(errors.CodeUnauthenticated, "invalid claims: %v", claimsErr)
	}

	orgID, parseErr := valuer.NewUUID(claims.OrgID)
	if parseErr != nil {
		return parseErr
	}

	return provider.notificationManager.DeleteAllRoutePoliciesByName(ctx, orgID.String(), names)
}
// UpdateAllRoutePoliciesByRuleId replaces the route policies stored under
// names: it first deletes the existing ones, then creates routes.
//
// The two steps are issued one after the other with no rollback in this
// method, so if creation fails the previously deleted policies are not
// restored here. A deletion failure is reported as an invalid-input error
// carrying the original message; a creation failure is returned unchanged.
func (provider *provider) UpdateAllRoutePoliciesByRuleId(ctx context.Context, names string, routes []*alertmanagertypes.PostableRoutePolicy) error {
	if deleteErr := provider.DeleteAllRoutePoliciesByRuleId(ctx, names); deleteErr != nil {
		return errors.NewInvalidInputf(errors.CodeInternal, "error deleting the routes: %v", deleteErr)
	}

	// Only the error matters to callers; the created policies are discarded.
	_, createErr := provider.CreateRoutePolicies(ctx, routes)
	return createErr
}
// DeleteAllInhibitRulesByRuleId loads the stored config for orgID, calls its
// DeleteRuleIDInhibitor method with ruleId, and writes the config back.
//
// The first error from loading, deleting, or saving is returned unchanged;
// the config is not saved when the delete step fails.
func (provider *provider) DeleteAllInhibitRulesByRuleId(ctx context.Context, orgID valuer.UUID, ruleId string) error {
	stored, loadErr := provider.configStore.Get(ctx, orgID.String())
	if loadErr != nil {
		return loadErr
	}

	removeErr := stored.DeleteRuleIDInhibitor(ruleId)
	if removeErr != nil {
		return removeErr
	}

	return provider.configStore.Set(ctx, stored)
}

View File

@ -10,7 +10,6 @@ import (
"fmt" "fmt"
"github.com/SigNoz/signoz/pkg/modules/thirdpartyapi" "github.com/SigNoz/signoz/pkg/modules/thirdpartyapi"
//qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"io" "io"
"math" "math"
"net/http" "net/http"
@ -492,6 +491,12 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router, am *middleware.AuthZ) {
router.HandleFunc("/api/v1/channels", am.EditAccess(aH.AlertmanagerAPI.CreateChannel)).Methods(http.MethodPost) router.HandleFunc("/api/v1/channels", am.EditAccess(aH.AlertmanagerAPI.CreateChannel)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/testChannel", am.EditAccess(aH.AlertmanagerAPI.TestReceiver)).Methods(http.MethodPost) router.HandleFunc("/api/v1/testChannel", am.EditAccess(aH.AlertmanagerAPI.TestReceiver)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/route_policies", am.ViewAccess(aH.AlertmanagerAPI.GetAllRoutePolicies)).Methods(http.MethodGet)
router.HandleFunc("/api/v1/route_policies/{id}", am.ViewAccess(aH.AlertmanagerAPI.GetRoutePolicyByID)).Methods(http.MethodGet)
router.HandleFunc("/api/v1/route_policies", am.AdminAccess(aH.AlertmanagerAPI.CreateRoutePolicy)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/route_policies/{id}", am.AdminAccess(aH.AlertmanagerAPI.DeleteRoutePolicyByID)).Methods(http.MethodDelete)
router.HandleFunc("/api/v1/route_policies/{id}", am.AdminAccess(aH.AlertmanagerAPI.UpdateRoutePolicy)).Methods(http.MethodPut)
router.HandleFunc("/api/v1/alerts", am.ViewAccess(aH.AlertmanagerAPI.GetAlerts)).Methods(http.MethodGet) router.HandleFunc("/api/v1/alerts", am.ViewAccess(aH.AlertmanagerAPI.GetAlerts)).Methods(http.MethodGet)
router.HandleFunc("/api/v1/rules", am.ViewAccess(aH.listRules)).Methods(http.MethodGet) router.HandleFunc("/api/v1/rules", am.ViewAccess(aH.listRules)).Methods(http.MethodGet)
@ -616,6 +621,7 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router, am *middleware.AuthZ) {
// Export // Export
router.HandleFunc("/api/v1/export_raw_data", am.ViewAccess(aH.Signoz.Handlers.RawDataExport.ExportRawData)).Methods(http.MethodGet) router.HandleFunc("/api/v1/export_raw_data", am.ViewAccess(aH.Signoz.Handlers.RawDataExport.ExportRawData)).Methods(http.MethodGet)
} }
func (ah *APIHandler) MetricExplorerRoutes(router *mux.Router, am *middleware.AuthZ) { func (ah *APIHandler) MetricExplorerRoutes(router *mux.Router, am *middleware.AuthZ) {

View File

@ -4,13 +4,11 @@ import (
"context" "context"
"fmt" "fmt"
"log/slog" "log/slog"
"math"
"net/url" "net/url"
"sync" "sync"
"time" "time"
"github.com/SigNoz/signoz/pkg/errors" "github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/query-service/converter"
"github.com/SigNoz/signoz/pkg/query-service/interfaces" "github.com/SigNoz/signoz/pkg/query-service/interfaces"
"github.com/SigNoz/signoz/pkg/query-service/model" "github.com/SigNoz/signoz/pkg/query-service/model"
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3" v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
@ -167,22 +165,6 @@ func NewBaseRule(id string, orgID valuer.UUID, p *ruletypes.PostableRule, reader
return baseRule, nil return baseRule, nil
} }
// targetVal returns the rule's target threshold converted from the
// condition's TargetUnit into the unit reported by r.Unit().
//
// It returns 0 when the rule has no condition or the condition has no target.
func (r *BaseRule) targetVal() float64 {
	cond := r.ruleCondition
	if cond == nil || cond.Target == nil {
		return 0
	}

	// The same source unit selects the converter and tags the input value.
	sourceUnit := converter.Unit(cond.TargetUnit)
	conv := converter.FromUnit(sourceUnit)

	converted := conv.Convert(
		converter.Value{F: *cond.Target, U: sourceUnit},
		converter.Unit(r.Unit()),
	)
	return converted.F
}
func (r *BaseRule) matchType() ruletypes.MatchType { func (r *BaseRule) matchType() ruletypes.MatchType {
if r.ruleCondition == nil { if r.ruleCondition == nil {
return ruletypes.AtleastOnce return ruletypes.AtleastOnce
@ -221,10 +203,6 @@ func (r *BaseRule) HoldDuration() time.Duration {
return r.holdDuration return r.holdDuration
} }
// TargetVal is the exported accessor for the rule's target value; it returns
// exactly what targetVal computes.
func (r *BaseRule) TargetVal() float64 {
	target := r.targetVal()
	return target
}
func (r *ThresholdRule) hostFromSource() string { func (r *ThresholdRule) hostFromSource() string {
parsedUrl, err := url.Parse(r.source) parsedUrl, err := url.Parse(r.source)
if err != nil { if err != nil {
@ -380,232 +358,6 @@ func (r *BaseRule) ForEachActiveAlert(f func(*ruletypes.Alert)) {
} }
} }
// ShouldAlert evaluates one result series against the rule condition and
// reports whether the rule should fire for it.
//
// It returns the sample to attach to the alert (its labels are copied from
// series.Labels) and a boolean that is true when the condition is met. The
// zero Sample and false are returned when no points remain after
// removeGroupinSetPoints, or when RequireMinPoints is set and fewer than
// RequiredNumPoints points are available.
//
// r.matchType() selects how the points are combined and r.compareOp() selects
// the comparison; every comparison is made against r.targetVal().
func (r *BaseRule) ShouldAlert(series v3.Series) (ruletypes.Sample, bool) {
var alertSmpl ruletypes.Sample
var shouldAlert bool
var lbls qslabels.Labels
for name, value := range series.Labels {
lbls = append(lbls, qslabels.Label{Name: name, Value: value})
}
// NOTE(review): removeGroupinSetPoints is defined elsewhere; presumably it
// drops points that come from grouping sets — confirm against the helper.
series.Points = removeGroupinSetPoints(series)
// nothing to evaluate
if len(series.Points) == 0 {
return alertSmpl, false
}
// Optionally refuse to evaluate series that are too short.
if r.ruleCondition.RequireMinPoints {
if len(series.Points) < r.ruleCondition.RequiredNumPoints {
zap.L().Info("not enough data points to evaluate series, skipping", zap.String("ruleid", r.ID()), zap.Int("numPoints", len(series.Points)), zap.Int("requiredPoints", r.ruleCondition.RequiredNumPoints))
return alertSmpl, false
}
}
switch r.matchType() {
case ruletypes.AtleastOnce:
// If any sample matches the condition, the rule is firing.
// The first matching point becomes the reported sample.
if r.compareOp() == ruletypes.ValueIsAbove {
for _, smpl := range series.Points {
if smpl.Value > r.targetVal() {
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
}
}
} else if r.compareOp() == ruletypes.ValueIsBelow {
for _, smpl := range series.Points {
if smpl.Value < r.targetVal() {
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
}
}
} else if r.compareOp() == ruletypes.ValueIsEq {
for _, smpl := range series.Points {
if smpl.Value == r.targetVal() {
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
}
}
} else if r.compareOp() == ruletypes.ValueIsNotEq {
for _, smpl := range series.Points {
if smpl.Value != r.targetVal() {
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
}
}
} else if r.compareOp() == ruletypes.ValueOutsideBounds {
// Outside-bounds compares the absolute value of the point to the target.
for _, smpl := range series.Points {
if math.Abs(smpl.Value) >= r.targetVal() {
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true
break
}
}
}
case ruletypes.AllTheTimes:
// If all samples match the condition, the rule is firing.
// Start by assuming the rule fires and reporting the target value; a single
// point that fails the comparison clears shouldAlert.
shouldAlert = true
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: r.targetVal()}, Metric: lbls}
if r.compareOp() == ruletypes.ValueIsAbove {
for _, smpl := range series.Points {
if smpl.Value <= r.targetVal() {
shouldAlert = false
break
}
}
// use min value from the series
if shouldAlert {
var minValue float64 = math.Inf(1)
for _, smpl := range series.Points {
if smpl.Value < minValue {
minValue = smpl.Value
}
}
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: minValue}, Metric: lbls}
}
} else if r.compareOp() == ruletypes.ValueIsBelow {
for _, smpl := range series.Points {
if smpl.Value >= r.targetVal() {
shouldAlert = false
break
}
}
// use max value from the series
if shouldAlert {
var maxValue float64 = math.Inf(-1)
for _, smpl := range series.Points {
if smpl.Value > maxValue {
maxValue = smpl.Value
}
}
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: maxValue}, Metric: lbls}
}
} else if r.compareOp() == ruletypes.ValueIsEq {
for _, smpl := range series.Points {
if smpl.Value != r.targetVal() {
shouldAlert = false
break
}
}
} else if r.compareOp() == ruletypes.ValueIsNotEq {
for _, smpl := range series.Points {
if smpl.Value == r.targetVal() {
shouldAlert = false
break
}
}
// use the first value from the series that is neither Inf nor NaN
if shouldAlert {
for _, smpl := range series.Points {
if !math.IsInf(smpl.Value, 0) && !math.IsNaN(smpl.Value) {
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
break
}
}
}
} else if r.compareOp() == ruletypes.ValueOutsideBounds {
// The first point whose absolute value is below the target stops the scan;
// that point is recorded as the sample even though the rule does not fire.
for _, smpl := range series.Points {
if math.Abs(smpl.Value) < r.targetVal() {
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
shouldAlert = false
break
}
}
}
case ruletypes.OnAverage:
// If the average of all samples matches the condition, the rule is firing.
// NaN and Inf points are excluded from both the sum and the count.
var sum, count float64
for _, smpl := range series.Points {
if math.IsNaN(smpl.Value) || math.IsInf(smpl.Value, 0) {
continue
}
sum += smpl.Value
count++
}
// NOTE(review): count stays 0 when every point was NaN/Inf, which makes avg
// NaN (0/0); a NaN average satisfies only the ValueIsNotEq comparison below.
avg := sum / count
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: avg}, Metric: lbls}
if r.compareOp() == ruletypes.ValueIsAbove {
if avg > r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsBelow {
if avg < r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsEq {
if avg == r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsNotEq {
if avg != r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueOutsideBounds {
if math.Abs(avg) >= r.targetVal() {
shouldAlert = true
}
}
case ruletypes.InTotal:
// If the sum of all samples matches the condition, the rule is firing.
// NaN and Inf points are left out of the sum.
var sum float64
for _, smpl := range series.Points {
if math.IsNaN(smpl.Value) || math.IsInf(smpl.Value, 0) {
continue
}
sum += smpl.Value
}
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: sum}, Metric: lbls}
if r.compareOp() == ruletypes.ValueIsAbove {
if sum > r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsBelow {
if sum < r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsEq {
if sum == r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsNotEq {
if sum != r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueOutsideBounds {
if math.Abs(sum) >= r.targetVal() {
shouldAlert = true
}
}
case ruletypes.Last:
// If the last sample matches the condition, the rule is firing.
// The last point is always reported as the sample, whether or not it fires.
// NOTE(review): unlike the other match types there is no ValueOutsideBounds
// branch here, so that operator never fires for Last — confirm intended.
shouldAlert = false
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: series.Points[len(series.Points)-1].Value}, Metric: lbls}
if r.compareOp() == ruletypes.ValueIsAbove {
if series.Points[len(series.Points)-1].Value > r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsBelow {
if series.Points[len(series.Points)-1].Value < r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsEq {
if series.Points[len(series.Points)-1].Value == r.targetVal() {
shouldAlert = true
}
} else if r.compareOp() == ruletypes.ValueIsNotEq {
if series.Points[len(series.Points)-1].Value != r.targetVal() {
shouldAlert = true
}
}
}
return alertSmpl, shouldAlert
}
func (r *BaseRule) RecordRuleStateHistory(ctx context.Context, prevState, currentState model.AlertState, itemsToAdd []model.RuleStateHistory) error { func (r *BaseRule) RecordRuleStateHistory(ctx context.Context, prevState, currentState model.AlertState, itemsToAdd []model.RuleStateHistory) error {
zap.L().Debug("recording rule state history", zap.String("ruleid", r.ID()), zap.Any("prevState", prevState), zap.Any("currentState", currentState), zap.Any("itemsToAdd", itemsToAdd)) zap.L().Debug("recording rule state history", zap.String("ruleid", r.ID()), zap.Any("prevState", prevState), zap.Any("currentState", currentState), zap.Any("itemsToAdd", itemsToAdd))
revisedItemsToAdd := map[uint64]model.RuleStateHistory{} revisedItemsToAdd := map[uint64]model.RuleStateHistory{}

View File

@ -1,6 +1,7 @@
package rules package rules
import ( import (
"github.com/stretchr/testify/require"
"testing" "testing"
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3" v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
@ -22,6 +23,15 @@ func TestBaseRule_RequireMinPoints(t *testing.T) {
RequireMinPoints: true, RequireMinPoints: true,
RequiredNumPoints: 4, RequiredNumPoints: 4,
}, },
Threshold: ruletypes.BasicRuleThresholds{
{
Name: "test-threshold",
TargetValue: &threshold,
CompareOp: ruletypes.ValueIsAbove,
MatchType: ruletypes.AtleastOnce,
},
},
}, },
series: &v3.Series{ series: &v3.Series{
Points: []v3.Point{ Points: []v3.Point{
@ -41,6 +51,14 @@ func TestBaseRule_RequireMinPoints(t *testing.T) {
MatchType: ruletypes.AtleastOnce, MatchType: ruletypes.AtleastOnce,
Target: &threshold, Target: &threshold,
}, },
Threshold: ruletypes.BasicRuleThresholds{
{
Name: "test-threshold",
TargetValue: &threshold,
CompareOp: ruletypes.ValueIsAbove,
MatchType: ruletypes.AtleastOnce,
},
},
}, },
series: &v3.Series{ series: &v3.Series{
Points: []v3.Point{ Points: []v3.Point{
@ -56,10 +74,9 @@ func TestBaseRule_RequireMinPoints(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
_, shouldAlert := test.rule.ShouldAlert(*test.series) _, err := test.rule.Threshold.ShouldAlert(*test.series, "")
if shouldAlert != test.shouldAlert { require.NoError(t, err)
t.Errorf("expected shouldAlert to be %v, got %v", test.shouldAlert, shouldAlert) require.Equal(t, len(test.series.Points) >= test.rule.ruleCondition.RequiredNumPoints, test.shouldAlert)
}
}) })
} }
} }

View File

@ -4,6 +4,7 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
"log/slog" "log/slog"
"sort" "sort"
"strings" "strings"
@ -350,39 +351,35 @@ func (m *Manager) EditRule(ctx context.Context, ruleStr string, id valuer.UUID)
existingRule.Data = ruleStr existingRule.Data = ruleStr
return m.ruleStore.EditRule(ctx, existingRule, func(ctx context.Context) error { return m.ruleStore.EditRule(ctx, existingRule, func(ctx context.Context) error {
cfg, err := m.alertmanager.GetConfig(ctx, claims.OrgID)
if err != nil {
return err
}
var preferredChannels []string
if len(parsedRule.PreferredChannels) == 0 {
channels, err := m.alertmanager.ListChannels(ctx, claims.OrgID)
if err != nil {
return err
}
for _, channel := range channels {
preferredChannels = append(preferredChannels, channel.Name)
}
} else {
preferredChannels = parsedRule.PreferredChannels
}
err = cfg.UpdateRuleIDMatcher(id.StringValue(), preferredChannels)
if err != nil {
return err
}
if parsedRule.NotificationSettings != nil { if parsedRule.NotificationSettings != nil {
config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig() config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig()
err = m.alertmanager.SetNotificationConfig(ctx, orgID, existingRule.ID.StringValue(), &config) err = m.alertmanager.SetNotificationConfig(ctx, orgID, id.StringValue(), &config)
if err != nil { if err != nil {
return err return err
} }
} if !parsedRule.NotificationSettings.UsePolicy {
request, err := parsedRule.GetRuleRouteRequest(id.StringValue())
if err != nil {
return err
}
err = m.alertmanager.UpdateAllRoutePoliciesByRuleId(ctx, id.StringValue(), request)
if err != nil {
return err
}
err = m.alertmanager.DeleteAllInhibitRulesByRuleId(ctx, orgID, id.StringValue())
if err != nil {
return err
}
err = m.alertmanager.SetConfig(ctx, cfg) inhibitRules, err := parsedRule.GetInhibitRules(id.StringValue())
if err != nil { if err != nil {
return err return err
}
err = m.alertmanager.CreateInhibitRules(ctx, orgID, inhibitRules)
if err != nil {
return err
}
}
} }
err = m.syncRuleStateWithTask(ctx, orgID, prepareTaskName(existingRule.ID.StringValue()), &parsedRule) err = m.syncRuleStateWithTask(ctx, orgID, prepareTaskName(existingRule.ID.StringValue()), &parsedRule)
if err != nil { if err != nil {
@ -488,6 +485,19 @@ func (m *Manager) DeleteRule(ctx context.Context, idStr string) error {
} }
err = m.alertmanager.DeleteNotificationConfig(ctx, orgID, id.String()) err = m.alertmanager.DeleteNotificationConfig(ctx, orgID, id.String())
if err != nil {
return err
}
err = m.alertmanager.DeleteAllRoutePoliciesByRuleId(ctx, id.String())
if err != nil {
return err
}
err = m.alertmanager.DeleteAllInhibitRulesByRuleId(ctx, orgID, id.String())
if err != nil {
return err
}
taskName := prepareTaskName(id.StringValue()) taskName := prepareTaskName(id.StringValue())
m.deleteTask(taskName) m.deleteTask(taskName)
@ -548,41 +558,30 @@ func (m *Manager) CreateRule(ctx context.Context, ruleStr string) (*ruletypes.Ge
} }
id, err := m.ruleStore.CreateRule(ctx, storedRule, func(ctx context.Context, id valuer.UUID) error { id, err := m.ruleStore.CreateRule(ctx, storedRule, func(ctx context.Context, id valuer.UUID) error {
cfg, err := m.alertmanager.GetConfig(ctx, claims.OrgID)
if err != nil {
return err
}
var preferredChannels []string
if len(parsedRule.PreferredChannels) == 0 {
channels, err := m.alertmanager.ListChannels(ctx, claims.OrgID)
if err != nil {
return err
}
for _, channel := range channels {
preferredChannels = append(preferredChannels, channel.Name)
}
} else {
preferredChannels = parsedRule.PreferredChannels
}
if parsedRule.NotificationSettings != nil { if parsedRule.NotificationSettings != nil {
config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig() config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig()
err = m.alertmanager.SetNotificationConfig(ctx, orgID, storedRule.ID.StringValue(), &config) err = m.alertmanager.SetNotificationConfig(ctx, orgID, id.StringValue(), &config)
if err != nil { if err != nil {
return err return err
} }
} if !parsedRule.NotificationSettings.UsePolicy {
request, err := parsedRule.GetRuleRouteRequest(id.StringValue())
err = cfg.CreateRuleIDMatcher(id.StringValue(), preferredChannels) if err != nil {
if err != nil { return err
return err }
} _, err = m.alertmanager.CreateRoutePolicies(ctx, request)
if err != nil {
err = m.alertmanager.SetConfig(ctx, cfg) return err
if err != nil { }
return err inhibitRules, err := parsedRule.GetInhibitRules(id.StringValue())
if err != nil {
return err
}
err = m.alertmanager.CreateInhibitRules(ctx, orgID, inhibitRules)
if err != nil {
return err
}
}
} }
taskName := prepareTaskName(id.StringValue()) taskName := prepareTaskName(id.StringValue())
@ -756,36 +755,30 @@ func (m *Manager) prepareTestNotifyFunc() NotifyFunc {
if len(alerts) == 0 { if len(alerts) == 0 {
return return
} }
ruleID := alerts[0].Labels.Map()[labels.AlertRuleIdLabel]
receiverMap := make(map[*alertmanagertypes.PostableAlert][]string)
for _, alert := range alerts {
generatorURL := alert.GeneratorURL
alert := alerts[0] a := &alertmanagertypes.PostableAlert{}
generatorURL := alert.GeneratorURL a.Annotations = alert.Annotations.Map()
a.StartsAt = strfmt.DateTime(alert.FiredAt)
a := &alertmanagertypes.PostableAlert{} a.Alert = alertmanagertypes.AlertModel{
a.Annotations = alert.Annotations.Map() Labels: alert.Labels.Map(),
a.StartsAt = strfmt.DateTime(alert.FiredAt) GeneratorURL: strfmt.URI(generatorURL),
a.Alert = alertmanagertypes.AlertModel{
Labels: alert.Labels.Map(),
GeneratorURL: strfmt.URI(generatorURL),
}
if !alert.ResolvedAt.IsZero() {
a.EndsAt = strfmt.DateTime(alert.ResolvedAt)
} else {
a.EndsAt = strfmt.DateTime(alert.ValidUntil)
}
if len(alert.Receivers) == 0 {
channels, err := m.alertmanager.ListChannels(ctx, orgID)
if err != nil {
zap.L().Error("failed to list channels while sending test notification", zap.Error(err))
return
} }
if !alert.ResolvedAt.IsZero() {
for _, channel := range channels { a.EndsAt = strfmt.DateTime(alert.ResolvedAt)
alert.Receivers = append(alert.Receivers, channel.Name) } else {
a.EndsAt = strfmt.DateTime(alert.ValidUntil)
} }
receiverMap[a] = alert.Receivers
}
err := m.alertmanager.TestAlert(ctx, orgID, ruleID, receiverMap)
if err != nil {
zap.L().Error("failed to send test notification", zap.Error(err))
return
} }
m.alertmanager.TestAlert(ctx, orgID, a, alert.Receivers)
} }
} }
@ -983,6 +976,17 @@ func (m *Manager) TestNotification(ctx context.Context, orgID valuer.UUID, ruleS
if err != nil { if err != nil {
return 0, model.BadRequest(err) return 0, model.BadRequest(err)
} }
if !parsedRule.NotificationSettings.UsePolicy {
parsedRule.NotificationSettings.GroupBy = append(parsedRule.NotificationSettings.GroupBy, ruletypes.LabelThresholdName)
}
config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig()
err = m.alertmanager.SetNotificationConfig(ctx, orgID, parsedRule.AlertName, &config)
if err != nil {
return 0, &model.ApiError{
Typ: model.ErrorBadData,
Err: err,
}
}
alertCount, apiErr := m.prepareTestRuleFunc(PrepareTestRuleOptions{ alertCount, apiErr := m.prepareTestRuleFunc(PrepareTestRuleOptions{
Rule: &parsedRule, Rule: &parsedRule,

View File

@ -2,10 +2,15 @@ package rules
import ( import (
"context" "context"
"fmt"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/rulebasednotification"
"github.com/prometheus/common/model"
"strings"
"testing" "testing"
"time" "time"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfmanagertest"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"go.uber.org/zap" "go.uber.org/zap"
@ -32,19 +37,38 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
Email: "test@example.com", Email: "test@example.com",
Role: "admin", Role: "admin",
} }
manager, mockSQLRuleStore, orgId := setupTestManager(t) manager, mockSQLRuleStore, mockRouteStore, nfmanager, orgId := setupTestManager(t)
claims.OrgID = orgId claims.OrgID = orgId
testCases := []struct { testCases := []struct {
name string name string
originalData string originalData string
patchData string patchData string
Route []*alertmanagertypes.RoutePolicy
Config *alertmanagertypes.NotificationConfig
expectedResult func(*ruletypes.GettableRule) bool expectedResult func(*ruletypes.GettableRule) bool
expectError bool expectError bool
description string description string
}{ }{
{ {
name: "patch complete rule with task sync validation", name: "patch complete rule with task sync validation",
Route: []*alertmanagertypes.RoutePolicy{
{
Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"test-alerts"},
Name: "{{.ruleId}}",
Enabled: true,
},
},
Config: &alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 4 * time.Hour,
NoDataInterval: 4 * time.Hour,
},
UsePolicy: false,
},
originalData: `{ originalData: `{
"schemaVersion":"v1", "schemaVersion":"v1",
"alert": "test-original-alert", "alert": "test-original-alert",
@ -95,6 +119,23 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
}, },
{ {
name: "patch rule to disabled state", name: "patch rule to disabled state",
Route: []*alertmanagertypes.RoutePolicy{
{
Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"test-alerts"},
Name: "{{.ruleId}}",
Enabled: true,
},
},
Config: &alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 4 * time.Hour,
NoDataInterval: 4 * time.Hour,
},
UsePolicy: false,
},
originalData: `{ originalData: `{
"schemaVersion":"v2", "schemaVersion":"v2",
"alert": "test-disable-alert", "alert": "test-disable-alert",
@ -179,6 +220,20 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
OrgID: claims.OrgID, OrgID: claims.OrgID,
} }
// Update route expectations with actual rule ID
routesWithRuleID := make([]*alertmanagertypes.RoutePolicy, len(tc.Route))
for i, route := range tc.Route {
routesWithRuleID[i] = &alertmanagertypes.RoutePolicy{
Expression: strings.Replace(route.Expression, "{{.ruleId}}", ruleID.String(), -1),
ExpressionKind: route.ExpressionKind,
Channels: route.Channels,
Name: strings.Replace(route.Name, "{{.ruleId}}", ruleID.String(), -1),
Enabled: route.Enabled,
}
}
mockRouteStore.ExpectDeleteRouteByName(existingRule.OrgID, ruleID.String())
mockRouteStore.ExpectCreateBatch(routesWithRuleID)
mockSQLRuleStore.ExpectGetStoredRule(ruleID, existingRule) mockSQLRuleStore.ExpectGetStoredRule(ruleID, existingRule)
mockSQLRuleStore.ExpectEditRule(existingRule) mockSQLRuleStore.ExpectEditRule(existingRule)
@ -200,6 +255,12 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
assert.Nil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be removed for disabled rule") assert.Nil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be removed for disabled rule")
} else { } else {
syncCompleted := waitForTaskSync(manager, taskName, true, 2*time.Second) syncCompleted := waitForTaskSync(manager, taskName, true, 2*time.Second)
// Verify notification config
config, err := nfmanager.GetNotificationConfig(orgId, result.Id)
assert.NoError(t, err)
assert.Equal(t, tc.Config, config)
assert.True(t, syncCompleted, "Task synchronization should complete within timeout") assert.True(t, syncCompleted, "Task synchronization should complete within timeout")
assert.NotNil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be created/updated for enabled rule") assert.NotNil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be created/updated for enabled rule")
assert.Greater(t, len(manager.Rules()), 0, "Rules should be updated in manager") assert.Greater(t, len(manager.Rules()), 0, "Rules should be updated in manager")
@ -234,7 +295,7 @@ func findTaskByName(tasks []Task, taskName string) Task {
return nil return nil
} }
func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore, string) { func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore, *nfroutingstoretest.MockSQLRouteStore, nfmanager.NotificationManager, string) {
settings := instrumentationtest.New().ToProviderSettings() settings := instrumentationtest.New().ToProviderSettings()
testDB := utils.NewQueryServiceDBForTests(t) testDB := utils.NewQueryServiceDBForTests(t)
@ -266,7 +327,11 @@ func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore,
t.Fatalf("Failed to create noop sharder: %v", err) t.Fatalf("Failed to create noop sharder: %v", err)
} }
orgGetter := implorganization.NewGetter(implorganization.NewStore(testDB), noopSharder) orgGetter := implorganization.NewGetter(implorganization.NewStore(testDB), noopSharder)
notificationManager := nfmanagertest.NewMock() routeStore := nfroutingstoretest.NewMockSQLRouteStore()
notificationManager, err := rulebasednotification.New(t.Context(), settings, nfmanager.Config{}, routeStore)
if err != nil {
t.Fatalf("Failed to create alert manager: %v", err)
}
alertManager, err := signozalertmanager.New(context.TODO(), settings, alertmanager.Config{Provider: "signoz", Signoz: alertmanager.Signoz{PollInterval: 10 * time.Second, Config: alertmanagerserver.NewConfig()}}, testDB, orgGetter, notificationManager) alertManager, err := signozalertmanager.New(context.TODO(), settings, alertmanager.Config{Provider: "signoz", Signoz: alertmanager.Signoz{PollInterval: 10 * time.Second, Config: alertmanagerserver.NewConfig()}}, testDB, orgGetter, notificationManager)
if err != nil { if err != nil {
t.Fatalf("Failed to create alert manager: %v", err) t.Fatalf("Failed to create alert manager: %v", err)
@ -290,21 +355,40 @@ func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore,
} }
close(manager.block) close(manager.block)
return manager, mockSQLRuleStore, testOrgID.StringValue() return manager, mockSQLRuleStore, routeStore, notificationManager, testOrgID.StringValue()
} }
func TestCreateRule(t *testing.T) { func TestCreateRule(t *testing.T) {
claims := &authtypes.Claims{ claims := &authtypes.Claims{
Email: "test@example.com", Email: "test@example.com",
} }
manager, mockSQLRuleStore, orgId := setupTestManager(t) manager, mockSQLRuleStore, mockRouteStore, nfmanager, orgId := setupTestManager(t)
claims.OrgID = orgId claims.OrgID = orgId
testCases := []struct { testCases := []struct {
name string name string
Route []*alertmanagertypes.RoutePolicy
Config *alertmanagertypes.NotificationConfig
ruleStr string ruleStr string
}{ }{
{ {
name: "validate stored rule data structure", name: "validate stored rule data structure",
Route: []*alertmanagertypes.RoutePolicy{
{
Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"test-alerts"},
Name: "{{.ruleId}}",
Enabled: true,
},
},
Config: &alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 4 * time.Hour,
NoDataInterval: 4 * time.Hour,
},
UsePolicy: false,
},
ruleStr: `{ ruleStr: `{
"alert": "cpu usage", "alert": "cpu usage",
"ruleType": "threshold_rule", "ruleType": "threshold_rule",
@ -341,6 +425,30 @@ func TestCreateRule(t *testing.T) {
}, },
{ {
name: "create complete v2 rule with thresholds", name: "create complete v2 rule with thresholds",
Route: []*alertmanagertypes.RoutePolicy{
{
Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"critical\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"test-alerts"},
Name: "{{.ruleId}}",
Enabled: true,
},
{
Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"test-alerts"},
Name: "{{.ruleId}}",
Enabled: true,
},
},
Config: &alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("k8s.node.name"): {}, model.LabelName("ruleId"): {}},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 10 * time.Minute,
NoDataInterval: 4 * time.Hour,
},
UsePolicy: false,
},
ruleStr: `{ ruleStr: `{
"schemaVersion":"v2", "schemaVersion":"v2",
"state": "firing", "state": "firing",
@ -399,6 +507,18 @@ func TestCreateRule(t *testing.T) {
"frequency": "1m" "frequency": "1m"
} }
}, },
"notificationSettings": {
"GroupBy": [
"k8s.node.name"
],
"renotify": {
"interval": "10m",
"enabled": true,
"alertStates": [
"firing"
]
}
},
"labels": { "labels": {
"severity": "warning" "severity": "warning"
}, },
@ -429,6 +549,20 @@ func TestCreateRule(t *testing.T) {
}, },
OrgID: claims.OrgID, OrgID: claims.OrgID,
} }
// Update route expectations with actual rule ID
routesWithRuleID := make([]*alertmanagertypes.RoutePolicy, len(tc.Route))
for i, route := range tc.Route {
routesWithRuleID[i] = &alertmanagertypes.RoutePolicy{
Expression: strings.Replace(route.Expression, "{{.ruleId}}", rule.ID.String(), -1),
ExpressionKind: route.ExpressionKind,
Channels: route.Channels,
Name: strings.Replace(route.Name, "{{.ruleId}}", rule.ID.String(), -1),
Enabled: route.Enabled,
}
}
mockRouteStore.ExpectCreateBatch(routesWithRuleID)
mockSQLRuleStore.ExpectCreateRule(rule) mockSQLRuleStore.ExpectCreateRule(rule)
ctx := authtypes.NewContextWithClaims(context.Background(), *claims) ctx := authtypes.NewContextWithClaims(context.Background(), *claims)
@ -441,6 +575,12 @@ func TestCreateRule(t *testing.T) {
// Wait for task creation with proper synchronization // Wait for task creation with proper synchronization
taskName := prepareTaskName(result.Id) taskName := prepareTaskName(result.Id)
syncCompleted := waitForTaskSync(manager, taskName, true, 2*time.Second) syncCompleted := waitForTaskSync(manager, taskName, true, 2*time.Second)
// Verify notification config
config, err := nfmanager.GetNotificationConfig(orgId, result.Id)
assert.NoError(t, err)
assert.Equal(t, tc.Config, config)
assert.True(t, syncCompleted, "Task creation should complete within timeout") assert.True(t, syncCompleted, "Task creation should complete within timeout")
assert.NotNil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be created with correct name") assert.NotNil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be created with correct name")
assert.Greater(t, len(manager.Rules()), 0, "Rules should be added to manager") assert.Greater(t, len(manager.Rules()), 0, "Rules should be added to manager")
@ -455,14 +595,35 @@ func TestEditRule(t *testing.T) {
claims := &authtypes.Claims{ claims := &authtypes.Claims{
Email: "test@example.com", Email: "test@example.com",
} }
manager, mockSQLRuleStore, orgId := setupTestManager(t) manager, mockSQLRuleStore, mockRouteStore, nfmanager, orgId := setupTestManager(t)
claims.OrgID = orgId claims.OrgID = orgId
testCases := []struct { testCases := []struct {
ruleID string
name string name string
Route []*alertmanagertypes.RoutePolicy
Config *alertmanagertypes.NotificationConfig
ruleStr string ruleStr string
}{ }{
{ {
name: "validate edit rule functionality", ruleID: "12345678-1234-1234-1234-123456789012",
name: "validate edit rule functionality",
Route: []*alertmanagertypes.RoutePolicy{
{
Expression: fmt.Sprintf("ruleId == \"rule1\" && threshold.name == \"critical\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"critical-alerts"},
Name: "12345678-1234-1234-1234-123456789012",
Enabled: true,
},
},
Config: &alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 4 * time.Hour,
NoDataInterval: 4 * time.Hour,
},
UsePolicy: false,
},
ruleStr: `{ ruleStr: `{
"alert": "updated cpu usage", "alert": "updated cpu usage",
"ruleType": "threshold_rule", "ruleType": "threshold_rule",
@ -498,7 +659,32 @@ func TestEditRule(t *testing.T) {
}`, }`,
}, },
{ {
name: "edit complete v2 rule with thresholds", ruleID: "12345678-1234-1234-1234-123456789013",
name: "edit complete v2 rule with thresholds",
Route: []*alertmanagertypes.RoutePolicy{
{
Expression: fmt.Sprintf("ruleId == \"rule2\" && threshold.name == \"critical\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"test-alerts"},
Name: "12345678-1234-1234-1234-123456789013",
Enabled: true,
},
{
Expression: fmt.Sprintf("ruleId == \"rule2\" && threshold.name == \"warning\""),
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: []string{"test-alerts"},
Name: "12345678-1234-1234-1234-123456789013",
Enabled: true,
},
},
Config: &alertmanagertypes.NotificationConfig{
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}, model.LabelName("k8s.node.name"): {}},
Renotify: alertmanagertypes.ReNotificationConfig{
RenotifyInterval: 10 * time.Minute,
NoDataInterval: 4 * time.Hour,
},
UsePolicy: false,
},
ruleStr: `{ ruleStr: `{
"schemaVersion":"v2", "schemaVersion":"v2",
"state": "firing", "state": "firing",
@ -560,6 +746,18 @@ func TestEditRule(t *testing.T) {
"labels": { "labels": {
"severity": "critical" "severity": "critical"
}, },
"notificationSettings": {
"GroupBy": [
"k8s.node.name"
],
"renotify": {
"interval": "10m",
"enabled": true,
"alertStates": [
"firing"
]
}
},
"annotations": { "annotations": {
"description": "This alert is fired when memory usage crosses the threshold", "description": "This alert is fired when memory usage crosses the threshold",
"summary": "Memory usage threshold exceeded" "summary": "Memory usage threshold exceeded"
@ -573,11 +771,13 @@ func TestEditRule(t *testing.T) {
for _, tc := range testCases { for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) { t.Run(tc.name, func(t *testing.T) {
ruleID := valuer.GenerateUUID() ruleId, err := valuer.NewUUID(tc.ruleID)
if err != nil {
t.Errorf("error creating ruleId: %s", err)
}
existingRule := &ruletypes.Rule{ existingRule := &ruletypes.Rule{
Identifiable: types.Identifiable{ Identifiable: types.Identifiable{
ID: ruleID, ID: ruleId,
}, },
TimeAuditable: types.TimeAuditable{ TimeAuditable: types.TimeAuditable{
CreatedAt: time.Now(), CreatedAt: time.Now(),
@ -590,18 +790,24 @@ func TestEditRule(t *testing.T) {
Data: `{"alert": "original cpu usage", "disabled": false}`, Data: `{"alert": "original cpu usage", "disabled": false}`,
OrgID: claims.OrgID, OrgID: claims.OrgID,
} }
mockRouteStore.ExpectDeleteRouteByName(existingRule.OrgID, ruleId.String())
mockSQLRuleStore.ExpectGetStoredRule(ruleID, existingRule) mockRouteStore.ExpectCreateBatch(tc.Route)
mockSQLRuleStore.ExpectGetStoredRule(ruleId, existingRule)
mockSQLRuleStore.ExpectEditRule(existingRule) mockSQLRuleStore.ExpectEditRule(existingRule)
ctx := authtypes.NewContextWithClaims(context.Background(), *claims) ctx := authtypes.NewContextWithClaims(context.Background(), *claims)
err := manager.EditRule(ctx, tc.ruleStr, ruleID) err = manager.EditRule(ctx, tc.ruleStr, ruleId)
assert.NoError(t, err) assert.NoError(t, err)
// Wait for task update with proper synchronization // Wait for task update with proper synchronization
taskName := prepareTaskName(ruleID.StringValue())
taskName := prepareTaskName(ruleId.String())
syncCompleted := waitForTaskSync(manager, taskName, true, 2*time.Second) syncCompleted := waitForTaskSync(manager, taskName, true, 2*time.Second)
config, err := nfmanager.GetNotificationConfig(orgId, ruleId.String())
assert.NoError(t, err)
assert.Equal(t, tc.Config, config)
assert.True(t, syncCompleted, "Task update should complete within timeout") assert.True(t, syncCompleted, "Task update should complete within timeout")
assert.NotNil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be updated with correct name") assert.NotNil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be updated with correct name")
assert.Greater(t, len(manager.Rules()), 0, "Rules should be updated in manager") assert.Greater(t, len(manager.Rules()), 0, "Rules should be updated in manager")

View File

@ -147,13 +147,19 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
var alerts = make(map[uint64]*ruletypes.Alert, len(res)) var alerts = make(map[uint64]*ruletypes.Alert, len(res))
ruleReceivers := r.Threshold.GetRuleReceivers()
ruleReceiverMap := make(map[string][]string)
for _, value := range ruleReceivers {
ruleReceiverMap[value.Name] = value.Channels
}
for _, series := range res { for _, series := range res {
if len(series.Floats) == 0 { if len(series.Floats) == 0 {
continue continue
} }
results, err := r.Threshold.ShouldAlert(toCommonSeries(series)) results, err := r.Threshold.ShouldAlert(toCommonSeries(series), r.Unit())
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -165,7 +171,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
} }
r.logger.DebugContext(ctx, "alerting for series", "rule_name", r.Name(), "series", series) r.logger.DebugContext(ctx, "alerting for series", "rule_name", r.Name(), "series", series)
threshold := valueFormatter.Format(r.targetVal(), r.Unit()) threshold := valueFormatter.Format(result.Target, result.TargetUnit)
tmplData := ruletypes.AlertTemplateData(l, valueFormatter.Format(result.V, r.Unit()), threshold) tmplData := ruletypes.AlertTemplateData(l, valueFormatter.Format(result.V, r.Unit()), threshold)
// Inject some convenience variables that are easier to remember for users // Inject some convenience variables that are easier to remember for users
@ -218,7 +224,6 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
r.lastError = err r.lastError = err
return nil, err return nil, err
} }
alerts[h] = &ruletypes.Alert{ alerts[h] = &ruletypes.Alert{
Labels: lbs, Labels: lbs,
QueryResultLables: resultLabels, QueryResultLables: resultLabels,
@ -227,13 +232,12 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
State: model.StatePending, State: model.StatePending,
Value: result.V, Value: result.V,
GeneratorURL: r.GeneratorURL(), GeneratorURL: r.GeneratorURL(),
Receivers: r.preferredChannels, Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
} }
} }
} }
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts)) r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
// alerts[h] is ready, add or update active list now // alerts[h] is ready, add or update active list now
for h, a := range alerts { for h, a := range alerts {
// Check whether we already have alerting state for the identifying label set. // Check whether we already have alerting state for the identifying label set.
@ -241,7 +245,9 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive { if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
alert.Value = a.Value alert.Value = a.Value
alert.Annotations = a.Annotations alert.Annotations = a.Annotations
alert.Receivers = r.preferredChannels if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
alert.Receivers = ruleReceiverMap[v]
}
continue continue
} }

View File

@ -696,7 +696,7 @@ func TestPromRuleShouldAlert(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
} }
resultVectors, err := rule.Threshold.ShouldAlert(toCommonSeries(c.values)) resultVectors, err := rule.Threshold.ShouldAlert(toCommonSeries(c.values), rule.Unit())
assert.NoError(t, err) assert.NoError(t, err)
// Compare full result vector with expected vector // Compare full result vector with expected vector

View File

@ -38,7 +38,6 @@ func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError)
if parsedRule.RuleType == ruletypes.RuleTypeThreshold { if parsedRule.RuleType == ruletypes.RuleTypeThreshold {
// add special labels for test alerts // add special labels for test alerts
parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target)
parsedRule.Labels[labels.RuleSourceLabel] = "" parsedRule.Labels[labels.RuleSourceLabel] = ""
parsedRule.Labels[labels.AlertRuleIdLabel] = "" parsedRule.Labels[labels.AlertRuleIdLabel] = ""

View File

@ -488,7 +488,7 @@ func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID,
continue continue
} }
} }
resultSeries, err := r.Threshold.ShouldAlert(*series) resultSeries, err := r.Threshold.ShouldAlert(*series, r.Unit())
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -565,7 +565,7 @@ func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUI
continue continue
} }
} }
resultSeries, err := r.Threshold.ShouldAlert(*series) resultSeries, err := r.Threshold.ShouldAlert(*series, r.Unit())
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -602,6 +602,12 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
resultFPs := map[uint64]struct{}{} resultFPs := map[uint64]struct{}{}
var alerts = make(map[uint64]*ruletypes.Alert, len(res)) var alerts = make(map[uint64]*ruletypes.Alert, len(res))
ruleReceivers := r.Threshold.GetRuleReceivers()
ruleReceiverMap := make(map[string][]string)
for _, value := range ruleReceivers {
ruleReceiverMap[value.Name] = value.Channels
}
for _, smpl := range res { for _, smpl := range res {
l := make(map[string]string, len(smpl.Metric)) l := make(map[string]string, len(smpl.Metric))
for _, lbl := range smpl.Metric { for _, lbl := range smpl.Metric {
@ -610,7 +616,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
value := valueFormatter.Format(smpl.V, r.Unit()) value := valueFormatter.Format(smpl.V, r.Unit())
//todo(aniket): handle different threshold //todo(aniket): handle different threshold
threshold := valueFormatter.Format(r.targetVal(), r.Unit()) threshold := valueFormatter.Format(smpl.Target, smpl.TargetUnit)
r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold) r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold)
tmplData := ruletypes.AlertTemplateData(l, value, threshold) tmplData := ruletypes.AlertTemplateData(l, value, threshold)
@ -690,7 +696,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
State: model.StatePending, State: model.StatePending,
Value: smpl.V, Value: smpl.V,
GeneratorURL: r.GeneratorURL(), GeneratorURL: r.GeneratorURL(),
Receivers: r.preferredChannels, Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
Missing: smpl.IsMissing, Missing: smpl.IsMissing,
} }
} }
@ -705,7 +711,9 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
alert.Value = a.Value alert.Value = a.Value
alert.Annotations = a.Annotations alert.Annotations = a.Annotations
alert.Receivers = r.preferredChannels if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
alert.Receivers = ruleReceiverMap[v]
}
continue continue
} }

View File

@ -824,7 +824,7 @@ func TestThresholdRuleShouldAlert(t *testing.T) {
values.Points[i].Timestamp = time.Now().UnixMilli() values.Points[i].Timestamp = time.Now().UnixMilli()
} }
resultVectors, err := rule.Threshold.ShouldAlert(c.values) resultVectors, err := rule.Threshold.ShouldAlert(c.values, rule.Unit())
assert.NoError(t, err, "Test case %d", idx) assert.NoError(t, err, "Test case %d", idx)
// Compare result vectors with expected behavior // Compare result vectors with expected behavior
@ -1201,7 +1201,7 @@ func TestThresholdRuleLabelNormalization(t *testing.T) {
values.Points[i].Timestamp = time.Now().UnixMilli() values.Points[i].Timestamp = time.Now().UnixMilli()
} }
vector, err := rule.Threshold.ShouldAlert(c.values) vector, err := rule.Threshold.ShouldAlert(c.values, rule.Unit())
assert.NoError(t, err) assert.NoError(t, err)
for name, value := range c.values.Labels { for name, value := range c.values.Labels {
@ -1211,7 +1211,7 @@ func TestThresholdRuleLabelNormalization(t *testing.T) {
} }
// Get result vectors from threshold evaluation // Get result vectors from threshold evaluation
resultVectors, err := rule.Threshold.ShouldAlert(c.values) resultVectors, err := rule.Threshold.ShouldAlert(c.values, rule.Unit())
assert.NoError(t, err, "Test case %d", idx) assert.NoError(t, err, "Test case %d", idx)
// Compare result vectors with expected behavior // Compare result vectors with expected behavior
@ -1501,13 +1501,11 @@ func TestThresholdRuleUnitCombinations(t *testing.T) {
Kind: ruletypes.BasicThresholdKind, Kind: ruletypes.BasicThresholdKind,
Spec: ruletypes.BasicRuleThresholds{ Spec: ruletypes.BasicRuleThresholds{
{ {
Name: postableRule.AlertName, Name: postableRule.AlertName,
TargetValue: &c.target, TargetValue: &c.target,
TargetUnit: c.targetUnit, TargetUnit: c.targetUnit,
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit, MatchType: ruletypes.MatchType(c.matchType),
MatchType: ruletypes.MatchType(c.matchType), CompareOp: ruletypes.CompareOp(c.compareOp),
CompareOp: ruletypes.CompareOp(c.compareOp),
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
}, },
}, },
} }
@ -1612,12 +1610,10 @@ func TestThresholdRuleNoData(t *testing.T) {
Kind: ruletypes.BasicThresholdKind, Kind: ruletypes.BasicThresholdKind,
Spec: ruletypes.BasicRuleThresholds{ Spec: ruletypes.BasicRuleThresholds{
{ {
Name: postableRule.AlertName, Name: postableRule.AlertName,
TargetValue: &target, TargetValue: &target,
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit, MatchType: ruletypes.AtleastOnce,
MatchType: ruletypes.AtleastOnce, CompareOp: ruletypes.ValueIsEq,
CompareOp: ruletypes.ValueIsEq,
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
}, },
}, },
} }
@ -1734,13 +1730,11 @@ func TestThresholdRuleTracesLink(t *testing.T) {
Kind: ruletypes.BasicThresholdKind, Kind: ruletypes.BasicThresholdKind,
Spec: ruletypes.BasicRuleThresholds{ Spec: ruletypes.BasicRuleThresholds{
{ {
Name: postableRule.AlertName, Name: postableRule.AlertName,
TargetValue: &c.target, TargetValue: &c.target,
TargetUnit: c.targetUnit, TargetUnit: c.targetUnit,
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit, MatchType: ruletypes.MatchType(c.matchType),
MatchType: ruletypes.MatchType(c.matchType), CompareOp: ruletypes.CompareOp(c.compareOp),
CompareOp: ruletypes.CompareOp(c.compareOp),
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
}, },
}, },
} }
@ -1873,13 +1867,11 @@ func TestThresholdRuleLogsLink(t *testing.T) {
Kind: ruletypes.BasicThresholdKind, Kind: ruletypes.BasicThresholdKind,
Spec: ruletypes.BasicRuleThresholds{ Spec: ruletypes.BasicRuleThresholds{
{ {
Name: postableRule.AlertName, Name: postableRule.AlertName,
TargetValue: &c.target, TargetValue: &c.target,
TargetUnit: c.targetUnit, TargetUnit: c.targetUnit,
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit, MatchType: ruletypes.MatchType(c.matchType),
MatchType: ruletypes.MatchType(c.matchType), CompareOp: ruletypes.CompareOp(c.compareOp),
CompareOp: ruletypes.CompareOp(c.compareOp),
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
}, },
}, },
} }
@ -2125,22 +2117,18 @@ func TestMultipleThresholdRule(t *testing.T) {
Kind: ruletypes.BasicThresholdKind, Kind: ruletypes.BasicThresholdKind,
Spec: ruletypes.BasicRuleThresholds{ Spec: ruletypes.BasicRuleThresholds{
{ {
Name: "first_threshold", Name: "first_threshold",
TargetValue: &c.target, TargetValue: &c.target,
TargetUnit: c.targetUnit, TargetUnit: c.targetUnit,
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit, MatchType: ruletypes.MatchType(c.matchType),
MatchType: ruletypes.MatchType(c.matchType), CompareOp: ruletypes.CompareOp(c.compareOp),
CompareOp: ruletypes.CompareOp(c.compareOp),
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
}, },
{ {
Name: "second_threshold", Name: "second_threshold",
TargetValue: &c.secondTarget, TargetValue: &c.secondTarget,
TargetUnit: c.targetUnit, TargetUnit: c.targetUnit,
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit, MatchType: ruletypes.MatchType(c.matchType),
MatchType: ruletypes.MatchType(c.matchType), CompareOp: ruletypes.CompareOp(c.compareOp),
CompareOp: ruletypes.CompareOp(c.compareOp),
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
}, },
}, },
} }

View File

@ -38,6 +38,7 @@ import (
"github.com/SigNoz/signoz/pkg/telemetrystore" "github.com/SigNoz/signoz/pkg/telemetrystore"
"github.com/SigNoz/signoz/pkg/telemetrystore/clickhousetelemetrystore" "github.com/SigNoz/signoz/pkg/telemetrystore/clickhousetelemetrystore"
"github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystorehook" "github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystorehook"
routeTypes "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/SigNoz/signoz/pkg/version" "github.com/SigNoz/signoz/pkg/version"
"github.com/SigNoz/signoz/pkg/web" "github.com/SigNoz/signoz/pkg/web"
"github.com/SigNoz/signoz/pkg/web/noopweb" "github.com/SigNoz/signoz/pkg/web/noopweb"
@ -133,6 +134,7 @@ func NewSQLMigrationProviderFactories(
sqlmigration.NewQueryBuilderV5MigrationFactory(sqlstore, telemetryStore), sqlmigration.NewQueryBuilderV5MigrationFactory(sqlstore, telemetryStore),
sqlmigration.NewAddMeterQuickFiltersFactory(sqlstore, sqlschema), sqlmigration.NewAddMeterQuickFiltersFactory(sqlstore, sqlschema),
sqlmigration.NewUpdateTTLSettingForCustomRetentionFactory(sqlstore, sqlschema), sqlmigration.NewUpdateTTLSettingForCustomRetentionFactory(sqlstore, sqlschema),
sqlmigration.NewAddRoutePolicyFactory(sqlstore, sqlschema),
) )
} }
@ -155,9 +157,9 @@ func NewPrometheusProviderFactories(telemetryStore telemetrystore.TelemetryStore
) )
} }
func NewNotificationManagerProviderFactories() factory.NamedMap[factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config]] { func NewNotificationManagerProviderFactories(routeStore routeTypes.RouteStore) factory.NamedMap[factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config]] {
return factory.MustNewNamedMap( return factory.MustNewNamedMap(
rulebasednotification.NewFactory(), rulebasednotification.NewFactory(routeStore),
) )
} }

View File

@ -4,6 +4,7 @@ import (
"context" "context"
"github.com/SigNoz/signoz/pkg/alertmanager" "github.com/SigNoz/signoz/pkg/alertmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager" "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/sqlroutingstore"
"github.com/SigNoz/signoz/pkg/analytics" "github.com/SigNoz/signoz/pkg/analytics"
"github.com/SigNoz/signoz/pkg/cache" "github.com/SigNoz/signoz/pkg/cache"
"github.com/SigNoz/signoz/pkg/emailing" "github.com/SigNoz/signoz/pkg/emailing"
@ -230,12 +231,14 @@ func New(
// Initialize user getter // Initialize user getter
userGetter := impluser.NewGetter(impluser.NewStore(sqlstore, providerSettings)) userGetter := impluser.NewGetter(impluser.NewStore(sqlstore, providerSettings))
// will need to create factory for all stores
routeStore := sqlroutingstore.NewStore(sqlstore)
// shared NotificationManager instance for both alertmanager and rules // shared NotificationManager instance for both alertmanager and rules
notificationManager, err := factory.NewProviderFromNamedMap( notificationManager, err := factory.NewProviderFromNamedMap(
ctx, ctx,
providerSettings, providerSettings,
nfmanager.Config{}, nfmanager.Config{},
NewNotificationManagerProviderFactories(), NewNotificationManagerProviderFactories(routeStore),
"rulebased", "rulebased",
) )
if err != nil { if err != nil {

View File

@ -0,0 +1,260 @@
package sqlmigration
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/sqlschema"
"github.com/SigNoz/signoz/pkg/sqlstore"
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/ruletypes"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/uptrace/bun"
"github.com/uptrace/bun/migrate"
"log/slog"
"time"
)
// Shared types for migration
// expressionRoute is the migration-local bun model for a row of the
// route_policy table (see the bun table tag below). It is populated by
// convertRulesToRoutes and bulk-inserted by migrateRulesToRoutePolicies.
type expressionRoute struct {
bun.BaseModel `bun:"table:route_policy"`
types.Identifiable
types.TimeAuditable
types.UserAuditable
// Expression is the routing expression text.
Expression string `bun:"expression,type:text"`
// ExpressionKind is stored in the "kind" column.
ExpressionKind string `bun:"kind,type:text"`
// Channels lists the notification channel names the route delivers to.
// NOTE(review): stored in a text column; presumably bun serializes the slice — confirm.
Channels []string `bun:"channels,type:text"`
// Name is set to the originating rule's ID by convertRulesToRoutes.
Name string `bun:"name,type:text"`
Description string `bun:"description,type:text"`
Enabled bool `bun:"enabled,type:boolean,default:true"`
Tags []string `bun:"tags,type:text"`
// OrgID is the ID of the organization that owns the route.
OrgID string `bun:"org_id,type:text"`
}
// rule is the migration-local bun model mapped to the rule table. It is used
// only to read existing alert rules so they can be converted into routes.
type rule struct {
bun.BaseModel `bun:"table:rule"`
types.Identifiable
types.TimeAuditable
types.UserAuditable
// Deleted is the soft-delete flag; the migration selects rows where it is 0.
Deleted int `bun:"deleted,default:0"`
// Data holds the JSON-encoded rule definition, which convertRulesToRoutes
// unmarshals into ruletypes.GettableRule.
Data string `bun:"data,type:text"`
// OrgID is the ID of the organization that owns the rule.
OrgID string `bun:"org_id,type:text"`
}
// addRoutePolicies is the SQL migration that creates the route_policy table
// and seeds it with one route per existing alert rule.
type addRoutePolicies struct {
// sqlstore is the store handle passed in by the factory.
sqlstore sqlstore.SQLStore
// sqlschema is used to look up the table and to generate the CREATE TABLE SQL.
sqlschema sqlschema.SQLSchema
// logger is taken from the provider settings at construction time.
logger *slog.Logger
}
// NewAddRoutePolicyFactory returns the provider factory registered under the
// name "add_route_policy". Every migration built by the factory shares the
// given store and schema handles.
func NewAddRoutePolicyFactory(store sqlstore.SQLStore, schema sqlschema.SQLSchema) factory.ProviderFactory[SQLMigration, Config] {
	// The constructor closes over the store and schema so the factory only
	// needs the per-provider context, settings and config.
	build := func(ctx context.Context, settings factory.ProviderSettings, cfg Config) (SQLMigration, error) {
		return newAddRoutePolicy(ctx, settings, cfg, store, schema)
	}

	return factory.NewProviderFactory(factory.MustNewName("add_route_policy"), build)
}
// newAddRoutePolicy builds the addRoutePolicies migration from the given store
// and schema handles, taking the logger from the provider settings. The
// context and config arguments are unused. It never returns an error.
func newAddRoutePolicy(_ context.Context, settings factory.ProviderSettings, _ Config, store sqlstore.SQLStore, schema sqlschema.SQLSchema) (SQLMigration, error) {
	m := &addRoutePolicies{logger: settings.Logger}
	m.sqlstore = store
	m.sqlschema = schema
	return m, nil
}
// Register adds this migration's Up and Down functions to the given migration
// set and returns whatever error the registration reports.
func (migration *addRoutePolicies) Register(migrations *migrate.Migrations) error {
	return migrations.Register(migration.Up, migration.Down)
}
// Up creates the route_policy table and seeds it from the existing alert
// rules. Both steps run inside one transaction, so a failure in either leaves
// the database unchanged.
//
// If the table lookup succeeds the migration returns immediately without
// touching anything. Any lookup error is treated as "table does not exist".
func (migration *addRoutePolicies) Up(ctx context.Context, db *bun.DB) error {
	if _, _, lookupErr := migration.sqlschema.GetTable(ctx, sqlschema.TableName("route_policy")); lookupErr == nil {
		return nil
	}

	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		return err
	}
	// Rollback is always attempted on exit; its error is deliberately ignored.
	defer func() { _ = tx.Rollback() }()

	// Definition of the route_policy table.
	routePolicyTable := &sqlschema.Table{
		Name: "route_policy",
		Columns: []*sqlschema.Column{
			{Name: "id", DataType: sqlschema.DataTypeText, Nullable: false},
			{Name: "created_at", DataType: sqlschema.DataTypeTimestamp, Nullable: false},
			{Name: "updated_at", DataType: sqlschema.DataTypeTimestamp, Nullable: false},
			{Name: "created_by", DataType: sqlschema.DataTypeText, Nullable: false},
			{Name: "updated_by", DataType: sqlschema.DataTypeText, Nullable: false},
			{Name: "expression", DataType: sqlschema.DataTypeText, Nullable: false},
			{Name: "kind", DataType: sqlschema.DataTypeText, Nullable: false},
			{Name: "channels", DataType: sqlschema.DataTypeText, Nullable: false},
			{Name: "name", DataType: sqlschema.DataTypeText, Nullable: false},
			{Name: "description", DataType: sqlschema.DataTypeText, Nullable: true},
			{Name: "enabled", DataType: sqlschema.DataTypeBoolean, Nullable: false, Default: "true"},
			{Name: "tags", DataType: sqlschema.DataTypeText, Nullable: true},
			{Name: "org_id", DataType: sqlschema.DataTypeText, Nullable: false},
		},
		PrimaryKeyConstraint: &sqlschema.PrimaryKeyConstraint{
			ColumnNames: []sqlschema.ColumnName{"id"},
		},
		ForeignKeyConstraints: []*sqlschema.ForeignKeyConstraint{
			{
				ReferencingColumnName: "org_id",
				ReferencedTableName:   "organizations",
				ReferencedColumnName:  "id",
			},
		},
	}

	// Execute every statement the schema operator generates for the table.
	for _, stmt := range migration.sqlschema.Operator().CreateTable(routePolicyTable) {
		if _, execErr := tx.ExecContext(ctx, string(stmt)); execErr != nil {
			return execErr
		}
	}

	// Seed the new table from the rules that already exist.
	if seedErr := migration.migrateRulesToRoutePolicies(ctx, tx); seedErr != nil {
		return seedErr
	}

	return tx.Commit()
}
// migrateRulesToRoutePolicies reads every non-deleted rule through tx,
// converts each one into a route_policy row and inserts all rows with a single
// batch insert. It runs on the caller's transaction and never commits or
// rolls back itself.
//
// It returns nil when there is nothing to migrate. Every failure is returned
// as an internal error whose message includes the underlying cause.
func (migration *addRoutePolicies) migrateRulesToRoutePolicies(ctx context.Context, tx bun.Tx) error {
	var rules []*rule
	err := tx.NewSelect().
		Model(&rules).
		Where("deleted = ?", 0).
		Scan(ctx)
	if err != nil {
		if errors.Is(err, sql.ErrNoRows) {
			return nil // No rules to migrate
		}
		return errors.NewInternalf(errors.CodeInternal, "failed to fetch rules: %v", err)
	}

	// Without rules there is nothing to convert, so skip the channel lookup.
	if len(rules) == 0 {
		return nil
	}

	channelsByOrg, err := migration.getAllChannels(ctx, tx)
	if err != nil {
		return errors.NewInternalf(errors.CodeInternal, "fetching channels error: %v", err)
	}

	routesToInsert, err := migration.convertRulesToRoutes(rules, channelsByOrg)
	if err != nil {
		return errors.NewInternalf(errors.CodeInternal, "converting rules to routes error: %v", err)
	}

	// Rules without any resolvable channel produce no route, so the batch may
	// be empty even when rules exist.
	if len(routesToInsert) == 0 {
		return nil
	}

	// Insert all routes in a single batch operation
	if _, err = tx.NewInsert().Model(&routesToInsert).Exec(ctx); err != nil {
		return errors.NewInternalf(errors.CodeInternal, "failed to insert notification routes: %v", err)
	}

	return nil
}
// convertRulesToRoutes builds one route per rule.
//
// For each rule the stored JSON is decoded into a ruletypes.GettableRule. The
// route's channels are the rule's preferred channels, or every channel of the
// rule's organization (from channelsByOrg) when the rule names none. A rule
// with no channels from either source is skipped. The route expression matches
// on the rule ID and on threshold.name, which is set to the rule's "severity"
// label, or "critical" when that label is absent.
//
// It returns an error as soon as any rule's data fails to decode; no partial
// result is returned in that case.
func (migration *addRoutePolicies) convertRulesToRoutes(rules []*rule, channelsByOrg map[string][]string) ([]*expressionRoute, error) {
	routes := make([]*expressionRoute, 0, len(rules))

	// Use a single timestamp so created_at and updated_at agree on every row.
	now := time.Now()

	for _, r := range rules {
		// Stringify the ID once so the expression and the route name agree.
		ruleID := r.ID.StringValue()

		var gettableRule ruletypes.GettableRule
		if err := json.Unmarshal([]byte(r.Data), &gettableRule); err != nil {
			return nil, errors.NewInternalf(errors.CodeInternal, "failed to unmarshal rule data for rule ID %s: %v", ruleID, err)
		}

		if len(gettableRule.PreferredChannels) == 0 {
			channels, exists := channelsByOrg[r.OrgID]
			if !exists || len(channels) == 0 {
				continue
			}
			gettableRule.PreferredChannels = channels
		}

		severity := "critical"
		if v, ok := gettableRule.Labels["severity"]; ok {
			severity = v
		}

		// NOTE(review): severity is interpolated without escaping; a label value
		// containing a double quote would yield a malformed expression — confirm
		// whether the expression syntax needs escaping here.
		expression := fmt.Sprintf(`%s == "%s" && %s == "%s"`, "threshold.name", severity, "ruleId", ruleID)

		routes = append(routes, &expressionRoute{
			Identifiable: types.Identifiable{
				ID: valuer.GenerateUUID(),
			},
			TimeAuditable: types.TimeAuditable{
				CreatedAt: now,
				UpdatedAt: now,
			},
			UserAuditable: types.UserAuditable{
				CreatedBy: r.CreatedBy,
				UpdatedBy: r.UpdatedBy,
			},
			Expression:     expression,
			ExpressionKind: "rule",
			Channels:       gettableRule.PreferredChannels,
			Name:           ruleID,
			Enabled:        true,
			OrgID:          r.OrgID,
		})
	}

	return routes, nil
}
// getAllChannels loads every row of the notification_channel table through tx
// and returns the channel names grouped by organization ID. The map is empty,
// never nil, when no channels exist.
//
// A query failure is returned as an internal error whose message includes the
// underlying cause.
func (migration *addRoutePolicies) getAllChannels(ctx context.Context, tx bun.Tx) (map[string][]string, error) {
	// channel is a function-local model so this migration does not depend on
	// a channel type declared elsewhere.
	type channel struct {
		bun.BaseModel `bun:"table:notification_channel"`
		types.Identifiable
		types.TimeAuditable
		Name  string `json:"name" bun:"name"`
		Type  string `json:"type" bun:"type"`
		Data  string `json:"data" bun:"data"`
		OrgID string `json:"org_id" bun:"org_id"`
	}

	var channels []*channel
	err := tx.NewSelect().
		Model(&channels).
		Scan(ctx)
	if err != nil {
		return nil, errors.NewInternalf(errors.CodeInternal, "failed to fetch all channels: %v", err)
	}

	// Group channels by org ID
	channelsByOrg := make(map[string][]string)
	for _, ch := range channels {
		channelsByOrg[ch.OrgID] = append(channelsByOrg[ch.OrgID], ch.Name)
	}

	return channelsByOrg, nil
}
// Down is a no-op: it performs no rollback of what Up did and always
// returns nil.
func (migration *addRoutePolicies) Down(_ context.Context, _ *bun.DB) error {
	return nil
}

View File

@ -27,6 +27,8 @@ type (
// An alias for the Alert type from the alertmanager package. // An alias for the Alert type from the alertmanager package.
Alert = types.Alert Alert = types.Alert
AlertSlice = types.AlertSlice
PostableAlert = models.PostableAlert PostableAlert = models.PostableAlert
PostableAlerts = models.PostableAlerts PostableAlerts = models.PostableAlerts
@ -38,6 +40,10 @@ type (
GettableAlerts = models.GettableAlerts GettableAlerts = models.GettableAlerts
) )
const (
// NoDataLabel is the label name ("nodata") whose presence on an alert is
// what NoDataAlert checks for.
NoDataLabel = model.LabelName("nodata")
)
type DeprecatedGettableAlert struct { type DeprecatedGettableAlert struct {
*model.Alert *model.Alert
Status types.AlertStatus `json:"status"` Status types.AlertStatus `json:"status"`
@ -307,3 +313,11 @@ func receiversMatchFilter(receivers []string, filter *regexp.Regexp) bool {
return false return false
} }
// NoDataAlert reports whether the alert carries the NoDataLabel label. Only
// the label's presence is checked; its value is ignored.
func NoDataAlert(alert *types.Alert) bool {
	_, ok := alert.Labels[NoDataLabel]
	return ok
}

View File

@ -21,6 +21,7 @@ import (
const ( const (
DefaultReceiverName string = "default-receiver" DefaultReceiverName string = "default-receiver"
DefaultGroupBy string = "ruleId" DefaultGroupBy string = "ruleId"
DefaultGroupByAll string = "__all__"
) )
var ( var (
@ -193,6 +194,20 @@ func (c *Config) SetRouteConfig(routeConfig RouteConfig) error {
return nil return nil
} }
// AddInhibitRules appends rules to the alertmanager config's inhibit rules and
// refreshes the storeable config (serialized config, hash and update time) to
// match. It returns an invalid-input error when the alertmanager config is nil.
func (c *Config) AddInhibitRules(rules []config.InhibitRule) error {
	amConfig := c.alertmanagerConfig
	if amConfig == nil {
		return errors.New(errors.TypeInvalidInput, ErrCodeAlertmanagerConfigInvalid, "config is nil")
	}

	amConfig.InhibitRules = append(amConfig.InhibitRules, rules...)

	// Re-serialize once and derive the hash from that same serialized form.
	serialized := string(newRawFromConfig(amConfig))
	c.storeableConfig.Config = serialized
	c.storeableConfig.Hash = fmt.Sprintf("%x", newConfigHash(serialized))
	c.storeableConfig.UpdatedAt = time.Now()

	return nil
}
func (c *Config) AlertmanagerConfig() *config.Config { func (c *Config) AlertmanagerConfig() *config.Config {
return c.alertmanagerConfig return c.alertmanagerConfig
} }
@ -304,6 +319,27 @@ func (c *Config) CreateRuleIDMatcher(ruleID string, receiverNames []string) erro
return nil return nil
} }
// DeleteRuleIDInhibitor removes every inhibit rule whose source or target
// matchers contain ruleID (as decided by matcherContainsRuleID), then
// refreshes the storeable config (serialized config, hash and update time).
//
// It returns an invalid-input error when the alertmanager config is nil, the
// same guard AddInhibitRules applies, instead of panicking. It returns nil
// without changing anything when no inhibit rules are configured.
func (c *Config) DeleteRuleIDInhibitor(ruleID string) error {
	if c.alertmanagerConfig == nil {
		return errors.New(errors.TypeInvalidInput, ErrCodeAlertmanagerConfigInvalid, "config is nil")
	}

	if c.alertmanagerConfig.InhibitRules == nil {
		return nil // already nil
	}

	// Keep only the rules that do not mention ruleID on either side.
	var filteredRules []config.InhibitRule
	for _, inhibitor := range c.alertmanagerConfig.InhibitRules {
		if matcherContainsRuleID(inhibitor.SourceMatchers, ruleID) ||
			matcherContainsRuleID(inhibitor.TargetMatchers, ruleID) {
			continue
		}
		filteredRules = append(filteredRules, inhibitor)
	}

	c.alertmanagerConfig.InhibitRules = filteredRules
	c.storeableConfig.Config = string(newRawFromConfig(c.alertmanagerConfig))
	c.storeableConfig.Hash = fmt.Sprintf("%x", newConfigHash(c.storeableConfig.Config))
	c.storeableConfig.UpdatedAt = time.Now()

	return nil
}
func (c *Config) UpdateRuleIDMatcher(ruleID string, receiverNames []string) error { func (c *Config) UpdateRuleIDMatcher(ruleID string, receiverNames []string) error {
err := c.DeleteRuleIDMatcher(ruleID) err := c.DeleteRuleIDMatcher(ruleID)
if err != nil { if err != nil {
@ -405,6 +441,8 @@ func init() {
type NotificationConfig struct { type NotificationConfig struct {
NotificationGroup map[model.LabelName]struct{} NotificationGroup map[model.LabelName]struct{}
Renotify ReNotificationConfig Renotify ReNotificationConfig
UsePolicy bool
GroupByAll bool
} }
func (nc *NotificationConfig) DeepCopy() NotificationConfig { func (nc *NotificationConfig) DeepCopy() NotificationConfig {
@ -415,6 +453,7 @@ func (nc *NotificationConfig) DeepCopy() NotificationConfig {
for k, v := range nc.NotificationGroup { for k, v := range nc.NotificationGroup {
deepCopy.NotificationGroup[k] = v deepCopy.NotificationGroup[k] = v
} }
deepCopy.UsePolicy = nc.UsePolicy
return deepCopy return deepCopy
} }
@ -423,7 +462,7 @@ type ReNotificationConfig struct {
RenotifyInterval time.Duration RenotifyInterval time.Duration
} }
func NewNotificationConfig(groups []string, renotifyInterval time.Duration, noDataRenotifyInterval time.Duration) NotificationConfig { func NewNotificationConfig(groups []string, renotifyInterval time.Duration, noDataRenotifyInterval time.Duration, policy bool) NotificationConfig {
notificationConfig := GetDefaultNotificationConfig() notificationConfig := GetDefaultNotificationConfig()
if renotifyInterval != 0 { if renotifyInterval != 0 {
@ -435,8 +474,13 @@ func NewNotificationConfig(groups []string, renotifyInterval time.Duration, noDa
} }
for _, group := range groups { for _, group := range groups {
notificationConfig.NotificationGroup[model.LabelName(group)] = struct{}{} notificationConfig.NotificationGroup[model.LabelName(group)] = struct{}{}
if group == DefaultGroupByAll {
notificationConfig.GroupByAll = true
}
} }
notificationConfig.UsePolicy = policy
return notificationConfig return notificationConfig
} }

View File

@ -0,0 +1,139 @@
package alertmanagertypes
import (
"context"
"github.com/expr-lang/expr"
"time"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/uptrace/bun"
)
// PostableRoutePolicy is the JSON-decodable description of a route policy
// (presumably the create/update request body — confirm against the handler).
// Validate checks the required fields and that Expression compiles.
type PostableRoutePolicy struct {
// Expression is the routing expression; Validate requires it to be non-empty
// and to compile with expr.Compile.
Expression string `json:"expression"`
// ExpressionKind is serialized as "kind"; Validate accepts only
// RuleBasedExpression and PolicyBasedExpression.
ExpressionKind ExpressionKind `json:"kind"`
// Channels must contain at least one entry and no empty strings.
Channels []string `json:"channels"`
// Name is required by Validate.
Name string `json:"name"`
// Description is not checked by Validate.
Description string `json:"description"`
// Tags is omitted from JSON when empty and is not checked by Validate.
Tags []string `json:"tags,omitempty"`
}
// Validate reports the first problem found in the policy, or nil when it is
// well-formed. Checks run in this order: expression present, name present,
// at least one channel, no empty channel entries, supported expression kind,
// and finally that the expression compiles.
func (p *PostableRoutePolicy) Validate() error {
	switch {
	case p.Expression == "":
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "expression is required")
	case p.Name == "":
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "name is required")
	case len(p.Channels) == 0:
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "at least one channel is required")
	}

	// Every listed channel must carry a non-empty name.
	for idx := range p.Channels {
		if p.Channels[idx] == "" {
			return errors.NewInvalidInputf(errors.CodeInvalidInput, "channel at index %d cannot be empty", idx)
		}
	}

	kind := p.ExpressionKind
	if !(kind == PolicyBasedExpression || kind == RuleBasedExpression) {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported expression kind: %s", kind.StringValue())
	}

	// Compile only to check syntax; the compiled program is discarded.
	if _, compileErr := expr.Compile(p.Expression); compileErr != nil {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid expression syntax: %v", compileErr)
	}

	return nil
}
// GettableRoutePolicy is a PostableRoutePolicy extended with its identifier
// and audit metadata (presumably the API response shape — confirm against the
// handler). The embedded struct's fields are flattened into the same JSON
// object by encoding/json.
type GettableRoutePolicy struct {
PostableRoutePolicy // Embedded
// ID is serialized as "id".
ID string `json:"id"`
// Audit fields
// These are pointers, so an unset value encodes as JSON null.
CreatedAt *time.Time `json:"createdAt"`
UpdatedAt *time.Time `json:"updatedAt"`
CreatedBy *string `json:"createdBy"`
UpdatedBy *string `json:"updatedBy"`
}
// ExpressionKind identifies the kind of a route policy expression. It wraps
// valuer.String; the values accepted by the Validate methods in this file are
// RuleBasedExpression and PolicyBasedExpression.
type ExpressionKind struct {
valuer.String
}
// Supported expression kinds.
var (
// RuleBasedExpression is the kind with string value "rule".
RuleBasedExpression = ExpressionKind{valuer.NewString("rule")}
// PolicyBasedExpression is the kind with string value "policy".
PolicyBasedExpression = ExpressionKind{valuer.NewString("policy")}
)
// RoutePolicy represents the database model for expression routes.
// It maps to the "route_policy" table through bun and embeds the shared
// identifier and audit field sets from the types package.
type RoutePolicy struct {
bun.BaseModel `bun:"table:route_policy"`
types.Identifiable
types.TimeAuditable
types.UserAuditable
// Expression is stored as non-null text; Validate requires it to be non-empty.
Expression string `bun:"expression,type:text,notnull" json:"expression"`
// ExpressionKind is stored in the "kind" column; Validate accepts only
// RuleBasedExpression and PolicyBasedExpression.
ExpressionKind ExpressionKind `bun:"kind,type:text" json:"kind"`
// Channels is stored as jsonb; Validate requires at least one non-empty entry.
Channels []string `bun:"channels,type:jsonb" json:"channels"`
// Name is required by Validate.
Name string `bun:"name,type:text" json:"name"`
Description string `bun:"description,type:text" json:"description"`
// Enabled is declared with a column default of true.
Enabled bool `bun:"enabled,type:boolean,default:true" json:"enabled"`
// Tags is stored as jsonb and omitted from JSON when empty.
Tags []string `bun:"tags,type:jsonb" json:"tags,omitempty"`
// OrgID is stored as non-null text; Validate requires it to be non-empty.
OrgID string `bun:"org_id,type:text,notnull" json:"orgId"`
}
// Validate reports the first problem found in the stored route policy, or nil
// when it is well-formed. A nil receiver is rejected. Checks run in this
// order: expression, name, organization ID, at least one channel, no empty
// channel entries, supported expression kind. Unlike
// PostableRoutePolicy.Validate, the expression is not compiled here.
func (er *RoutePolicy) Validate() error {
	if er == nil {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "route_policy cannot be nil")
	}

	switch {
	case er.Expression == "":
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "expression is required")
	case er.Name == "":
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "name is required")
	case er.OrgID == "":
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "organization ID is required")
	case len(er.Channels) == 0:
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "at least one channel is required")
	}

	// Every listed channel must carry a non-empty name.
	for idx := range er.Channels {
		if er.Channels[idx] == "" {
			return errors.NewInvalidInputf(errors.CodeInvalidInput, "channel at index %d cannot be empty", idx)
		}
	}

	kind := er.ExpressionKind
	if !(kind == PolicyBasedExpression || kind == RuleBasedExpression) {
		return errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported expression kind: %s", kind.StringValue())
	}

	return nil
}
// RouteStore is the persistence interface for RoutePolicy records. Every
// lookup and delete takes an organization ID alongside its key.
// NOTE(review): the semantics below are inferred from the signatures only;
// confirm against the concrete store implementation.
type RouteStore interface {
// GetByID presumably returns the policy with the given id within the org.
GetByID(ctx context.Context, orgId string, id string) (*RoutePolicy, error)
// Create presumably persists a single policy.
Create(ctx context.Context, route *RoutePolicy) error
// CreateBatch presumably persists several policies in one call.
CreateBatch(ctx context.Context, routes []*RoutePolicy) error
// Delete presumably removes the policy with the given id within the org.
Delete(ctx context.Context, orgId string, id string) error
// GetAllByKind presumably lists the org's policies of the given expression kind.
GetAllByKind(ctx context.Context, orgID string, kind ExpressionKind) ([]*RoutePolicy, error)
// GetAllByName presumably lists the org's policies with the given name.
GetAllByName(ctx context.Context, orgID string, name string) ([]*RoutePolicy, error)
// DeleteRouteByName presumably removes the org's policies with the given name.
DeleteRouteByName(ctx context.Context, orgID string, name string) error
}

View File

@ -4,6 +4,7 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"github.com/prometheus/common/model"
"log/slog" "log/slog"
"time" "time"
@ -49,9 +50,9 @@ func NewReceiver(input string) (Receiver, error) {
return receiverWithDefaults, nil return receiverWithDefaults, nil
} }
func TestReceiver(ctx context.Context, receiver Receiver, receiverIntegrationsFunc ReceiverIntegrationsFunc, config *Config, tmpl *template.Template, logger *slog.Logger, alert *Alert) error { func TestReceiver(ctx context.Context, receiver Receiver, receiverIntegrationsFunc ReceiverIntegrationsFunc, config *Config, tmpl *template.Template, logger *slog.Logger, lSet model.LabelSet, alert ...*Alert) error {
ctx = notify.WithGroupKey(ctx, fmt.Sprintf("%s-%s-%d", receiver.Name, alert.Labels.Fingerprint(), time.Now().Unix())) ctx = notify.WithGroupKey(ctx, fmt.Sprintf("%s-%s-%d", receiver.Name, lSet.Fingerprint(), time.Now().Unix()))
ctx = notify.WithGroupLabels(ctx, alert.Labels) ctx = notify.WithGroupLabels(ctx, lSet)
ctx = notify.WithReceiverName(ctx, receiver.Name) ctx = notify.WithReceiverName(ctx, receiver.Name)
// We need to create a new config with the same global and route config but empty receivers and routes // We need to create a new config with the same global and route config but empty receivers and routes
@ -80,7 +81,7 @@ func TestReceiver(ctx context.Context, receiver Receiver, receiverIntegrationsFu
return errors.Newf(errors.TypeNotFound, errors.CodeNotFound, "no integrations found for receiver %s", receiver.Name) return errors.Newf(errors.TypeNotFound, errors.CodeNotFound, "no integrations found for receiver %s", receiver.Name)
} }
if _, err = integrations[0].Notify(ctx, alert); err != nil { if _, err = integrations[0].Notify(ctx, alert...); err != nil {
return err return err
} }

View File

@ -15,6 +15,8 @@ import (
"github.com/SigNoz/signoz/pkg/query-service/utils/times" "github.com/SigNoz/signoz/pkg/query-service/utils/times"
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp" "github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes" "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/prometheus/alertmanager/config"
) )
type AlertType string type AlertType string
@ -65,21 +67,95 @@ type PostableRule struct {
} }
type NotificationSettings struct { type NotificationSettings struct {
NotificationGroupBy []string `json:"notificationGroupBy,omitempty"` GroupBy []string `json:"groupBy,omitempty"`
ReNotifyInterval Duration `json:"renotify,omitempty"` Renotify Renotify `json:"renotify,omitempty"`
AlertStates []model.AlertState `json:"alertStates,omitempty"` UsePolicy bool `json:"usePolicy,omitempty"`
}
type Renotify struct {
Enabled bool `json:"enabled"`
ReNotifyInterval Duration `json:"interval,omitempty"`
AlertStates []model.AlertState `json:"alertStates,omitempty"`
} }
func (ns *NotificationSettings) GetAlertManagerNotificationConfig() alertmanagertypes.NotificationConfig { func (ns *NotificationSettings) GetAlertManagerNotificationConfig() alertmanagertypes.NotificationConfig {
var renotifyInterval Duration var renotifyInterval time.Duration
var noDataRenotifyInterval Duration var noDataRenotifyInterval time.Duration
if slices.Contains(ns.AlertStates, model.StateNoData) { if ns.Renotify.Enabled {
noDataRenotifyInterval = ns.ReNotifyInterval if slices.Contains(ns.Renotify.AlertStates, model.StateNoData) {
noDataRenotifyInterval = time.Duration(ns.Renotify.ReNotifyInterval)
}
if slices.Contains(ns.Renotify.AlertStates, model.StateFiring) {
renotifyInterval = time.Duration(ns.Renotify.ReNotifyInterval)
}
} else {
renotifyInterval = 8760 * time.Hour //1 year for no renotify substitute
noDataRenotifyInterval = 8760 * time.Hour
} }
if slices.Contains(ns.AlertStates, model.StateFiring) { return alertmanagertypes.NewNotificationConfig(ns.GroupBy, renotifyInterval, noDataRenotifyInterval, ns.UsePolicy)
renotifyInterval = ns.ReNotifyInterval }
func (r *PostableRule) GetRuleRouteRequest(ruleId string) ([]*alertmanagertypes.PostableRoutePolicy, error) {
threshold, err := r.RuleCondition.Thresholds.GetRuleThreshold()
if err != nil {
return nil, err
} }
return alertmanagertypes.NewNotificationConfig(ns.NotificationGroupBy, time.Duration(renotifyInterval), time.Duration(noDataRenotifyInterval)) receivers := threshold.GetRuleReceivers()
routeRequests := make([]*alertmanagertypes.PostableRoutePolicy, 0)
for _, receiver := range receivers {
expression := fmt.Sprintf(`%s == "%s" && %s == "%s"`, LabelThresholdName, receiver.Name, LabelRuleId, ruleId)
routeRequests = append(routeRequests, &alertmanagertypes.PostableRoutePolicy{
Expression: expression,
ExpressionKind: alertmanagertypes.RuleBasedExpression,
Channels: receiver.Channels,
Name: ruleId,
Description: fmt.Sprintf("Auto-generated route for rule %s", ruleId),
Tags: []string{"auto-generated", "rule-based"},
})
}
return routeRequests, nil
}
func (r *PostableRule) GetInhibitRules(ruleId string) ([]config.InhibitRule, error) {
threshold, err := r.RuleCondition.Thresholds.GetRuleThreshold()
if err != nil {
return nil, err
}
var groups []string
if r.NotificationSettings != nil {
for k := range r.NotificationSettings.GetAlertManagerNotificationConfig().NotificationGroup {
groups = append(groups, string(k))
}
}
receivers := threshold.GetRuleReceivers()
var inhibitRules []config.InhibitRule
for i := 0; i < len(receivers)-1; i++ {
rule := config.InhibitRule{
SourceMatchers: config.Matchers{
{
Name: LabelThresholdName,
Value: receivers[i].Name,
},
{
Name: LabelRuleId,
Value: ruleId,
},
},
TargetMatchers: config.Matchers{
{
Name: LabelThresholdName,
Value: receivers[i+1].Name,
},
{
Name: LabelRuleId,
Value: ruleId,
},
},
Equal: groups,
}
inhibitRules = append(inhibitRules, rule)
}
return inhibitRules, nil
} }
func (ns *NotificationSettings) UnmarshalJSON(data []byte) error { func (ns *NotificationSettings) UnmarshalJSON(data []byte) error {
@ -95,7 +171,7 @@ func (ns *NotificationSettings) UnmarshalJSON(data []byte) error {
} }
// Validate states after unmarshaling // Validate states after unmarshaling
for _, state := range ns.AlertStates { for _, state := range ns.Renotify.AlertStates {
if state != model.StateFiring && state != model.StateNoData { if state != model.StateFiring && state != model.StateNoData {
return fmt.Errorf("invalid alert state: %s", state) return fmt.Errorf("invalid alert state: %s", state)
} }
@ -143,15 +219,25 @@ func (r *PostableRule) processRuleDefaults() error {
Kind: BasicThresholdKind, Kind: BasicThresholdKind,
Spec: BasicRuleThresholds{{ Spec: BasicRuleThresholds{{
Name: thresholdName, Name: thresholdName,
RuleUnit: r.RuleCondition.CompositeQuery.Unit,
TargetUnit: r.RuleCondition.TargetUnit, TargetUnit: r.RuleCondition.TargetUnit,
TargetValue: r.RuleCondition.Target, TargetValue: r.RuleCondition.Target,
MatchType: r.RuleCondition.MatchType, MatchType: r.RuleCondition.MatchType,
CompareOp: r.RuleCondition.CompareOp, CompareOp: r.RuleCondition.CompareOp,
Channels: r.PreferredChannels,
}}, }},
} }
r.RuleCondition.Thresholds = &thresholdData r.RuleCondition.Thresholds = &thresholdData
r.Evaluation = &EvaluationEnvelope{RollingEvaluation, RollingWindow{EvalWindow: r.EvalWindow, Frequency: r.Frequency}} r.Evaluation = &EvaluationEnvelope{RollingEvaluation, RollingWindow{EvalWindow: r.EvalWindow, Frequency: r.Frequency}}
r.NotificationSettings = &NotificationSettings{
Renotify: Renotify{
Enabled: true,
ReNotifyInterval: Duration(4 * time.Hour),
AlertStates: []model.AlertState{model.StateFiring},
},
}
if r.RuleCondition.AlertOnAbsent {
r.NotificationSettings.Renotify.AlertStates = append(r.NotificationSettings.Renotify.AlertStates, model.StateNoData)
}
} }
} }
@ -170,6 +256,7 @@ func (r *PostableRule) MarshalJSON() ([]byte, error) {
} }
aux.Evaluation = nil aux.Evaluation = nil
aux.SchemaVersion = "" aux.SchemaVersion = ""
aux.NotificationSettings = nil
return json.Marshal(aux) return json.Marshal(aux)
default: default:
copyStruct := *r copyStruct := *r
@ -192,7 +279,7 @@ func isValidLabelName(ln string) bool {
return false return false
} }
for i, b := range ln { for i, b := range ln {
if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || (b >= '0' && b <= '9' && i > 0)) { if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == '.' || (b >= '0' && b <= '9' && i > 0)) {
return false return false
} }
} }
@ -347,6 +434,7 @@ func (g *GettableRule) MarshalJSON() ([]byte, error) {
} }
aux.Evaluation = nil aux.Evaluation = nil
aux.SchemaVersion = "" aux.SchemaVersion = ""
aux.NotificationSettings = nil
return json.Marshal(aux) return json.Marshal(aux)
default: default:
copyStruct := *g copyStruct := *g

View File

@ -2,10 +2,11 @@ package ruletypes
import ( import (
"encoding/json" "encoding/json"
"github.com/stretchr/testify/assert"
"testing" "testing"
"time" "time"
"github.com/stretchr/testify/assert"
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3" v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
) )
@ -303,10 +304,6 @@ func TestParseIntoRuleSchemaVersioning(t *testing.T) {
t.Errorf("Expected threshold name 'warning' from severity label, got '%s'", spec.Name) t.Errorf("Expected threshold name 'warning' from severity label, got '%s'", spec.Name)
} }
// Verify all fields are copied from RuleCondition
if spec.RuleUnit != "percent" {
t.Errorf("Expected RuleUnit 'percent', got '%s'", spec.RuleUnit)
}
if spec.TargetUnit != "%" { if spec.TargetUnit != "%" {
t.Errorf("Expected TargetUnit '%%', got '%s'", spec.TargetUnit) t.Errorf("Expected TargetUnit '%%', got '%s'", spec.TargetUnit)
} }
@ -455,9 +452,6 @@ func TestParseIntoRuleSchemaVersioning(t *testing.T) {
if spec.TargetUnit != "%" { if spec.TargetUnit != "%" {
t.Errorf("Expected TargetUnit '%%' (overwritten), got '%s'", spec.TargetUnit) t.Errorf("Expected TargetUnit '%%' (overwritten), got '%s'", spec.TargetUnit)
} }
if spec.RuleUnit != "percent" {
t.Errorf("Expected RuleUnit 'percent' (overwritten), got '%s'", spec.RuleUnit)
}
if rule.Evaluation == nil { if rule.Evaluation == nil {
t.Fatal("Expected Evaluation to be populated") t.Fatal("Expected Evaluation to be populated")
@ -630,9 +624,9 @@ func TestParseIntoRuleThresholdGeneration(t *testing.T) {
vector, err := threshold.ShouldAlert(v3.Series{ vector, err := threshold.ShouldAlert(v3.Series{
Points: []v3.Point{{Value: 0.15, Timestamp: 1000}}, // 150ms in seconds Points: []v3.Point{{Value: 0.15, Timestamp: 1000}}, // 150ms in seconds
Labels: map[string]string{"test": "label"}, Labels: map[string]string{"test": "label"},
}) }, "")
if err != nil { if err != nil {
t.Fatalf("Unexpected error in ShouldAlert: %v", err) t.Fatalf("Unexpected error in shouldAlert: %v", err)
} }
if len(vector) == 0 { if len(vector) == 0 {
@ -707,9 +701,9 @@ func TestParseIntoRuleMultipleThresholds(t *testing.T) {
vector, err := threshold.ShouldAlert(v3.Series{ vector, err := threshold.ShouldAlert(v3.Series{
Points: []v3.Point{{Value: 95.0, Timestamp: 1000}}, // 95% CPU usage Points: []v3.Point{{Value: 95.0, Timestamp: 1000}}, // 95% CPU usage
Labels: map[string]string{"service": "test"}, Labels: map[string]string{"service": "test"},
}) }, "")
if err != nil { if err != nil {
t.Fatalf("Unexpected error in ShouldAlert: %v", err) t.Fatalf("Unexpected error in shouldAlert: %v", err)
} }
assert.Equal(t, 2, len(vector)) assert.Equal(t, 2, len(vector))
@ -717,9 +711,9 @@ func TestParseIntoRuleMultipleThresholds(t *testing.T) {
vector, err = threshold.ShouldAlert(v3.Series{ vector, err = threshold.ShouldAlert(v3.Series{
Points: []v3.Point{{Value: 75.0, Timestamp: 1000}}, // 75% CPU usage Points: []v3.Point{{Value: 75.0, Timestamp: 1000}}, // 75% CPU usage
Labels: map[string]string{"service": "test"}, Labels: map[string]string{"service": "test"},
}) }, "")
if err != nil { if err != nil {
t.Fatalf("Unexpected error in ShouldAlert: %v", err) t.Fatalf("Unexpected error in shouldAlert: %v", err)
} }
assert.Equal(t, 1, len(vector)) assert.Equal(t, 1, len(vector))

View File

@ -2,3 +2,4 @@ package ruletypes
const CriticalThresholdName = "CRITICAL" const CriticalThresholdName = "CRITICAL"
const LabelThresholdName = "threshold.name" const LabelThresholdName = "threshold.name"
const LabelRuleId = "ruleId"

View File

@ -18,6 +18,10 @@ type Sample struct {
Metric labels.Labels Metric labels.Labels
IsMissing bool IsMissing bool
Target float64
TargetUnit string
} }
func (s Sample) String() string { func (s Sample) String() string {

View File

@ -51,23 +51,41 @@ func (r *RuleThresholdData) UnmarshalJSON(data []byte) error {
return nil return nil
} }
// RuleReceivers pairs a threshold name with the notification channels
// configured for that threshold (see BasicRuleThresholds.GetRuleReceivers,
// which fills it from BasicRuleThreshold.Name and .Channels).
type RuleReceivers struct {
// Channels is serialized as "channels".
Channels []string `json:"channels"`
// Name is serialized as "name".
Name string `json:"name"`
}
type RuleThreshold interface { type RuleThreshold interface {
ShouldAlert(series v3.Series) (Vector, error) ShouldAlert(series v3.Series, unit string) (Vector, error)
GetRuleReceivers() []RuleReceivers
} }
type BasicRuleThreshold struct { type BasicRuleThreshold struct {
Name string `json:"name"` Name string `json:"name"`
TargetValue *float64 `json:"target"` TargetValue *float64 `json:"target"`
TargetUnit string `json:"targetUnit"` TargetUnit string `json:"targetUnit"`
RuleUnit string `json:"ruleUnit"`
RecoveryTarget *float64 `json:"recoveryTarget"` RecoveryTarget *float64 `json:"recoveryTarget"`
MatchType MatchType `json:"matchType"` MatchType MatchType `json:"matchType"`
CompareOp CompareOp `json:"op"` CompareOp CompareOp `json:"op"`
SelectedQuery string `json:"selectedQuery"` Channels []string `json:"channels"`
} }
type BasicRuleThresholds []BasicRuleThreshold type BasicRuleThresholds []BasicRuleThreshold
// GetRuleReceivers returns one RuleReceivers entry (threshold name and its
// channels) per threshold, in the order produced by sortThresholds. It
// returns nil when there are no thresholds.
//
// The thresholds are copied before sorting: converting r to
// []BasicRuleThreshold shares r's backing array, so sorting it directly would
// reorder the caller's thresholds as a side effect of this getter.
func (r BasicRuleThresholds) GetRuleReceivers() []RuleReceivers {
	if len(r) == 0 {
		return nil
	}

	thresholds := make([]BasicRuleThreshold, len(r))
	copy(thresholds, r)
	sortThresholds(thresholds)

	receiverRoutes := make([]RuleReceivers, 0, len(thresholds))
	for _, threshold := range thresholds {
		receiverRoutes = append(receiverRoutes, RuleReceivers{
			Name:     threshold.Name,
			Channels: threshold.Channels,
		})
	}
	return receiverRoutes
}
func (r BasicRuleThresholds) Validate() error { func (r BasicRuleThresholds) Validate() error {
var errs []error var errs []error
for _, basicThreshold := range r { for _, basicThreshold := range r {
@ -78,13 +96,27 @@ func (r BasicRuleThresholds) Validate() error {
return errors.Join(errs...) return errors.Join(errs...)
} }
func (r BasicRuleThresholds) ShouldAlert(series v3.Series) (Vector, error) { func (r BasicRuleThresholds) ShouldAlert(series v3.Series, unit string) (Vector, error) {
var resultVector Vector var resultVector Vector
thresholds := []BasicRuleThreshold(r) thresholds := []BasicRuleThreshold(r)
sortThresholds(thresholds)
for _, threshold := range thresholds {
smpl, shouldAlert := threshold.shouldAlert(series, unit)
if shouldAlert {
smpl.Target = threshold.target(unit)
smpl.TargetUnit = threshold.TargetUnit
resultVector = append(resultVector, smpl)
}
}
return resultVector, nil
}
func sortThresholds(thresholds []BasicRuleThreshold) {
sort.Slice(thresholds, func(i, j int) bool { sort.Slice(thresholds, func(i, j int) bool {
compareOp := thresholds[i].GetCompareOp()
targetI := thresholds[i].Target() compareOp := thresholds[i].getCompareOp()
targetJ := thresholds[j].Target() targetI := thresholds[i].target(thresholds[i].TargetUnit) //for sorting we dont need rule unit
targetJ := thresholds[j].target(thresholds[j].TargetUnit)
switch compareOp { switch compareOp {
case ValueIsAbove, ValueAboveOrEq, ValueOutsideBounds: case ValueIsAbove, ValueAboveOrEq, ValueOutsideBounds:
@ -98,49 +130,22 @@ func (r BasicRuleThresholds) ShouldAlert(series v3.Series) (Vector, error) {
return targetI > targetJ return targetI > targetJ
} }
}) })
for _, threshold := range thresholds {
smpl, shouldAlert := threshold.ShouldAlert(series)
if shouldAlert {
resultVector = append(resultVector, smpl)
}
}
return resultVector, nil
} }
func (b BasicRuleThreshold) GetName() string { func (b BasicRuleThreshold) target(ruleUnit string) float64 {
return b.Name
}
func (b BasicRuleThreshold) Target() float64 {
unitConverter := converter.FromUnit(converter.Unit(b.TargetUnit)) unitConverter := converter.FromUnit(converter.Unit(b.TargetUnit))
// convert the target value to the y-axis unit // convert the target value to the y-axis unit
value := unitConverter.Convert(converter.Value{ value := unitConverter.Convert(converter.Value{
F: *b.TargetValue, F: *b.TargetValue,
U: converter.Unit(b.TargetUnit), U: converter.Unit(b.TargetUnit),
}, converter.Unit(b.RuleUnit)) }, converter.Unit(ruleUnit))
return value.F return value.F
} }
func (b BasicRuleThreshold) GetRecoveryTarget() float64 { func (b BasicRuleThreshold) getCompareOp() CompareOp {
if b.RecoveryTarget == nil {
return 0
} else {
return *b.RecoveryTarget
}
}
func (b BasicRuleThreshold) GetMatchType() MatchType {
return b.MatchType
}
func (b BasicRuleThreshold) GetCompareOp() CompareOp {
return b.CompareOp return b.CompareOp
} }
func (b BasicRuleThreshold) GetSelectedQuery() string {
return b.SelectedQuery
}
func (b BasicRuleThreshold) Validate() error { func (b BasicRuleThreshold) Validate() error {
var errs []error var errs []error
if b.Name == "" { if b.Name == "" {
@ -182,7 +187,7 @@ func removeGroupinSetPoints(series v3.Series) []v3.Point {
return result return result
} }
func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) { func (b BasicRuleThreshold) shouldAlert(series v3.Series, ruleUnit string) (Sample, bool) {
var shouldAlert bool var shouldAlert bool
var alertSmpl Sample var alertSmpl Sample
var lbls labels.Labels var lbls labels.Labels
@ -191,6 +196,8 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
lbls = append(lbls, labels.Label{Name: name, Value: value}) lbls = append(lbls, labels.Label{Name: name, Value: value})
} }
target := b.target(ruleUnit)
lbls = append(lbls, labels.Label{Name: LabelThresholdName, Value: b.Name}) lbls = append(lbls, labels.Label{Name: LabelThresholdName, Value: b.Name})
series.Points = removeGroupinSetPoints(series) series.Points = removeGroupinSetPoints(series)
@ -205,7 +212,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
// If any sample matches the condition, the rule is firing. // If any sample matches the condition, the rule is firing.
if b.CompareOp == ValueIsAbove { if b.CompareOp == ValueIsAbove {
for _, smpl := range series.Points { for _, smpl := range series.Points {
if smpl.Value > b.Target() { if smpl.Value > target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls} alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true shouldAlert = true
break break
@ -213,7 +220,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
} }
} else if b.CompareOp == ValueIsBelow { } else if b.CompareOp == ValueIsBelow {
for _, smpl := range series.Points { for _, smpl := range series.Points {
if smpl.Value < b.Target() { if smpl.Value < target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls} alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true shouldAlert = true
break break
@ -221,7 +228,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
} }
} else if b.CompareOp == ValueIsEq { } else if b.CompareOp == ValueIsEq {
for _, smpl := range series.Points { for _, smpl := range series.Points {
if smpl.Value == b.Target() { if smpl.Value == target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls} alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true shouldAlert = true
break break
@ -229,7 +236,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
} }
} else if b.CompareOp == ValueIsNotEq { } else if b.CompareOp == ValueIsNotEq {
for _, smpl := range series.Points { for _, smpl := range series.Points {
if smpl.Value != b.Target() { if smpl.Value != target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls} alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true shouldAlert = true
break break
@ -237,7 +244,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
} }
} else if b.CompareOp == ValueOutsideBounds { } else if b.CompareOp == ValueOutsideBounds {
for _, smpl := range series.Points { for _, smpl := range series.Points {
if math.Abs(smpl.Value) >= b.Target() { if math.Abs(smpl.Value) >= target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls} alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = true shouldAlert = true
break break
@ -247,10 +254,10 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
case AllTheTimes: case AllTheTimes:
// If all samples match the condition, the rule is firing. // If all samples match the condition, the rule is firing.
shouldAlert = true shouldAlert = true
alertSmpl = Sample{Point: Point{V: b.Target()}, Metric: lbls} alertSmpl = Sample{Point: Point{V: target}, Metric: lbls}
if b.CompareOp == ValueIsAbove { if b.CompareOp == ValueIsAbove {
for _, smpl := range series.Points { for _, smpl := range series.Points {
if smpl.Value <= b.Target() { if smpl.Value <= target {
shouldAlert = false shouldAlert = false
break break
} }
@ -267,7 +274,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
} }
} else if b.CompareOp == ValueIsBelow { } else if b.CompareOp == ValueIsBelow {
for _, smpl := range series.Points { for _, smpl := range series.Points {
if smpl.Value >= b.Target() { if smpl.Value >= target {
shouldAlert = false shouldAlert = false
break break
} }
@ -283,14 +290,14 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
} }
} else if b.CompareOp == ValueIsEq { } else if b.CompareOp == ValueIsEq {
for _, smpl := range series.Points { for _, smpl := range series.Points {
if smpl.Value != b.Target() { if smpl.Value != target {
shouldAlert = false shouldAlert = false
break break
} }
} }
} else if b.CompareOp == ValueIsNotEq { } else if b.CompareOp == ValueIsNotEq {
for _, smpl := range series.Points { for _, smpl := range series.Points {
if smpl.Value == b.Target() { if smpl.Value == target {
shouldAlert = false shouldAlert = false
break break
} }
@ -306,7 +313,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
} }
} else if b.CompareOp == ValueOutsideBounds { } else if b.CompareOp == ValueOutsideBounds {
for _, smpl := range series.Points { for _, smpl := range series.Points {
if math.Abs(smpl.Value) < b.Target() { if math.Abs(smpl.Value) < target {
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls} alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
shouldAlert = false shouldAlert = false
break break
@ -326,23 +333,23 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
avg := sum / count avg := sum / count
alertSmpl = Sample{Point: Point{V: avg}, Metric: lbls} alertSmpl = Sample{Point: Point{V: avg}, Metric: lbls}
if b.CompareOp == ValueIsAbove { if b.CompareOp == ValueIsAbove {
if avg > b.Target() { if avg > target {
shouldAlert = true shouldAlert = true
} }
} else if b.CompareOp == ValueIsBelow { } else if b.CompareOp == ValueIsBelow {
if avg < b.Target() { if avg < target {
shouldAlert = true shouldAlert = true
} }
} else if b.CompareOp == ValueIsEq { } else if b.CompareOp == ValueIsEq {
if avg == b.Target() { if avg == target {
shouldAlert = true shouldAlert = true
} }
} else if b.CompareOp == ValueIsNotEq { } else if b.CompareOp == ValueIsNotEq {
if avg != b.Target() { if avg != target {
shouldAlert = true shouldAlert = true
} }
} else if b.CompareOp == ValueOutsideBounds { } else if b.CompareOp == ValueOutsideBounds {
if math.Abs(avg) >= b.Target() { if math.Abs(avg) >= target {
shouldAlert = true shouldAlert = true
} }
} }
@ -358,23 +365,23 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
} }
alertSmpl = Sample{Point: Point{V: sum}, Metric: lbls} alertSmpl = Sample{Point: Point{V: sum}, Metric: lbls}
if b.CompareOp == ValueIsAbove { if b.CompareOp == ValueIsAbove {
if sum > b.Target() { if sum > target {
shouldAlert = true shouldAlert = true
} }
} else if b.CompareOp == ValueIsBelow { } else if b.CompareOp == ValueIsBelow {
if sum < b.Target() { if sum < target {
shouldAlert = true shouldAlert = true
} }
} else if b.CompareOp == ValueIsEq { } else if b.CompareOp == ValueIsEq {
if sum == b.Target() { if sum == target {
shouldAlert = true shouldAlert = true
} }
} else if b.CompareOp == ValueIsNotEq { } else if b.CompareOp == ValueIsNotEq {
if sum != b.Target() { if sum != target {
shouldAlert = true shouldAlert = true
} }
} else if b.CompareOp == ValueOutsideBounds { } else if b.CompareOp == ValueOutsideBounds {
if math.Abs(sum) >= b.Target() { if math.Abs(sum) >= target {
shouldAlert = true shouldAlert = true
} }
} }
@ -383,19 +390,19 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
shouldAlert = false shouldAlert = false
alertSmpl = Sample{Point: Point{V: series.Points[len(series.Points)-1].Value}, Metric: lbls} alertSmpl = Sample{Point: Point{V: series.Points[len(series.Points)-1].Value}, Metric: lbls}
if b.CompareOp == ValueIsAbove { if b.CompareOp == ValueIsAbove {
if series.Points[len(series.Points)-1].Value > b.Target() { if series.Points[len(series.Points)-1].Value > target {
shouldAlert = true shouldAlert = true
} }
} else if b.CompareOp == ValueIsBelow { } else if b.CompareOp == ValueIsBelow {
if series.Points[len(series.Points)-1].Value < b.Target() { if series.Points[len(series.Points)-1].Value < target {
shouldAlert = true shouldAlert = true
} }
} else if b.CompareOp == ValueIsEq { } else if b.CompareOp == ValueIsEq {
if series.Points[len(series.Points)-1].Value == b.Target() { if series.Points[len(series.Points)-1].Value == target {
shouldAlert = true shouldAlert = true
} }
} else if b.CompareOp == ValueIsNotEq { } else if b.CompareOp == ValueIsNotEq {
if series.Points[len(series.Points)-1].Value != b.Target() { if series.Points[len(series.Points)-1].Value != target {
shouldAlert = true shouldAlert = true
} }
} }