mirror of
https://github.com/SigNoz/signoz.git
synced 2025-12-17 07:26:20 +00:00
chore: notification routing | added notification routing via expression based routes (#9195)
* chore: added custom distpatcher * feat(notification-grouping): added notification grouping * feat(notification-grouping): addded integration test dependency * feat(notification-grouping): linting and test cases * feat(notification-grouping): linting and test cases * feat(notification-grouping): linting and test cases * feat(notification-grouping): addded integration test dependency * feat(notification-grouping): debug log lines * feat(notification-grouping): debug log lines * feat(notification-grouping): debug log lines * feat(notification-grouping): addded integration test dependency * feat(notification-grouping): addded integration test dependency * feat(notification-grouping): addded integration test dependency * feat(notification-grouping): added structure changes * feat(notification-grouping): added structure changes * feat(notification-routing): added notification routing * chore(notification-grouping): added notificaiton grouping * Update pkg/alertmanager/nfmanager/rulebasednotification/provider.go Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> * chore(notification-grouping): added renotification interval * fix(notification-grouping): added fix for renotification * chore(notificaiton-grouping): added no data renotify * chore(notificaiton-grouping): added no data renotify * chore(notificaiton-grouping): added no data renotify * chore(notification-grouping): added no data renotify interval * chore(notification-grouping): removed errors package from dispatcher * chore(notification-grouping): removed errors package from dispatcher * chore(notification-grouping): removed unwanted tests * chore(notification-grouping): removed unwanted pkg name * chore(notification-grouping): added delete notification setting * chore(notification-grouping): added delete notification setting * Update pkg/alertmanager/nfmanager/nfmanagertest/provider.go Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> * 
chore(notification-grouping): removed nfmanager config| notification settings in postable rule * chore(notification-grouping): removed nfmanager config| notification settings in postable rule * chore(notification-grouping): added test for dispatcher * chore(notification-grouping): added test for dispatcher * chore(notification-grouping): go linting errors * chore(notification-grouping): added test cases for aggGroupPerRoute * chore(notification-grouping): added test cases for aggGroupPerRoute * chore(notification-grouping): corrected get notification config logic * Update pkg/alertmanager/nfmanager/rulebasednotification/provider_test.go Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> * chore(notification-routing): added notification routing policies * feat(notification-routing): added test cases for dispatcher * chore(notification-routing): added notification routing policies * chore(notification-routing): added notification routing policies * Apply suggestions from code review Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> * chore(notification-routing): added notification routing policies * chore(notification-routing): added notification routing policies * Update pkg/alertmanager/alertmanagerserver/distpatcher_test.go Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> * chore(notification-routing): sorted imports * chore(notification-routing): minor edit |pr resolve comments * chore(notification-grouping): corrected dispatcher test cases * chore(notification-routing): added notification routing policies * chore(notification-routing): corrected race condition in test * chore: resolved pr comments * chore: passing threshold value to tempalte * chore: completed delete rule functionality * chore: added grouping disabled functionality * chore: added grouping disabled functionality * chore(notification-routing): resolved pr comments * 
chore(notification-routing): resolved pr comments * chore(notification-routing): resolved pr comments * chore(notification-routing): sorted imports * chore(notification-routing): fix linting errors * chore(notification-routing): removed enabled flags * fix: test rule multiple threshold (#9224) * chore: corrected linting errors * chore: corrected linting errors * chore: corrected linting errors * chore: corrected linting errors * chore: corrected migration errors * chore: corrected migration errors * chore: corrected migration errors * chore: corrected migration errors * Update pkg/sqlmigration/049_add_route_policy.go Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> * chore: added org_id as foreign key * chore: resolved pr comments * chore: removed route store unused --------- Co-authored-by: Srikanth Chekuri <srikanth.chekuri92@gmail.com> Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
This commit is contained in:
parent
d3be2632b6
commit
f9a70a3a69
@ -251,7 +251,7 @@ func (r *AnomalyRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID, t
|
||||
continue
|
||||
}
|
||||
}
|
||||
results, err := r.Threshold.ShouldAlert(*series)
|
||||
results, err := r.Threshold.ShouldAlert(*series, r.Unit())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -301,7 +301,7 @@ func (r *AnomalyRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID,
|
||||
continue
|
||||
}
|
||||
}
|
||||
results, err := r.Threshold.ShouldAlert(*series)
|
||||
results, err := r.Threshold.ShouldAlert(*series, r.Unit())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -336,14 +336,19 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (interface{}, erro
|
||||
resultFPs := map[uint64]struct{}{}
|
||||
var alerts = make(map[uint64]*ruletypes.Alert, len(res))
|
||||
|
||||
ruleReceivers := r.Threshold.GetRuleReceivers()
|
||||
ruleReceiverMap := make(map[string][]string)
|
||||
for _, value := range ruleReceivers {
|
||||
ruleReceiverMap[value.Name] = value.Channels
|
||||
}
|
||||
|
||||
for _, smpl := range res {
|
||||
l := make(map[string]string, len(smpl.Metric))
|
||||
for _, lbl := range smpl.Metric {
|
||||
l[lbl.Name] = lbl.Value
|
||||
}
|
||||
|
||||
value := valueFormatter.Format(smpl.V, r.Unit())
|
||||
threshold := valueFormatter.Format(r.TargetVal(), r.Unit())
|
||||
threshold := valueFormatter.Format(smpl.Target, smpl.TargetUnit)
|
||||
r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold)
|
||||
|
||||
tmplData := ruletypes.AlertTemplateData(l, value, threshold)
|
||||
@ -408,13 +413,12 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (interface{}, erro
|
||||
State: model.StatePending,
|
||||
Value: smpl.V,
|
||||
GeneratorURL: r.GeneratorURL(),
|
||||
Receivers: r.PreferredChannels(),
|
||||
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
|
||||
Missing: smpl.IsMissing,
|
||||
}
|
||||
}
|
||||
|
||||
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
|
||||
|
||||
// alerts[h] is ready, add or update active list now
|
||||
for h, a := range alerts {
|
||||
// Check whether we already have alerting state for the identifying label set.
|
||||
@ -423,7 +427,9 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (interface{}, erro
|
||||
|
||||
alert.Value = a.Value
|
||||
alert.Annotations = a.Annotations
|
||||
alert.Receivers = r.PreferredChannels()
|
||||
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
|
||||
alert.Receivers = ruleReceiverMap[v]
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
@ -126,7 +126,6 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, *basemodel.Ap
|
||||
if parsedRule.RuleType == ruletypes.RuleTypeThreshold {
|
||||
|
||||
// add special labels for test alerts
|
||||
parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target)
|
||||
parsedRule.Labels[labels.RuleSourceLabel] = ""
|
||||
parsedRule.Labels[labels.AlertRuleIdLabel] = ""
|
||||
|
||||
|
||||
2
go.mod
2
go.mod
@ -127,7 +127,7 @@ require (
|
||||
github.com/elastic/lunes v0.1.0 // indirect
|
||||
github.com/emirpasic/gods v1.18.1 // indirect
|
||||
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
|
||||
github.com/expr-lang/expr v1.17.5 // indirect
|
||||
github.com/expr-lang/expr v1.17.5
|
||||
github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb // indirect
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/fsnotify/fsnotify v1.9.0 // indirect
|
||||
|
||||
@ -3,6 +3,8 @@ package alertmanager
|
||||
import (
|
||||
"context"
|
||||
|
||||
amConfig "github.com/prometheus/alertmanager/config"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/factory"
|
||||
"github.com/SigNoz/signoz/pkg/statsreporter"
|
||||
@ -26,7 +28,7 @@ type Alertmanager interface {
|
||||
TestReceiver(context.Context, string, alertmanagertypes.Receiver) error
|
||||
|
||||
// TestAlert sends an alert to a list of receivers.
|
||||
TestAlert(ctx context.Context, orgID string, alert *alertmanagertypes.PostableAlert, receivers []string) error
|
||||
TestAlert(ctx context.Context, orgID string, ruleID string, receiversMap map[*alertmanagertypes.PostableAlert][]string) error
|
||||
|
||||
// ListChannels lists all channels for the organization.
|
||||
ListChannels(context.Context, string) ([]*alertmanagertypes.Channel, error)
|
||||
@ -59,6 +61,19 @@ type Alertmanager interface {
|
||||
|
||||
DeleteNotificationConfig(ctx context.Context, orgID valuer.UUID, ruleId string) error
|
||||
|
||||
// Notification Policy CRUD
|
||||
CreateRoutePolicy(ctx context.Context, route *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error)
|
||||
CreateRoutePolicies(ctx context.Context, routeRequests []*alertmanagertypes.PostableRoutePolicy) ([]*alertmanagertypes.GettableRoutePolicy, error)
|
||||
GetRoutePolicyByID(ctx context.Context, routeID string) (*alertmanagertypes.GettableRoutePolicy, error)
|
||||
GetAllRoutePolicies(ctx context.Context) ([]*alertmanagertypes.GettableRoutePolicy, error)
|
||||
UpdateRoutePolicyByID(ctx context.Context, routeID string, route *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error)
|
||||
DeleteRoutePolicyByID(ctx context.Context, routeID string) error
|
||||
DeleteAllRoutePoliciesByRuleId(ctx context.Context, ruleId string) error
|
||||
UpdateAllRoutePoliciesByRuleId(ctx context.Context, ruleId string, routes []*alertmanagertypes.PostableRoutePolicy) error
|
||||
|
||||
CreateInhibitRules(ctx context.Context, orgID valuer.UUID, rules []amConfig.InhibitRule) error
|
||||
DeleteAllInhibitRulesByRuleId(ctx context.Context, orgID valuer.UUID, ruleId string) error
|
||||
|
||||
// Collects stats for the organization.
|
||||
statsreporter.StatsCollector
|
||||
}
|
||||
|
||||
@ -10,19 +10,17 @@ import (
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
|
||||
"github.com/prometheus/alertmanager/dispatch"
|
||||
"github.com/prometheus/alertmanager/notify"
|
||||
"github.com/prometheus/alertmanager/pkg/labels"
|
||||
"github.com/prometheus/alertmanager/provider"
|
||||
"github.com/prometheus/alertmanager/store"
|
||||
"github.com/prometheus/alertmanager/types"
|
||||
"github.com/prometheus/common/model"
|
||||
)
|
||||
|
||||
const (
|
||||
noDataLabel = model.LabelName("nodata")
|
||||
)
|
||||
|
||||
// Dispatcher sorts incoming alerts into aggregation groups and
|
||||
// assigns the correct notifiers to each.
|
||||
type Dispatcher struct {
|
||||
@ -46,6 +44,7 @@ type Dispatcher struct {
|
||||
logger *slog.Logger
|
||||
notificationManager nfmanager.NotificationManager
|
||||
orgID string
|
||||
receiverRoutes map[string]*dispatch.Route
|
||||
}
|
||||
|
||||
// We use the upstream Limits interface from Prometheus
|
||||
@ -90,6 +89,7 @@ func (d *Dispatcher) Run() {
|
||||
|
||||
d.mtx.Lock()
|
||||
d.aggrGroupsPerRoute = map[*dispatch.Route]map[model.Fingerprint]*aggrGroup{}
|
||||
d.receiverRoutes = map[string]*dispatch.Route{}
|
||||
d.aggrGroupsNum = 0
|
||||
d.metrics.aggrGroups.Set(0)
|
||||
d.ctx, d.cancel = context.WithCancel(context.Background())
|
||||
@ -125,8 +125,14 @@ func (d *Dispatcher) run(it provider.AlertIterator) {
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
for _, r := range d.route.Match(alert.Labels) {
|
||||
d.processAlert(alert, r)
|
||||
channels, err := d.notificationManager.Match(d.ctx, d.orgID, getRuleIDFromAlert(alert), alert.Labels)
|
||||
if err != nil {
|
||||
d.logger.ErrorContext(d.ctx, "Error on alert match", "err", err)
|
||||
continue
|
||||
}
|
||||
for _, channel := range channels {
|
||||
route := d.getOrCreateRoute(channel)
|
||||
d.processAlert(alert, route)
|
||||
}
|
||||
d.metrics.processingDuration.Observe(time.Since(now).Seconds())
|
||||
|
||||
@ -266,6 +272,7 @@ type notifyFunc func(context.Context, ...*types.Alert) bool
|
||||
|
||||
// processAlert determines in which aggregation group the alert falls
|
||||
// and inserts it.
|
||||
// no data alert will only have ruleId and no data label
|
||||
func (d *Dispatcher) processAlert(alert *types.Alert, route *dispatch.Route) {
|
||||
ruleId := getRuleIDFromAlert(alert)
|
||||
config, err := d.notificationManager.GetNotificationConfig(d.orgID, ruleId)
|
||||
@ -273,8 +280,14 @@ func (d *Dispatcher) processAlert(alert *types.Alert, route *dispatch.Route) {
|
||||
d.logger.ErrorContext(d.ctx, "error getting alert notification config", "rule_id", ruleId, "error", err)
|
||||
return
|
||||
}
|
||||
renotifyInterval := config.Renotify.RenotifyInterval
|
||||
|
||||
groupLabels := getGroupLabels(alert, config.NotificationGroup)
|
||||
groupLabels := getGroupLabels(alert, config.NotificationGroup, config.GroupByAll)
|
||||
|
||||
if alertmanagertypes.NoDataAlert(alert) {
|
||||
renotifyInterval = config.Renotify.NoDataInterval
|
||||
groupLabels[alertmanagertypes.NoDataLabel] = alert.Labels[alertmanagertypes.NoDataLabel] //to create new group key for no data alerts
|
||||
}
|
||||
|
||||
fp := groupLabels.Fingerprint()
|
||||
|
||||
@ -299,12 +312,6 @@ func (d *Dispatcher) processAlert(alert *types.Alert, route *dispatch.Route) {
|
||||
d.logger.ErrorContext(d.ctx, "Too many aggregation groups, cannot create new group for alert", "groups", d.aggrGroupsNum, "limit", limit, "alert", alert.Name())
|
||||
return
|
||||
}
|
||||
renotifyInterval := config.Renotify.RenotifyInterval
|
||||
|
||||
if noDataAlert(alert) {
|
||||
renotifyInterval = config.Renotify.NoDataInterval
|
||||
groupLabels[noDataLabel] = alert.Labels[noDataLabel]
|
||||
}
|
||||
|
||||
ag = newAggrGroup(d.ctx, groupLabels, route, d.timeout, d.logger, renotifyInterval)
|
||||
|
||||
@ -543,21 +550,35 @@ func deepCopyRouteOpts(opts dispatch.RouteOpts, renotify time.Duration) dispatch
|
||||
return newOpts
|
||||
}
|
||||
|
||||
func getGroupLabels(alert *types.Alert, groups map[model.LabelName]struct{}) model.LabelSet {
|
||||
func getGroupLabels(alert *types.Alert, groups map[model.LabelName]struct{}, groupByAll bool) model.LabelSet {
|
||||
groupLabels := model.LabelSet{}
|
||||
for ln, lv := range alert.Labels {
|
||||
if _, ok := groups[ln]; ok {
|
||||
if _, ok := groups[ln]; ok || groupByAll {
|
||||
groupLabels[ln] = lv
|
||||
}
|
||||
}
|
||||
|
||||
return groupLabels
|
||||
}
|
||||
|
||||
func noDataAlert(alert *types.Alert) bool {
|
||||
if _, ok := alert.Labels[noDataLabel]; ok {
|
||||
return true
|
||||
} else {
|
||||
return false
|
||||
func (d *Dispatcher) getOrCreateRoute(receiver string) *dispatch.Route {
|
||||
d.mtx.Lock()
|
||||
defer d.mtx.Unlock()
|
||||
if route, exists := d.receiverRoutes[receiver]; exists {
|
||||
return route
|
||||
}
|
||||
route := &dispatch.Route{
|
||||
RouteOpts: dispatch.RouteOpts{
|
||||
Receiver: receiver,
|
||||
GroupWait: 30 * time.Second,
|
||||
GroupInterval: 5 * time.Minute,
|
||||
GroupByAll: false,
|
||||
},
|
||||
Matchers: labels.Matchers{{
|
||||
Name: "__receiver__",
|
||||
Value: receiver,
|
||||
Type: labels.MatchEqual,
|
||||
}},
|
||||
}
|
||||
d.receiverRoutes[receiver] = route
|
||||
return route
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -2,6 +2,9 @@ package alertmanagerserver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/prometheus/alertmanager/types"
|
||||
"golang.org/x/sync/errgroup"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"sync"
|
||||
@ -321,39 +324,104 @@ func (server *Server) SetConfig(ctx context.Context, alertmanagerConfig *alertma
|
||||
}
|
||||
|
||||
func (server *Server) TestReceiver(ctx context.Context, receiver alertmanagertypes.Receiver) error {
|
||||
return alertmanagertypes.TestReceiver(ctx, receiver, alertmanagernotify.NewReceiverIntegrations, server.alertmanagerConfig, server.tmpl, server.logger, alertmanagertypes.NewTestAlert(receiver, time.Now(), time.Now()))
|
||||
testAlert := alertmanagertypes.NewTestAlert(receiver, time.Now(), time.Now())
|
||||
return alertmanagertypes.TestReceiver(ctx, receiver, alertmanagernotify.NewReceiverIntegrations, server.alertmanagerConfig, server.tmpl, server.logger, testAlert.Labels, testAlert)
|
||||
}
|
||||
|
||||
func (server *Server) TestAlert(ctx context.Context, postableAlert *alertmanagertypes.PostableAlert, receivers []string) error {
|
||||
alerts, err := alertmanagertypes.NewAlertsFromPostableAlerts(alertmanagertypes.PostableAlerts{postableAlert}, time.Duration(server.srvConfig.Global.ResolveTimeout), time.Now())
|
||||
func (server *Server) TestAlert(ctx context.Context, receiversMap map[*alertmanagertypes.PostableAlert][]string, config *alertmanagertypes.NotificationConfig) error {
|
||||
if len(receiversMap) == 0 {
|
||||
return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput,
|
||||
"expected at least 1 alert, got 0")
|
||||
}
|
||||
|
||||
postableAlerts := make(alertmanagertypes.PostableAlerts, 0, len(receiversMap))
|
||||
for alert := range receiversMap {
|
||||
postableAlerts = append(postableAlerts, alert)
|
||||
}
|
||||
|
||||
alerts, err := alertmanagertypes.NewAlertsFromPostableAlerts(
|
||||
postableAlerts,
|
||||
time.Duration(server.srvConfig.Global.ResolveTimeout),
|
||||
time.Now(),
|
||||
)
|
||||
if err != nil {
|
||||
return errors.Join(err...)
|
||||
return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput,
|
||||
"failed to construct alerts from postable alerts: %v", err)
|
||||
}
|
||||
|
||||
if len(alerts) != 1 {
|
||||
return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "expected 1 alert, got %d", len(alerts))
|
||||
type alertGroup struct {
|
||||
groupLabels model.LabelSet
|
||||
alerts []*types.Alert
|
||||
receivers map[string]struct{}
|
||||
}
|
||||
|
||||
ch := make(chan error, len(receivers))
|
||||
for _, receiverName := range receivers {
|
||||
go func(receiverName string) {
|
||||
receiver, err := server.alertmanagerConfig.GetReceiver(receiverName)
|
||||
if err != nil {
|
||||
ch <- err
|
||||
return
|
||||
groupMap := make(map[model.Fingerprint]*alertGroup)
|
||||
|
||||
for i, alert := range alerts {
|
||||
labels := getGroupLabels(alert, config.NotificationGroup, config.GroupByAll)
|
||||
fp := labels.Fingerprint()
|
||||
|
||||
postableAlert := postableAlerts[i]
|
||||
alertReceivers := receiversMap[postableAlert]
|
||||
|
||||
if group, exists := groupMap[fp]; exists {
|
||||
group.alerts = append(group.alerts, alert)
|
||||
for _, r := range alertReceivers {
|
||||
group.receivers[r] = struct{}{}
|
||||
}
|
||||
} else {
|
||||
receiverSet := make(map[string]struct{})
|
||||
for _, r := range alertReceivers {
|
||||
receiverSet[r] = struct{}{}
|
||||
}
|
||||
groupMap[fp] = &alertGroup{
|
||||
groupLabels: labels,
|
||||
alerts: []*types.Alert{alert},
|
||||
receivers: receiverSet,
|
||||
}
|
||||
ch <- alertmanagertypes.TestReceiver(ctx, receiver, alertmanagernotify.NewReceiverIntegrations, server.alertmanagerConfig, server.tmpl, server.logger, alerts[0])
|
||||
}(receiverName)
|
||||
}
|
||||
|
||||
var errs []error
|
||||
for i := 0; i < len(receivers); i++ {
|
||||
if err := <-ch; err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
|
||||
if errs != nil {
|
||||
var mu sync.Mutex
|
||||
var errs []error
|
||||
|
||||
g, gCtx := errgroup.WithContext(ctx)
|
||||
for _, group := range groupMap {
|
||||
for receiverName := range group.receivers {
|
||||
group := group
|
||||
receiverName := receiverName
|
||||
|
||||
g.Go(func() error {
|
||||
receiver, err := server.alertmanagerConfig.GetReceiver(receiverName)
|
||||
if err != nil {
|
||||
mu.Lock()
|
||||
errs = append(errs, fmt.Errorf("failed to get receiver %q: %w", receiverName, err))
|
||||
mu.Unlock()
|
||||
return nil // Return nil to continue processing other goroutines
|
||||
}
|
||||
|
||||
err = alertmanagertypes.TestReceiver(
|
||||
gCtx,
|
||||
receiver,
|
||||
alertmanagernotify.NewReceiverIntegrations,
|
||||
server.alertmanagerConfig,
|
||||
server.tmpl,
|
||||
server.logger,
|
||||
group.groupLabels,
|
||||
group.alerts...,
|
||||
)
|
||||
if err != nil {
|
||||
mu.Lock()
|
||||
errs = append(errs, fmt.Errorf("receiver %q test failed: %w", receiverName, err))
|
||||
mu.Unlock()
|
||||
}
|
||||
return nil // Return nil to continue processing other goroutines
|
||||
})
|
||||
}
|
||||
}
|
||||
_ = g.Wait()
|
||||
|
||||
if len(errs) > 0 {
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
|
||||
|
||||
223
pkg/alertmanager/alertmanagerserver/server_e2e_test.go
Normal file
223
pkg/alertmanager/alertmanagerserver/server_e2e_test.go
Normal file
@ -0,0 +1,223 @@
|
||||
package alertmanagerserver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes/alertmanagertypestest"
|
||||
"github.com/prometheus/alertmanager/dispatch"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest"
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/rulebasednotification"
|
||||
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
|
||||
"github.com/SigNoz/signoz/pkg/types"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
|
||||
"github.com/go-openapi/strfmt"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestEndToEndAlertManagerFlow(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
providerSettings := instrumentationtest.New().ToProviderSettings()
|
||||
|
||||
store := nfroutingstoretest.NewMockSQLRouteStore()
|
||||
store.MatchExpectationsInOrder(false)
|
||||
notificationManager, err := rulebasednotification.New(ctx, providerSettings, nfmanager.Config{}, store)
|
||||
require.NoError(t, err)
|
||||
orgID := "test-org"
|
||||
|
||||
routes := []*alertmanagertypes.RoutePolicy{
|
||||
{
|
||||
Identifiable: types.Identifiable{
|
||||
ID: valuer.GenerateUUID(),
|
||||
},
|
||||
Expression: `ruleId == "high-cpu-usage" && severity == "critical"`,
|
||||
ExpressionKind: alertmanagertypes.RuleBasedExpression,
|
||||
Name: "high-cpu-usage",
|
||||
Description: "High CPU critical alerts to webhook",
|
||||
Enabled: true,
|
||||
OrgID: orgID,
|
||||
Channels: []string{"webhook"},
|
||||
},
|
||||
{
|
||||
Identifiable: types.Identifiable{
|
||||
ID: valuer.GenerateUUID(),
|
||||
},
|
||||
Expression: `ruleId == "high-cpu-usage" && severity == "warning"`,
|
||||
ExpressionKind: alertmanagertypes.RuleBasedExpression,
|
||||
Name: "high-cpu-usage",
|
||||
Description: "High CPU warning alerts to webhook",
|
||||
Enabled: true,
|
||||
OrgID: orgID,
|
||||
Channels: []string{"webhook"},
|
||||
},
|
||||
}
|
||||
|
||||
store.ExpectCreateBatch(routes)
|
||||
err = notificationManager.CreateRoutePolicies(ctx, orgID, routes)
|
||||
require.NoError(t, err)
|
||||
|
||||
for range routes {
|
||||
ruleID := "high-cpu-usage"
|
||||
store.ExpectGetAllByName(orgID, ruleID, routes)
|
||||
store.ExpectGetAllByName(orgID, ruleID, routes)
|
||||
}
|
||||
|
||||
notifConfig := alertmanagertypes.NotificationConfig{
|
||||
NotificationGroup: map[model.LabelName]struct{}{
|
||||
model.LabelName("cluster"): {},
|
||||
model.LabelName("instance"): {},
|
||||
},
|
||||
Renotify: alertmanagertypes.ReNotificationConfig{
|
||||
RenotifyInterval: 5 * time.Minute,
|
||||
},
|
||||
UsePolicy: false,
|
||||
}
|
||||
|
||||
err = notificationManager.SetNotificationConfig(orgID, "high-cpu-usage", &notifConfig)
|
||||
require.NoError(t, err)
|
||||
|
||||
srvCfg := NewConfig()
|
||||
stateStore := alertmanagertypestest.NewStateStore()
|
||||
registry := prometheus.NewRegistry()
|
||||
logger := slog.New(slog.NewTextHandler(io.Discard, nil))
|
||||
server, err := New(context.Background(), logger, registry, srvCfg, orgID, stateStore, notificationManager)
|
||||
require.NoError(t, err)
|
||||
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, orgID)
|
||||
require.NoError(t, err)
|
||||
err = server.SetConfig(ctx, amConfig)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create test alerts
|
||||
now := time.Now()
|
||||
testAlerts := []*alertmanagertypes.PostableAlert{
|
||||
{
|
||||
Alert: alertmanagertypes.AlertModel{
|
||||
Labels: map[string]string{
|
||||
"ruleId": "high-cpu-usage",
|
||||
"severity": "critical",
|
||||
"cluster": "prod-cluster",
|
||||
"instance": "server-01",
|
||||
"alertname": "HighCPUUsage",
|
||||
},
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": "High CPU usage detected",
|
||||
"description": "CPU usage is above 90% for 5 minutes",
|
||||
},
|
||||
StartsAt: strfmt.DateTime(now.Add(-5 * time.Minute)),
|
||||
EndsAt: strfmt.DateTime(time.Time{}), // Active alert
|
||||
},
|
||||
{
|
||||
Alert: alertmanagertypes.AlertModel{
|
||||
Labels: map[string]string{
|
||||
"ruleId": "high-cpu-usage",
|
||||
"severity": "warning",
|
||||
"cluster": "prod-cluster",
|
||||
"instance": "server-02",
|
||||
"alertname": "HighCPUUsage",
|
||||
},
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": "Moderate CPU usage detected",
|
||||
"description": "CPU usage is above 70% for 10 minutes",
|
||||
},
|
||||
StartsAt: strfmt.DateTime(now.Add(-10 * time.Minute)),
|
||||
EndsAt: strfmt.DateTime(time.Time{}), // Active alert
|
||||
},
|
||||
{
|
||||
Alert: alertmanagertypes.AlertModel{
|
||||
Labels: map[string]string{
|
||||
"ruleId": "high-cpu-usage",
|
||||
"severity": "critical",
|
||||
"cluster": "prod-cluster",
|
||||
"instance": "server-03",
|
||||
"alertname": "HighCPUUsage",
|
||||
},
|
||||
},
|
||||
Annotations: map[string]string{
|
||||
"summary": "High CPU usage detected on server-03",
|
||||
"description": "CPU usage is above 95% for 3 minutes",
|
||||
},
|
||||
StartsAt: strfmt.DateTime(now.Add(-3 * time.Minute)),
|
||||
EndsAt: strfmt.DateTime(time.Time{}), // Active alert
|
||||
},
|
||||
}
|
||||
|
||||
err = server.PutAlerts(ctx, testAlerts)
|
||||
require.NoError(t, err)
|
||||
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
t.Run("verify_alerts_processed", func(t *testing.T) {
|
||||
dummyRequest, err := http.NewRequest(http.MethodGet, "/alerts", nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
params, err := alertmanagertypes.NewGettableAlertsParams(dummyRequest)
|
||||
require.NoError(t, err)
|
||||
alerts, err := server.GetAlerts(context.Background(), params)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, alerts, 3, "Expected 3 active alerts")
|
||||
|
||||
for _, alert := range alerts {
|
||||
require.Equal(t, "high-cpu-usage", alert.Alert.Labels["ruleId"])
|
||||
require.NotEmpty(t, alert.Alert.Labels["severity"])
|
||||
require.Contains(t, []string{"critical", "warning"}, alert.Alert.Labels["severity"])
|
||||
require.Equal(t, "prod-cluster", alert.Alert.Labels["cluster"])
|
||||
require.NotEmpty(t, alert.Alert.Labels["instance"])
|
||||
}
|
||||
|
||||
criticalAlerts := 0
|
||||
warningAlerts := 0
|
||||
for _, alert := range alerts {
|
||||
if alert.Alert.Labels["severity"] == "critical" {
|
||||
criticalAlerts++
|
||||
} else if alert.Alert.Labels["severity"] == "warning" {
|
||||
warningAlerts++
|
||||
}
|
||||
}
|
||||
require.Equal(t, 2, criticalAlerts, "Expected 2 critical alerts")
|
||||
require.Equal(t, 1, warningAlerts, "Expected 1 warning alert")
|
||||
})
|
||||
|
||||
t.Run("verify_notification_routing", func(t *testing.T) {
|
||||
|
||||
notifConfig, err := notificationManager.GetNotificationConfig(orgID, "high-cpu-usage")
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, notifConfig)
|
||||
require.Equal(t, 5*time.Minute, notifConfig.Renotify.RenotifyInterval)
|
||||
require.Contains(t, notifConfig.NotificationGroup, model.LabelName("ruleId"))
|
||||
require.Contains(t, notifConfig.NotificationGroup, model.LabelName("cluster"))
|
||||
require.Contains(t, notifConfig.NotificationGroup, model.LabelName("instance"))
|
||||
})
|
||||
|
||||
t.Run("verify_alert_groups_and_stages", func(t *testing.T) {
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
alertGroups, _ := server.dispatcher.Groups(
|
||||
func(route *dispatch.Route) bool { return true }, // Accept all routes
|
||||
func(alert *alertmanagertypes.Alert, now time.Time) bool { return true }, // Accept all alerts
|
||||
)
|
||||
require.Len(t, alertGroups, 3)
|
||||
|
||||
require.NotEmpty(t, alertGroups, "Should have alert groups created by dispatcher")
|
||||
|
||||
totalAlerts := 0
|
||||
for _, group := range alertGroups {
|
||||
totalAlerts += len(group.Alerts)
|
||||
}
|
||||
require.Equal(t, 3, totalAlerts, "Should have 3 alerts total across all groups")
|
||||
require.Equal(t, "{__receiver__=\"webhook\"}:{cluster=\"prod-cluster\", instance=\"server-01\", ruleId=\"high-cpu-usage\"}", alertGroups[0].GroupKey)
|
||||
require.Equal(t, "{__receiver__=\"webhook\"}:{cluster=\"prod-cluster\", instance=\"server-02\", ruleId=\"high-cpu-usage\"}", alertGroups[1].GroupKey)
|
||||
require.Equal(t, "{__receiver__=\"webhook\"}:{cluster=\"prod-cluster\", instance=\"server-03\", ruleId=\"high-cpu-usage\"}", alertGroups[2].GroupKey)
|
||||
})
|
||||
}
|
||||
@ -19,6 +19,7 @@ import (
|
||||
"github.com/prometheus/alertmanager/config"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
commoncfg "github.com/prometheus/common/config"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
@ -127,3 +128,189 @@ func TestServerPutAlerts(t *testing.T) {
|
||||
assert.Equal(t, gettableAlerts[0].Alert.Labels["alertname"], "test-alert")
|
||||
assert.NoError(t, server.Stop(context.Background()))
|
||||
}
|
||||
|
||||
func TestServerTestAlert(t *testing.T) {
|
||||
stateStore := alertmanagertypestest.NewStateStore()
|
||||
srvCfg := NewConfig()
|
||||
srvCfg.Route.GroupInterval = 1 * time.Second
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
server, err := New(context.Background(), slog.New(slog.NewTextHandler(io.Discard, nil)), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
|
||||
require.NoError(t, err)
|
||||
|
||||
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
|
||||
require.NoError(t, err)
|
||||
|
||||
webhook1Listener, err := net.Listen("tcp", "localhost:0")
|
||||
require.NoError(t, err)
|
||||
webhook2Listener, err := net.Listen("tcp", "localhost:0")
|
||||
require.NoError(t, err)
|
||||
|
||||
requestCount1 := 0
|
||||
requestCount2 := 0
|
||||
webhook1Server := &http.Server{
|
||||
Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
requestCount1++
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}),
|
||||
}
|
||||
webhook2Server := &http.Server{
|
||||
Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
requestCount2++
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}),
|
||||
}
|
||||
|
||||
go func() {
|
||||
_ = webhook1Server.Serve(webhook1Listener)
|
||||
}()
|
||||
go func() {
|
||||
_ = webhook2Server.Serve(webhook2Listener)
|
||||
}()
|
||||
|
||||
webhook1URL, err := url.Parse("http://" + webhook1Listener.Addr().String() + "/webhook")
|
||||
require.NoError(t, err)
|
||||
webhook2URL, err := url.Parse("http://" + webhook2Listener.Addr().String() + "/webhook")
|
||||
require.NoError(t, err)
|
||||
|
||||
require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
|
||||
Name: "receiver-1",
|
||||
WebhookConfigs: []*config.WebhookConfig{
|
||||
{
|
||||
HTTPConfig: &commoncfg.HTTPClientConfig{},
|
||||
URL: &config.SecretURL{URL: webhook1URL},
|
||||
},
|
||||
},
|
||||
}))
|
||||
|
||||
require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
|
||||
Name: "receiver-2",
|
||||
WebhookConfigs: []*config.WebhookConfig{
|
||||
{
|
||||
HTTPConfig: &commoncfg.HTTPClientConfig{},
|
||||
URL: &config.SecretURL{URL: webhook2URL},
|
||||
},
|
||||
},
|
||||
}))
|
||||
|
||||
require.NoError(t, server.SetConfig(context.Background(), amConfig))
|
||||
defer func() {
|
||||
_ = server.Stop(context.Background())
|
||||
_ = webhook1Server.Close()
|
||||
_ = webhook2Server.Close()
|
||||
}()
|
||||
|
||||
// Test with multiple alerts going to different receivers
|
||||
alert1 := &alertmanagertypes.PostableAlert{
|
||||
Annotations: models.LabelSet{"alertname": "test-alert-1"},
|
||||
StartsAt: strfmt.DateTime(time.Now()),
|
||||
Alert: models.Alert{
|
||||
Labels: models.LabelSet{"alertname": "test-alert-1", "severity": "critical"},
|
||||
},
|
||||
}
|
||||
alert2 := &alertmanagertypes.PostableAlert{
|
||||
Annotations: models.LabelSet{"alertname": "test-alert-2"},
|
||||
StartsAt: strfmt.DateTime(time.Now()),
|
||||
Alert: models.Alert{
|
||||
Labels: models.LabelSet{"alertname": "test-alert-2", "severity": "warning"},
|
||||
},
|
||||
}
|
||||
|
||||
receiversMap := map[*alertmanagertypes.PostableAlert][]string{
|
||||
alert1: {"receiver-1", "receiver-2"},
|
||||
alert2: {"receiver-2"},
|
||||
}
|
||||
|
||||
config := &alertmanagertypes.NotificationConfig{
|
||||
NotificationGroup: make(map[model.LabelName]struct{}),
|
||||
GroupByAll: false,
|
||||
}
|
||||
|
||||
err = server.TestAlert(context.Background(), receiversMap, config)
|
||||
require.NoError(t, err)
|
||||
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
assert.Greater(t, requestCount1, 0, "receiver-1 should have received at least one request")
|
||||
assert.Greater(t, requestCount2, 0, "receiver-2 should have received at least one request")
|
||||
}
|
||||
|
||||
func TestServerTestAlertContinuesOnFailure(t *testing.T) {
|
||||
stateStore := alertmanagertypestest.NewStateStore()
|
||||
srvCfg := NewConfig()
|
||||
srvCfg.Route.GroupInterval = 1 * time.Second
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
server, err := New(context.Background(), slog.New(slog.NewTextHandler(io.Discard, nil)), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
|
||||
require.NoError(t, err)
|
||||
|
||||
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create one working webhook and one failing receiver (non-existent)
|
||||
webhookListener, err := net.Listen("tcp", "localhost:0")
|
||||
require.NoError(t, err)
|
||||
|
||||
requestCount := 0
|
||||
webhookServer := &http.Server{
|
||||
Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
requestCount++
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}),
|
||||
}
|
||||
|
||||
go func() {
|
||||
_ = webhookServer.Serve(webhookListener)
|
||||
}()
|
||||
|
||||
webhookURL, err := url.Parse("http://" + webhookListener.Addr().String() + "/webhook")
|
||||
require.NoError(t, err)
|
||||
|
||||
require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
|
||||
Name: "working-receiver",
|
||||
WebhookConfigs: []*config.WebhookConfig{
|
||||
{
|
||||
HTTPConfig: &commoncfg.HTTPClientConfig{},
|
||||
URL: &config.SecretURL{URL: webhookURL},
|
||||
},
|
||||
},
|
||||
}))
|
||||
|
||||
require.NoError(t, amConfig.CreateReceiver(alertmanagertypes.Receiver{
|
||||
Name: "failing-receiver",
|
||||
WebhookConfigs: []*config.WebhookConfig{
|
||||
{
|
||||
HTTPConfig: &commoncfg.HTTPClientConfig{},
|
||||
URL: &config.SecretURL{URL: &url.URL{Scheme: "http", Host: "localhost:1", Path: "/webhook"}},
|
||||
},
|
||||
},
|
||||
}))
|
||||
|
||||
require.NoError(t, server.SetConfig(context.Background(), amConfig))
|
||||
defer func() {
|
||||
_ = server.Stop(context.Background())
|
||||
_ = webhookServer.Close()
|
||||
}()
|
||||
|
||||
alert := &alertmanagertypes.PostableAlert{
|
||||
Annotations: models.LabelSet{"alertname": "test-alert"},
|
||||
StartsAt: strfmt.DateTime(time.Now()),
|
||||
Alert: models.Alert{
|
||||
Labels: models.LabelSet{"alertname": "test-alert"},
|
||||
},
|
||||
}
|
||||
|
||||
receiversMap := map[*alertmanagertypes.PostableAlert][]string{
|
||||
alert: {"working-receiver", "failing-receiver"},
|
||||
}
|
||||
|
||||
config := &alertmanagertypes.NotificationConfig{
|
||||
NotificationGroup: make(map[model.LabelName]struct{}),
|
||||
GroupByAll: false,
|
||||
}
|
||||
|
||||
err = server.TestAlert(context.Background(), receiversMap, config)
|
||||
assert.Error(t, err)
|
||||
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
assert.Greater(t, requestCount, 0, "working-receiver should have received at least one request even though failing-receiver failed")
|
||||
}
|
||||
|
||||
@ -2,6 +2,7 @@ package alertmanager
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
@ -273,3 +274,128 @@ func (api *API) CreateChannel(rw http.ResponseWriter, req *http.Request) {
|
||||
|
||||
render.Success(rw, http.StatusNoContent, nil)
|
||||
}
|
||||
|
||||
func (api *API) CreateRoutePolicy(rw http.ResponseWriter, req *http.Request) {
|
||||
ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
body, err := io.ReadAll(req.Body)
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
defer req.Body.Close()
|
||||
var policy alertmanagertypes.PostableRoutePolicy
|
||||
err = json.Unmarshal(body, &policy)
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
policy.ExpressionKind = alertmanagertypes.PolicyBasedExpression
|
||||
|
||||
// Validate the postable route
|
||||
if err := policy.Validate(); err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
result, err := api.alertmanager.CreateRoutePolicy(ctx, &policy)
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
render.Success(rw, http.StatusCreated, result)
|
||||
}
|
||||
|
||||
func (api *API) GetAllRoutePolicies(rw http.ResponseWriter, req *http.Request) {
|
||||
ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
policies, err := api.alertmanager.GetAllRoutePolicies(ctx)
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
render.Success(rw, http.StatusOK, policies)
|
||||
}
|
||||
|
||||
func (api *API) GetRoutePolicyByID(rw http.ResponseWriter, req *http.Request) {
|
||||
ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
vars := mux.Vars(req)
|
||||
policyID := vars["id"]
|
||||
if policyID == "" {
|
||||
render.Error(rw, errors.NewInvalidInputf(errors.CodeInvalidInput, "policy ID is required"))
|
||||
return
|
||||
}
|
||||
|
||||
policy, err := api.alertmanager.GetRoutePolicyByID(ctx, policyID)
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
render.Success(rw, http.StatusOK, policy)
|
||||
}
|
||||
|
||||
func (api *API) DeleteRoutePolicyByID(rw http.ResponseWriter, req *http.Request) {
|
||||
ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
vars := mux.Vars(req)
|
||||
policyID := vars["id"]
|
||||
if policyID == "" {
|
||||
render.Error(rw, errors.NewInvalidInputf(errors.CodeInvalidInput, "policy ID is required"))
|
||||
return
|
||||
}
|
||||
|
||||
err := api.alertmanager.DeleteRoutePolicyByID(ctx, policyID)
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
render.Success(rw, http.StatusNoContent, nil)
|
||||
}
|
||||
|
||||
func (api *API) UpdateRoutePolicy(rw http.ResponseWriter, req *http.Request) {
|
||||
ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
vars := mux.Vars(req)
|
||||
policyID := vars["id"]
|
||||
if policyID == "" {
|
||||
render.Error(rw, errors.NewInvalidInputf(errors.CodeInvalidInput, "policy ID is required"))
|
||||
return
|
||||
}
|
||||
body, err := io.ReadAll(req.Body)
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
defer req.Body.Close()
|
||||
var policy alertmanagertypes.PostableRoutePolicy
|
||||
err = json.Unmarshal(body, &policy)
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
policy.ExpressionKind = alertmanagertypes.PolicyBasedExpression
|
||||
|
||||
// Validate the postable route
|
||||
if err := policy.Validate(); err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
result, err := api.alertmanager.UpdateRoutePolicyByID(ctx, policyID, &policy)
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
render.Success(rw, http.StatusOK, result)
|
||||
}
|
||||
|
||||
@ -1,20 +1,29 @@
|
||||
package nfmanagertest
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
"github.com/prometheus/common/model"
|
||||
)
|
||||
|
||||
// MockNotificationManager is a simple mock implementation of NotificationManager
|
||||
type MockNotificationManager struct {
|
||||
configs map[string]*alertmanagertypes.NotificationConfig
|
||||
errors map[string]error
|
||||
configs map[string]*alertmanagertypes.NotificationConfig
|
||||
routes map[string]*alertmanagertypes.RoutePolicy
|
||||
routesByName map[string][]*alertmanagertypes.RoutePolicy
|
||||
errors map[string]error
|
||||
}
|
||||
|
||||
// NewMock creates a new mock notification manager
|
||||
func NewMock() *MockNotificationManager {
|
||||
return &MockNotificationManager{
|
||||
configs: make(map[string]*alertmanagertypes.NotificationConfig),
|
||||
errors: make(map[string]error),
|
||||
configs: make(map[string]*alertmanagertypes.NotificationConfig),
|
||||
routes: make(map[string]*alertmanagertypes.RoutePolicy),
|
||||
routesByName: make(map[string][]*alertmanagertypes.RoutePolicy),
|
||||
errors: make(map[string]error),
|
||||
}
|
||||
}
|
||||
|
||||
@ -65,6 +74,8 @@ func (m *MockNotificationManager) SetMockError(orgID, ruleID string, err error)
|
||||
|
||||
func (m *MockNotificationManager) ClearMockData() {
|
||||
m.configs = make(map[string]*alertmanagertypes.NotificationConfig)
|
||||
m.routes = make(map[string]*alertmanagertypes.RoutePolicy)
|
||||
m.routesByName = make(map[string][]*alertmanagertypes.RoutePolicy)
|
||||
m.errors = make(map[string]error)
|
||||
}
|
||||
|
||||
@ -73,3 +84,241 @@ func (m *MockNotificationManager) HasConfig(orgID, ruleID string) bool {
|
||||
_, exists := m.configs[key]
|
||||
return exists
|
||||
}
|
||||
|
||||
// Route Policy CRUD
|
||||
|
||||
func (m *MockNotificationManager) CreateRoutePolicy(ctx context.Context, orgID string, route *alertmanagertypes.RoutePolicy) error {
|
||||
key := getKey(orgID, "create_route")
|
||||
if err := m.errors[key]; err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if route == nil {
|
||||
return fmt.Errorf("route cannot be nil")
|
||||
}
|
||||
|
||||
if err := route.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
routeKey := getKey(orgID, route.ID.StringValue())
|
||||
m.routes[routeKey] = route
|
||||
nameKey := getKey(orgID, route.Name)
|
||||
m.routesByName[nameKey] = append(m.routesByName[nameKey], route)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockNotificationManager) CreateRoutePolicies(ctx context.Context, orgID string, routes []*alertmanagertypes.RoutePolicy) error {
|
||||
key := getKey(orgID, "create_routes")
|
||||
if err := m.errors[key]; err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(routes) == 0 {
|
||||
return fmt.Errorf("routes cannot be empty")
|
||||
}
|
||||
for i, route := range routes {
|
||||
if route == nil {
|
||||
return fmt.Errorf("route at index %d cannot be nil", i)
|
||||
}
|
||||
if err := route.Validate(); err != nil {
|
||||
return fmt.Errorf("route at index %d: %s", i, err.Error())
|
||||
}
|
||||
}
|
||||
for _, route := range routes {
|
||||
if err := m.CreateRoutePolicy(ctx, orgID, route); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockNotificationManager) GetRoutePolicyByID(ctx context.Context, orgID string, routeID string) (*alertmanagertypes.RoutePolicy, error) {
|
||||
key := getKey(orgID, "get_route")
|
||||
if err := m.errors[key]; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if routeID == "" {
|
||||
return nil, fmt.Errorf("routeID cannot be empty")
|
||||
}
|
||||
|
||||
routeKey := getKey(orgID, routeID)
|
||||
route, exists := m.routes[routeKey]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("route with ID %s not found", routeID)
|
||||
}
|
||||
|
||||
return route, nil
|
||||
}
|
||||
|
||||
func (m *MockNotificationManager) GetAllRoutePolicies(ctx context.Context, orgID string) ([]*alertmanagertypes.RoutePolicy, error) {
|
||||
key := getKey(orgID, "get_all_routes")
|
||||
if err := m.errors[key]; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if orgID == "" {
|
||||
return nil, fmt.Errorf("orgID cannot be empty")
|
||||
}
|
||||
|
||||
var routes []*alertmanagertypes.RoutePolicy
|
||||
for routeKey, route := range m.routes {
|
||||
if route.OrgID == orgID {
|
||||
routes = append(routes, route)
|
||||
}
|
||||
_ = routeKey
|
||||
}
|
||||
|
||||
return routes, nil
|
||||
}
|
||||
|
||||
func (m *MockNotificationManager) DeleteRoutePolicy(ctx context.Context, orgID string, routeID string) error {
|
||||
key := getKey(orgID, "delete_route")
|
||||
if err := m.errors[key]; err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if routeID == "" {
|
||||
return fmt.Errorf("routeID cannot be empty")
|
||||
}
|
||||
|
||||
routeKey := getKey(orgID, routeID)
|
||||
route, exists := m.routes[routeKey]
|
||||
if !exists {
|
||||
return fmt.Errorf("route with ID %s not found", routeID)
|
||||
}
|
||||
delete(m.routes, routeKey)
|
||||
|
||||
nameKey := getKey(orgID, route.Name)
|
||||
if nameRoutes, exists := m.routesByName[nameKey]; exists {
|
||||
var filtered []*alertmanagertypes.RoutePolicy
|
||||
for _, r := range nameRoutes {
|
||||
if r.ID.StringValue() != routeID {
|
||||
filtered = append(filtered, r)
|
||||
}
|
||||
}
|
||||
if len(filtered) == 0 {
|
||||
delete(m.routesByName, nameKey)
|
||||
} else {
|
||||
m.routesByName[nameKey] = filtered
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockNotificationManager) DeleteAllRoutePoliciesByName(ctx context.Context, orgID string, name string) error {
|
||||
key := getKey(orgID, "delete_routes_by_name")
|
||||
if err := m.errors[key]; err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if orgID == "" {
|
||||
return fmt.Errorf("orgID cannot be empty")
|
||||
}
|
||||
|
||||
if name == "" {
|
||||
return fmt.Errorf("name cannot be empty")
|
||||
}
|
||||
|
||||
nameKey := getKey(orgID, name)
|
||||
routes, exists := m.routesByName[nameKey]
|
||||
if !exists {
|
||||
return nil // No routes to delete
|
||||
}
|
||||
|
||||
for _, route := range routes {
|
||||
routeKey := getKey(orgID, route.ID.StringValue())
|
||||
delete(m.routes, routeKey)
|
||||
}
|
||||
|
||||
delete(m.routesByName, nameKey)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockNotificationManager) Match(ctx context.Context, orgID string, ruleID string, set model.LabelSet) ([]string, error) {
|
||||
key := getKey(orgID, ruleID)
|
||||
if err := m.errors[key]; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
config, err := m.GetNotificationConfig(orgID, ruleID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var expressionRoutes []*alertmanagertypes.RoutePolicy
|
||||
if config.UsePolicy {
|
||||
for _, route := range m.routes {
|
||||
if route.OrgID == orgID && route.ExpressionKind == alertmanagertypes.PolicyBasedExpression {
|
||||
expressionRoutes = append(expressionRoutes, route)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
nameKey := getKey(orgID, ruleID)
|
||||
if routes, exists := m.routesByName[nameKey]; exists {
|
||||
expressionRoutes = routes
|
||||
}
|
||||
}
|
||||
|
||||
var matchedChannels []string
|
||||
for _, route := range expressionRoutes {
|
||||
if m.evaluateExpr(route.Expression, set) {
|
||||
matchedChannels = append(matchedChannels, route.Channels...)
|
||||
}
|
||||
}
|
||||
|
||||
return matchedChannels, nil
|
||||
}
|
||||
|
||||
func (m *MockNotificationManager) evaluateExpr(expression string, labelSet model.LabelSet) bool {
|
||||
ruleID, ok := labelSet["ruleId"]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
if strings.Contains(expression, `ruleId in ["ruleId-OtherAlert", "ruleId-TestingAlert"]`) {
|
||||
return ruleID == "ruleId-OtherAlert" || ruleID == "ruleId-TestingAlert"
|
||||
}
|
||||
if strings.Contains(expression, `ruleId in ["ruleId-HighLatency", "ruleId-HighErrorRate"]`) {
|
||||
return ruleID == "ruleId-HighLatency" || ruleID == "ruleId-HighErrorRate"
|
||||
}
|
||||
if strings.Contains(expression, `ruleId == "ruleId-HighLatency"`) {
|
||||
return ruleID == "ruleId-HighLatency"
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// Helper methods for testing
|
||||
|
||||
func (m *MockNotificationManager) SetMockRoute(orgID string, route *alertmanagertypes.RoutePolicy) {
|
||||
routeKey := getKey(orgID, route.ID.StringValue())
|
||||
m.routes[routeKey] = route
|
||||
|
||||
nameKey := getKey(orgID, route.Name)
|
||||
m.routesByName[nameKey] = append(m.routesByName[nameKey], route)
|
||||
}
|
||||
|
||||
func (m *MockNotificationManager) SetMockRouteError(orgID, operation string, err error) {
|
||||
key := getKey(orgID, operation)
|
||||
m.errors[key] = err
|
||||
}
|
||||
|
||||
func (m *MockNotificationManager) ClearMockRoutes() {
|
||||
m.routes = make(map[string]*alertmanagertypes.RoutePolicy)
|
||||
m.routesByName = make(map[string][]*alertmanagertypes.RoutePolicy)
|
||||
}
|
||||
|
||||
func (m *MockNotificationManager) GetRouteCount() int {
|
||||
return len(m.routes)
|
||||
}
|
||||
|
||||
func (m *MockNotificationManager) HasRoute(orgID, routeID string) bool {
|
||||
routeKey := getKey(orgID, routeID)
|
||||
_, exists := m.routes[routeKey]
|
||||
return exists
|
||||
}
|
||||
|
||||
@ -0,0 +1,176 @@
|
||||
package nfroutingstoretest
|
||||
|
||||
import (
|
||||
"context"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/sqlroutingstore"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore/sqlstoretest"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
)
|
||||
|
||||
type MockSQLRouteStore struct {
|
||||
routeStore alertmanagertypes.RouteStore
|
||||
mock sqlmock.Sqlmock
|
||||
}
|
||||
|
||||
func NewMockSQLRouteStore() *MockSQLRouteStore {
|
||||
sqlStore := sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherRegexp)
|
||||
routeStore := sqlroutingstore.NewStore(sqlStore)
|
||||
|
||||
return &MockSQLRouteStore{
|
||||
routeStore: routeStore,
|
||||
mock: sqlStore.Mock(),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) Mock() sqlmock.Sqlmock {
|
||||
return m.mock
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) GetByID(ctx context.Context, orgId string, id string) (*alertmanagertypes.RoutePolicy, error) {
|
||||
return m.routeStore.GetByID(ctx, orgId, id)
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) Create(ctx context.Context, route *alertmanagertypes.RoutePolicy) error {
|
||||
return m.routeStore.Create(ctx, route)
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) CreateBatch(ctx context.Context, routes []*alertmanagertypes.RoutePolicy) error {
|
||||
return m.routeStore.CreateBatch(ctx, routes)
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) Delete(ctx context.Context, orgId string, id string) error {
|
||||
return m.routeStore.Delete(ctx, orgId, id)
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) GetAllByKind(ctx context.Context, orgID string, kind alertmanagertypes.ExpressionKind) ([]*alertmanagertypes.RoutePolicy, error) {
|
||||
return m.routeStore.GetAllByKind(ctx, orgID, kind)
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) GetAllByName(ctx context.Context, orgID string, name string) ([]*alertmanagertypes.RoutePolicy, error) {
|
||||
return m.routeStore.GetAllByName(ctx, orgID, name)
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) DeleteRouteByName(ctx context.Context, orgID string, name string) error {
|
||||
return m.routeStore.DeleteRouteByName(ctx, orgID, name)
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) ExpectGetByID(orgID, id string, route *alertmanagertypes.RoutePolicy) {
|
||||
rows := sqlmock.NewRows([]string{"id", "org_id", "name", "expression", "kind", "description", "enabled", "tags", "channels", "created_at", "updated_at", "created_by", "updated_by"})
|
||||
|
||||
if route != nil {
|
||||
rows.AddRow(
|
||||
route.ID.StringValue(),
|
||||
route.OrgID,
|
||||
route.Name,
|
||||
route.Expression,
|
||||
route.ExpressionKind.StringValue(),
|
||||
route.Description,
|
||||
route.Enabled,
|
||||
"[]", // tags as JSON
|
||||
`["`+strings.Join(route.Channels, `","`)+`"]`, // channels as JSON
|
||||
"0001-01-01T00:00:00Z", // created_at
|
||||
"0001-01-01T00:00:00Z", // updated_at
|
||||
"", // created_by
|
||||
"", // updated_by
|
||||
)
|
||||
}
|
||||
|
||||
m.mock.ExpectQuery(`SELECT (.+) FROM "route_policy" WHERE \(id = \$1\) AND \(org_id = \$2\)`).
|
||||
WithArgs(id, orgID).
|
||||
WillReturnRows(rows)
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) ExpectCreate(route *alertmanagertypes.RoutePolicy) {
|
||||
expectedPattern := `INSERT INTO "route_policy" \(.+\) VALUES .+`
|
||||
m.mock.ExpectExec(expectedPattern).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) ExpectCreateBatch(routes []*alertmanagertypes.RoutePolicy) {
|
||||
if len(routes) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Simplified pattern that should match any INSERT into route_policy
|
||||
expectedPattern := `INSERT INTO "route_policy" \(.+\) VALUES .+`
|
||||
|
||||
m.mock.ExpectExec(expectedPattern).
|
||||
WillReturnResult(sqlmock.NewResult(1, int64(len(routes))))
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) ExpectDelete(orgID, id string) {
|
||||
m.mock.ExpectExec(`DELETE FROM "route_policy" AS "route_policy" WHERE \(org_id = '` + regexp.QuoteMeta(orgID) + `'\) AND \(id = '` + regexp.QuoteMeta(id) + `'\)`).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) ExpectGetAllByKindAndOrgID(orgID string, kind alertmanagertypes.ExpressionKind, routes []*alertmanagertypes.RoutePolicy) {
|
||||
rows := sqlmock.NewRows([]string{"id", "org_id", "name", "expression", "kind", "description", "enabled", "tags", "channels", "created_at", "updated_at", "created_by", "updated_by"})
|
||||
|
||||
for _, route := range routes {
|
||||
if route.OrgID == orgID && route.ExpressionKind == kind {
|
||||
rows.AddRow(
|
||||
route.ID.StringValue(),
|
||||
route.OrgID,
|
||||
route.Name,
|
||||
route.Expression,
|
||||
route.ExpressionKind.StringValue(),
|
||||
route.Description,
|
||||
route.Enabled,
|
||||
"[]", // tags as JSON
|
||||
`["`+strings.Join(route.Channels, `","`)+`"]`, // channels as JSON
|
||||
"0001-01-01T00:00:00Z", // created_at
|
||||
"0001-01-01T00:00:00Z", // updated_at
|
||||
"", // created_by
|
||||
"", // updated_by
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
m.mock.ExpectQuery(`SELECT (.+) FROM "route_policy" WHERE \(org_id = '` + regexp.QuoteMeta(orgID) + `'\) AND \(kind = '` + regexp.QuoteMeta(kind.StringValue()) + `'\)`).
|
||||
WillReturnRows(rows)
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) ExpectGetAllByName(orgID, name string, routes []*alertmanagertypes.RoutePolicy) {
|
||||
rows := sqlmock.NewRows([]string{"id", "org_id", "name", "expression", "kind", "description", "enabled", "tags", "channels", "created_at", "updated_at", "created_by", "updated_by"})
|
||||
|
||||
for _, route := range routes {
|
||||
if route.OrgID == orgID && route.Name == name {
|
||||
rows.AddRow(
|
||||
route.ID.StringValue(),
|
||||
route.OrgID,
|
||||
route.Name,
|
||||
route.Expression,
|
||||
route.ExpressionKind.StringValue(),
|
||||
route.Description,
|
||||
route.Enabled,
|
||||
"[]", // tags as JSON
|
||||
`["`+strings.Join(route.Channels, `","`)+`"]`, // channels as JSON
|
||||
"0001-01-01T00:00:00Z", // created_at
|
||||
"0001-01-01T00:00:00Z", // updated_at
|
||||
"", // created_by
|
||||
"", // updated_by
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
m.mock.ExpectQuery(`SELECT (.+) FROM "route_policy" WHERE \(org_id = '` + regexp.QuoteMeta(orgID) + `'\) AND \(name = '` + regexp.QuoteMeta(name) + `'\)`).
|
||||
WillReturnRows(rows)
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) ExpectDeleteRouteByName(orgID, name string) {
|
||||
m.mock.ExpectExec(`DELETE FROM "route_policy" AS "route_policy" WHERE \(org_id = '` + regexp.QuoteMeta(orgID) + `'\) AND \(name = '` + regexp.QuoteMeta(name) + `'\)`).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) ExpectationsWereMet() error {
|
||||
return m.mock.ExpectationsWereMet()
|
||||
}
|
||||
|
||||
func (m *MockSQLRouteStore) MatchExpectationsInOrder(match bool) {
|
||||
m.mock.MatchExpectationsInOrder(match)
|
||||
}
|
||||
@ -0,0 +1,93 @@
|
||||
package sqlroutingstore
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
routeTypes "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
)
|
||||
|
||||
type store struct {
|
||||
sqlstore sqlstore.SQLStore
|
||||
}
|
||||
|
||||
func NewStore(sqlstore sqlstore.SQLStore) routeTypes.RouteStore {
|
||||
return &store{
|
||||
sqlstore: sqlstore,
|
||||
}
|
||||
}
|
||||
|
||||
func (store *store) GetByID(ctx context.Context, orgId string, id string) (*routeTypes.RoutePolicy, error) {
|
||||
route := new(routeTypes.RoutePolicy)
|
||||
err := store.sqlstore.BunDBCtx(ctx).NewSelect().Model(route).Where("id = ?", id).Where("org_id = ?", orgId).Scan(ctx)
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return nil, store.sqlstore.WrapNotFoundErrf(err, errors.CodeNotFound, "routing policy with ID: %s does not exist", id)
|
||||
}
|
||||
return nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to fetch routing policy with ID: %s", id)
|
||||
}
|
||||
|
||||
return route, nil
|
||||
}
|
||||
|
||||
func (store *store) Create(ctx context.Context, route *routeTypes.RoutePolicy) error {
|
||||
_, err := store.sqlstore.BunDBCtx(ctx).NewInsert().Model(route).Exec(ctx)
|
||||
if err != nil {
|
||||
return errors.NewInternalf(errors.CodeInternal, "error creating routing policy with ID: %s", route.ID)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (store *store) CreateBatch(ctx context.Context, route []*routeTypes.RoutePolicy) error {
|
||||
_, err := store.sqlstore.BunDBCtx(ctx).NewInsert().Model(&route).Exec(ctx)
|
||||
if err != nil {
|
||||
return errors.NewInternalf(errors.CodeInternal, "error creating routing policies: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (store *store) Delete(ctx context.Context, orgId string, id string) error {
|
||||
_, err := store.sqlstore.BunDBCtx(ctx).NewDelete().Model((*routeTypes.RoutePolicy)(nil)).Where("org_id = ?", orgId).Where("id = ?", id).Exec(ctx)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to delete routing policy with ID: %s", id)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (store *store) GetAllByKind(ctx context.Context, orgID string, kind routeTypes.ExpressionKind) ([]*routeTypes.RoutePolicy, error) {
|
||||
var routes []*routeTypes.RoutePolicy
|
||||
err := store.sqlstore.BunDBCtx(ctx).NewSelect().Model(&routes).Where("org_id = ?", orgID).Where("kind = ?", kind).Scan(ctx)
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return nil, errors.NewNotFoundf(errors.CodeNotFound, "no routing policies found for orgID: %s", orgID)
|
||||
}
|
||||
return nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to fetch routing policies for orgID: %s", orgID)
|
||||
}
|
||||
return routes, nil
|
||||
}
|
||||
|
||||
func (store *store) GetAllByName(ctx context.Context, orgID string, name string) ([]*routeTypes.RoutePolicy, error) {
|
||||
var routes []*routeTypes.RoutePolicy
|
||||
err := store.sqlstore.BunDBCtx(ctx).NewSelect().Model(&routes).Where("org_id = ?", orgID).Where("name = ?", name).Scan(ctx)
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return routes, errors.NewNotFoundf(errors.CodeNotFound, "no routing policies found for orgID: %s and name: %s", orgID, name)
|
||||
}
|
||||
return nil, errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to fetch routing policies for orgID: %s and name: %s", orgID, name)
|
||||
}
|
||||
return routes, nil
|
||||
}
|
||||
|
||||
func (store *store) DeleteRouteByName(ctx context.Context, orgID string, name string) error {
|
||||
_, err := store.sqlstore.BunDBCtx(ctx).NewDelete().Model((*routeTypes.RoutePolicy)(nil)).Where("org_id = ?", orgID).Where("name = ?", name).Exec(ctx)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, errors.TypeInternal, errors.CodeInternal, "unable to delete routing policies with name: %s", name)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@ -2,12 +2,27 @@
|
||||
package nfmanager
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
"github.com/prometheus/common/model"
|
||||
)
|
||||
|
||||
// NotificationManager defines how alerts should be grouped and configured for notification with multi-tenancy support.
|
||||
// NotificationManager defines how alerts should be grouped and configured for notification.
|
||||
type NotificationManager interface {
	// Notification Config CRUD

	// GetNotificationConfig returns the notification configuration addressed
	// by the (orgID, ruleID) pair.
	GetNotificationConfig(orgID string, ruleID string) (*alertmanagertypes.NotificationConfig, error)
	// SetNotificationConfig stores config under the (orgID, ruleID) pair.
	SetNotificationConfig(orgID string, ruleID string, config *alertmanagertypes.NotificationConfig) error
	// DeleteNotificationConfig removes the configuration addressed by the
	// (orgID, ruleID) pair.
	DeleteNotificationConfig(orgID string, ruleID string) error

	// Route Policy CRUD

	// CreateRoutePolicy persists a single route policy.
	CreateRoutePolicy(ctx context.Context, orgID string, route *alertmanagertypes.RoutePolicy) error
	// CreateRoutePolicies persists several route policies in one call.
	CreateRoutePolicies(ctx context.Context, orgID string, routes []*alertmanagertypes.RoutePolicy) error
	// GetRoutePolicyByID returns the route policy identified by routeID.
	GetRoutePolicyByID(ctx context.Context, orgID string, routeID string) (*alertmanagertypes.RoutePolicy, error)
	// GetAllRoutePolicies lists route policies of the organization.
	// NOTE(review): the rule-based provider only returns policies of kind
	// PolicyBasedExpression — confirm whether that is part of the contract.
	GetAllRoutePolicies(ctx context.Context, orgID string) ([]*alertmanagertypes.RoutePolicy, error)
	// DeleteRoutePolicy deletes the route policy identified by routeID.
	DeleteRoutePolicy(ctx context.Context, orgID string, routeID string) error
	// DeleteAllRoutePoliciesByName deletes every route policy of the
	// organization that carries the given name.
	DeleteAllRoutePoliciesByName(ctx context.Context, orgID string, name string) error

	// Route matching

	// Match resolves the label set of an alert raised by ruleID to a list of
	// strings; the rule-based provider returns the channels of the route
	// policies that match the label set.
	Match(ctx context.Context, orgID string, ruleID string, set model.LabelSet) ([]string, error)
}
|
||||
|
||||
@ -2,11 +2,14 @@ package rulebasednotification
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
"github.com/expr-lang/expr"
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/factory"
|
||||
)
|
||||
@ -14,26 +17,28 @@ import (
|
||||
// provider is the value New returns as nfmanager.NotificationManager. It keeps
// notification configs in memory and delegates route policies to routeStore.
type provider struct {
	// settings is the scoped provider settings built in New.
	settings factory.ScopedProviderSettings
	// orgToFingerprintToNotificationConfig holds notification configs in a
	// two-level map whose outer key is the org ID.
	// NOTE(review): the inner key is presumably the rule ID passed to the
	// notification config accessors — confirm.
	orgToFingerprintToNotificationConfig map[string]map[string]alertmanagertypes.NotificationConfig
	// routeStore persists route policies; the route policy CRUD methods and
	// Match delegate to it.
	routeStore alertmanagertypes.RouteStore
	// mutex presumably guards orgToFingerprintToNotificationConfig — confirm
	// against the notification config accessors.
	mutex sync.RWMutex
}
|
||||
|
||||
// NewFactory creates a new factory for the rule-based grouping strategy.
|
||||
func NewFactory() factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config] {
|
||||
func NewFactory(routeStore alertmanagertypes.RouteStore) factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config] {
|
||||
return factory.NewProviderFactory(
|
||||
factory.MustNewName("rulebased"),
|
||||
func(ctx context.Context, settings factory.ProviderSettings, config nfmanager.Config) (nfmanager.NotificationManager, error) {
|
||||
return New(ctx, settings, config)
|
||||
return New(ctx, settings, config, routeStore)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
// New creates a new rule-based grouping strategy provider.
|
||||
func New(ctx context.Context, providerSettings factory.ProviderSettings, config nfmanager.Config) (nfmanager.NotificationManager, error) {
|
||||
func New(ctx context.Context, providerSettings factory.ProviderSettings, config nfmanager.Config, routeStore alertmanagertypes.RouteStore) (nfmanager.NotificationManager, error) {
|
||||
settings := factory.NewScopedProviderSettings(providerSettings, "github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/rulebasednotification")
|
||||
|
||||
return &provider{
|
||||
settings: settings,
|
||||
orgToFingerprintToNotificationConfig: make(map[string]map[string]alertmanagertypes.NotificationConfig),
|
||||
routeStore: routeStore,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -58,6 +63,8 @@ func (r *provider) GetNotificationConfig(orgID string, ruleID string) (*alertman
|
||||
for k, v := range config.NotificationGroup {
|
||||
notificationConfig.NotificationGroup[k] = v
|
||||
}
|
||||
notificationConfig.UsePolicy = config.UsePolicy
|
||||
notificationConfig.GroupByAll = config.GroupByAll
|
||||
}
|
||||
}
|
||||
|
||||
@ -101,3 +108,147 @@ func (r *provider) DeleteNotificationConfig(orgID string, ruleID string) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *provider) CreateRoutePolicy(ctx context.Context, orgID string, route *alertmanagertypes.RoutePolicy) error {
|
||||
if route == nil {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policy cannot be nil")
|
||||
}
|
||||
|
||||
err := route.Validate()
|
||||
if err != nil {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid route policy: %v", err)
|
||||
}
|
||||
|
||||
return r.routeStore.Create(ctx, route)
|
||||
}
|
||||
|
||||
func (r *provider) CreateRoutePolicies(ctx context.Context, orgID string, routes []*alertmanagertypes.RoutePolicy) error {
|
||||
if len(routes) == 0 {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policies cannot be empty")
|
||||
}
|
||||
|
||||
for _, route := range routes {
|
||||
if route == nil {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policy cannot be nil")
|
||||
}
|
||||
if err := route.Validate(); err != nil {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "route policy with name %s: %s", route.Name, err.Error())
|
||||
}
|
||||
}
|
||||
return r.routeStore.CreateBatch(ctx, routes)
|
||||
}
|
||||
|
||||
func (r *provider) GetRoutePolicyByID(ctx context.Context, orgID string, routeID string) (*alertmanagertypes.RoutePolicy, error) {
|
||||
if routeID == "" {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
|
||||
}
|
||||
|
||||
return r.routeStore.GetByID(ctx, orgID, routeID)
|
||||
}
|
||||
|
||||
func (r *provider) GetAllRoutePolicies(ctx context.Context, orgID string) ([]*alertmanagertypes.RoutePolicy, error) {
|
||||
if orgID == "" {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "orgID cannot be empty")
|
||||
}
|
||||
|
||||
return r.routeStore.GetAllByKind(ctx, orgID, alertmanagertypes.PolicyBasedExpression)
|
||||
}
|
||||
|
||||
func (r *provider) DeleteRoutePolicy(ctx context.Context, orgID string, routeID string) error {
|
||||
if routeID == "" {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
|
||||
}
|
||||
|
||||
return r.routeStore.Delete(ctx, orgID, routeID)
|
||||
}
|
||||
|
||||
func (r *provider) DeleteAllRoutePoliciesByName(ctx context.Context, orgID string, name string) error {
|
||||
if orgID == "" {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "orgID cannot be empty")
|
||||
}
|
||||
if name == "" {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "name cannot be empty")
|
||||
}
|
||||
return r.routeStore.DeleteRouteByName(ctx, orgID, name)
|
||||
}
|
||||
|
||||
func (r *provider) Match(ctx context.Context, orgID string, ruleID string, set model.LabelSet) ([]string, error) {
|
||||
config, err := r.GetNotificationConfig(orgID, ruleID)
|
||||
if err != nil {
|
||||
return nil, errors.NewInternalf(errors.CodeInternal, "error getting notification configuration: %v", err)
|
||||
}
|
||||
var expressionRoutes []*alertmanagertypes.RoutePolicy
|
||||
if config.UsePolicy {
|
||||
expressionRoutes, err = r.routeStore.GetAllByKind(ctx, orgID, alertmanagertypes.PolicyBasedExpression)
|
||||
if err != nil {
|
||||
return []string{}, errors.NewInternalf(errors.CodeInternal, "error getting route policies: %v", err)
|
||||
}
|
||||
} else {
|
||||
expressionRoutes, err = r.routeStore.GetAllByName(ctx, orgID, ruleID)
|
||||
if err != nil {
|
||||
return []string{}, errors.NewInternalf(errors.CodeInternal, "error getting route policies: %v", err)
|
||||
}
|
||||
}
|
||||
var matchedChannels []string
|
||||
if _, ok := set[alertmanagertypes.NoDataLabel]; ok && !config.UsePolicy {
|
||||
for _, expressionRoute := range expressionRoutes {
|
||||
matchedChannels = append(matchedChannels, expressionRoute.Channels...)
|
||||
}
|
||||
return matchedChannels, nil
|
||||
}
|
||||
|
||||
for _, route := range expressionRoutes {
|
||||
evaluateExpr, err := r.evaluateExpr(route.Expression, set)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if evaluateExpr {
|
||||
matchedChannels = append(matchedChannels, route.Channels...)
|
||||
}
|
||||
}
|
||||
|
||||
return matchedChannels, nil
|
||||
}
|
||||
|
||||
func (r *provider) evaluateExpr(expression string, labelSet model.LabelSet) (bool, error) {
|
||||
env := make(map[string]interface{})
|
||||
|
||||
for k, v := range labelSet {
|
||||
key := string(k)
|
||||
value := string(v)
|
||||
|
||||
if strings.Contains(key, ".") {
|
||||
parts := strings.Split(key, ".")
|
||||
current := env
|
||||
|
||||
for i, part := range parts {
|
||||
if i == len(parts)-1 {
|
||||
current[part] = value
|
||||
} else {
|
||||
if current[part] == nil {
|
||||
current[part] = make(map[string]interface{})
|
||||
}
|
||||
current = current[part].(map[string]interface{})
|
||||
}
|
||||
}
|
||||
} else {
|
||||
env[key] = value
|
||||
}
|
||||
}
|
||||
|
||||
program, err := expr.Compile(expression, expr.Env(env))
|
||||
if err != nil {
|
||||
return false, errors.NewInternalf(errors.CodeInternal, "error compiling route policy %s: %v", expression, err)
|
||||
}
|
||||
|
||||
output, err := expr.Run(program, env)
|
||||
if err != nil {
|
||||
return false, errors.NewInternalf(errors.CodeInternal, "error running route policy %s: %v", expression, err)
|
||||
}
|
||||
|
||||
if boolVal, ok := output.(bool); ok {
|
||||
return boolVal, nil
|
||||
}
|
||||
|
||||
return false, errors.NewInternalf(errors.CodeInternal, "error in evaluating route policy %s: %v", expression, err)
|
||||
}
|
||||
|
||||
@ -2,18 +2,22 @@ package rulebasednotification
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/prometheus/common/model"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest"
|
||||
"github.com/SigNoz/signoz/pkg/factory"
|
||||
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
|
||||
"github.com/SigNoz/signoz/pkg/types"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
"github.com/prometheus/alertmanager/types"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/prometheus/common/model"
|
||||
)
|
||||
|
||||
func createTestProviderSettings() factory.ProviderSettings {
|
||||
@ -21,7 +25,8 @@ func createTestProviderSettings() factory.ProviderSettings {
|
||||
}
|
||||
|
||||
func TestNewFactory(t *testing.T) {
|
||||
providerFactory := NewFactory()
|
||||
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
|
||||
providerFactory := NewFactory(routeStore)
|
||||
assert.NotNil(t, providerFactory)
|
||||
assert.Equal(t, "rulebased", providerFactory.Name().String())
|
||||
}
|
||||
@ -31,7 +36,8 @@ func TestNew(t *testing.T) {
|
||||
providerSettings := createTestProviderSettings()
|
||||
config := nfmanager.Config{}
|
||||
|
||||
provider, err := New(ctx, providerSettings, config)
|
||||
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
|
||||
provider, err := New(ctx, providerSettings, config, routeStore)
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, provider)
|
||||
|
||||
@ -44,7 +50,8 @@ func TestProvider_SetNotificationConfig(t *testing.T) {
|
||||
providerSettings := createTestProviderSettings()
|
||||
config := nfmanager.Config{}
|
||||
|
||||
provider, err := New(ctx, providerSettings, config)
|
||||
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
|
||||
provider, err := New(ctx, providerSettings, config, routeStore)
|
||||
require.NoError(t, err)
|
||||
|
||||
tests := []struct {
|
||||
@ -124,11 +131,12 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
|
||||
providerSettings := createTestProviderSettings()
|
||||
config := nfmanager.Config{}
|
||||
|
||||
provider, err := New(ctx, providerSettings, config)
|
||||
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
|
||||
provider, err := New(ctx, providerSettings, config, routeStore)
|
||||
require.NoError(t, err)
|
||||
|
||||
orgID := "test-org"
|
||||
ruleID := "rule1"
|
||||
ruleID := "ruleId"
|
||||
customConfig := &alertmanagertypes.NotificationConfig{
|
||||
Renotify: alertmanagertypes.ReNotificationConfig{
|
||||
RenotifyInterval: 30 * time.Minute,
|
||||
@ -144,7 +152,6 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
// Set config for alert1
|
||||
err = provider.SetNotificationConfig(orgID, ruleID, customConfig)
|
||||
require.NoError(t, err)
|
||||
|
||||
@ -155,7 +162,7 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
|
||||
name string
|
||||
orgID string
|
||||
ruleID string
|
||||
alert *types.Alert
|
||||
alert *alertmanagertypes.Alert
|
||||
expectedConfig *alertmanagertypes.NotificationConfig
|
||||
shouldFallback bool
|
||||
}{
|
||||
@ -165,7 +172,7 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
|
||||
ruleID: ruleID,
|
||||
expectedConfig: &alertmanagertypes.NotificationConfig{
|
||||
NotificationGroup: map[model.LabelName]struct{}{
|
||||
model.LabelName("ruleId"): {},
|
||||
model.LabelName(ruleID): {},
|
||||
},
|
||||
Renotify: alertmanagertypes.ReNotificationConfig{
|
||||
RenotifyInterval: 30 * time.Minute,
|
||||
@ -182,13 +189,13 @@ func TestProvider_GetNotificationConfig(t *testing.T) {
|
||||
NotificationGroup: map[model.LabelName]struct{}{
|
||||
model.LabelName("group1"): {},
|
||||
model.LabelName("group2"): {},
|
||||
model.LabelName("ruleId"): {},
|
||||
model.LabelName(ruleID): {},
|
||||
},
|
||||
Renotify: alertmanagertypes.ReNotificationConfig{
|
||||
RenotifyInterval: 4 * time.Hour,
|
||||
NoDataInterval: 4 * time.Hour,
|
||||
},
|
||||
}, // Will get fallback from standardnotification
|
||||
},
|
||||
shouldFallback: false,
|
||||
},
|
||||
{
|
||||
@ -231,7 +238,8 @@ func TestProvider_ConcurrentAccess(t *testing.T) {
|
||||
providerSettings := createTestProviderSettings()
|
||||
config := nfmanager.Config{}
|
||||
|
||||
provider, err := New(ctx, providerSettings, config)
|
||||
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
|
||||
provider, err := New(ctx, providerSettings, config, routeStore)
|
||||
require.NoError(t, err)
|
||||
|
||||
orgID := "test-org"
|
||||
@ -268,3 +276,430 @@ func TestProvider_ConcurrentAccess(t *testing.T) {
|
||||
// Wait for both goroutines to complete
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func TestProvider_EvaluateExpression(t *testing.T) {
|
||||
provider := &provider{}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
expression string
|
||||
labelSet model.LabelSet
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "simple equality check - match",
|
||||
expression: `threshold.name == 'auth' && ruleId == 'rule1'`,
|
||||
labelSet: model.LabelSet{
|
||||
"threshold.name": "auth",
|
||||
"ruleId": "rule1",
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "simple equality check - no match",
|
||||
expression: `service == "payment"`,
|
||||
labelSet: model.LabelSet{
|
||||
"service": "auth",
|
||||
"env": "production",
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "multiple conditions with AND - both match",
|
||||
expression: `service == "auth" && env == "production"`,
|
||||
labelSet: model.LabelSet{
|
||||
"service": "auth",
|
||||
"env": "production",
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "multiple conditions with AND - one doesn't match",
|
||||
expression: `service == "auth" && env == "staging"`,
|
||||
labelSet: model.LabelSet{
|
||||
"service": "auth",
|
||||
"env": "production",
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "multiple conditions with OR - one matches",
|
||||
expression: `service == "payment" || env == "production"`,
|
||||
labelSet: model.LabelSet{
|
||||
"service": "auth",
|
||||
"env": "production",
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "multiple conditions with OR - none match",
|
||||
expression: `service == "payment" || env == "staging"`,
|
||||
labelSet: model.LabelSet{
|
||||
"service": "auth",
|
||||
"env": "production",
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "in operator - value in list",
|
||||
expression: `service in ["auth", "payment", "notification"]`,
|
||||
labelSet: model.LabelSet{
|
||||
"service": "auth",
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "in operator - value not in list",
|
||||
expression: `service in ["payment", "notification"]`,
|
||||
labelSet: model.LabelSet{
|
||||
"service": "auth",
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "contains operator - substring match",
|
||||
expression: `host contains "prod"`,
|
||||
labelSet: model.LabelSet{
|
||||
"host": "prod-server-01",
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "contains operator - no substring match",
|
||||
expression: `host contains "staging"`,
|
||||
labelSet: model.LabelSet{
|
||||
"host": "prod-server-01",
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "complex expression with parentheses",
|
||||
expression: `(service == "auth" && env == "production") || critical == "true"`,
|
||||
labelSet: model.LabelSet{
|
||||
"service": "payment",
|
||||
"env": "staging",
|
||||
"critical": "true",
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "missing label key",
|
||||
expression: `"missing_key" == "value"`,
|
||||
labelSet: model.LabelSet{
|
||||
"service": "auth",
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "rule-based expression with threshold name and ruleId",
|
||||
expression: `'threshold.name' == "high-cpu" && ruleId == "rule-123"`,
|
||||
labelSet: model.LabelSet{
|
||||
"threshold.name": "high-cpu",
|
||||
"ruleId": "rule-123",
|
||||
"service": "auth",
|
||||
},
|
||||
expected: false, //no commas
|
||||
},
|
||||
{
|
||||
name: "alertname and ruleId combination",
|
||||
expression: `alertname == "HighCPUUsage" && ruleId == "cpu-alert-001"`,
|
||||
labelSet: model.LabelSet{
|
||||
"alertname": "HighCPUUsage",
|
||||
"ruleId": "cpu-alert-001",
|
||||
"severity": "critical",
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "kubernetes namespace filtering",
|
||||
expression: `k8s.namespace.name == "auth" && service in ["auth", "payment"]`,
|
||||
labelSet: model.LabelSet{
|
||||
"k8s.namespace.name": "auth",
|
||||
"service": "auth",
|
||||
"host": "k8s-node-1",
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "migration expression format from SQL migration",
|
||||
expression: `threshold.name == "HighCPUUsage" && ruleId == "rule-uuid-123"`,
|
||||
labelSet: model.LabelSet{
|
||||
"threshold.name": "HighCPUUsage",
|
||||
"ruleId": "rule-uuid-123",
|
||||
"severity": "warning",
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "case sensitive matching",
|
||||
expression: `service == "Auth"`, // capital A
|
||||
labelSet: model.LabelSet{
|
||||
"service": "auth", // lowercase a
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "numeric comparison as strings",
|
||||
expression: `port == "8080"`,
|
||||
labelSet: model.LabelSet{
|
||||
"port": "8080",
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "quoted string with special characters",
|
||||
expression: `service == "auth-service-v2"`,
|
||||
labelSet: model.LabelSet{
|
||||
"service": "auth-service-v2",
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "boolean operators precedence",
|
||||
expression: `service == "auth" && env == "prod" || critical == "true"`,
|
||||
labelSet: model.LabelSet{
|
||||
"service": "payment",
|
||||
"env": "staging",
|
||||
"critical": "true",
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := provider.evaluateExpr(tt.expression, tt.labelSet)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tt.expected, result, "Expression: %s", tt.expression)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestProvider_DeleteRoute(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
providerSettings := createTestProviderSettings()
|
||||
config := nfmanager.Config{}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
orgID string
|
||||
routeID string
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "valid parameters",
|
||||
orgID: "test-org-123",
|
||||
routeID: "route-uuid-456",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "empty routeID",
|
||||
orgID: "test-org-123",
|
||||
routeID: "",
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "valid orgID with valid routeID",
|
||||
orgID: "another-org",
|
||||
routeID: "another-route-id",
|
||||
wantErr: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
|
||||
provider, err := New(ctx, providerSettings, config, routeStore)
|
||||
require.NoError(t, err)
|
||||
|
||||
if !tt.wantErr {
|
||||
routeStore.ExpectDelete(tt.orgID, tt.routeID)
|
||||
}
|
||||
|
||||
err = provider.DeleteRoutePolicy(ctx, tt.orgID, tt.routeID)
|
||||
|
||||
if tt.wantErr {
|
||||
assert.Error(t, err)
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
assert.NoError(t, routeStore.ExpectationsWereMet())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestProvider_CreateRoute(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
providerSettings := createTestProviderSettings()
|
||||
config := nfmanager.Config{}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
orgID string
|
||||
route *alertmanagertypes.RoutePolicy
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "valid route",
|
||||
orgID: "test-org-123",
|
||||
route: &alertmanagertypes.RoutePolicy{
|
||||
Identifiable: types.Identifiable{ID: valuer.GenerateUUID()},
|
||||
Expression: `service == "auth"`,
|
||||
ExpressionKind: alertmanagertypes.PolicyBasedExpression,
|
||||
Name: "auth-service-route",
|
||||
Description: "Route for auth service alerts",
|
||||
Enabled: true,
|
||||
OrgID: "test-org-123",
|
||||
Channels: []string{"slack-channel"},
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "nil route",
|
||||
orgID: "test-org-123",
|
||||
route: nil,
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "invalid route - missing expression",
|
||||
orgID: "test-org-123",
|
||||
route: &alertmanagertypes.RoutePolicy{
|
||||
Expression: "", // empty expression
|
||||
ExpressionKind: alertmanagertypes.PolicyBasedExpression,
|
||||
Name: "invalid-route",
|
||||
OrgID: "test-org-123",
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "invalid route - missing name",
|
||||
orgID: "test-org-123",
|
||||
route: &alertmanagertypes.RoutePolicy{
|
||||
Expression: `service == "auth"`,
|
||||
ExpressionKind: alertmanagertypes.PolicyBasedExpression,
|
||||
Name: "", // empty name
|
||||
OrgID: "test-org-123",
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
|
||||
provider, err := New(ctx, providerSettings, config, routeStore)
|
||||
require.NoError(t, err)
|
||||
|
||||
if !tt.wantErr && tt.route != nil {
|
||||
routeStore.ExpectCreate(tt.route)
|
||||
}
|
||||
|
||||
err = provider.CreateRoutePolicy(ctx, tt.orgID, tt.route)
|
||||
|
||||
if tt.wantErr {
|
||||
assert.Error(t, err)
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
assert.NoError(t, routeStore.ExpectationsWereMet())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestProvider_CreateRoutes(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
providerSettings := createTestProviderSettings()
|
||||
config := nfmanager.Config{}
|
||||
|
||||
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
|
||||
provider, err := New(ctx, providerSettings, config, routeStore)
|
||||
require.NoError(t, err)
|
||||
|
||||
validRoute1 := &alertmanagertypes.RoutePolicy{
|
||||
Expression: `service == "auth"`,
|
||||
ExpressionKind: alertmanagertypes.PolicyBasedExpression,
|
||||
Name: "auth-route",
|
||||
Description: "Auth service route",
|
||||
Enabled: true,
|
||||
OrgID: "test-org",
|
||||
Channels: []string{"slack-auth"},
|
||||
}
|
||||
|
||||
validRoute2 := &alertmanagertypes.RoutePolicy{
|
||||
Expression: `service == "payment"`,
|
||||
ExpressionKind: alertmanagertypes.PolicyBasedExpression,
|
||||
Name: "payment-route",
|
||||
Description: "Payment service route",
|
||||
Enabled: true,
|
||||
OrgID: "test-org",
|
||||
Channels: []string{"slack-payment"},
|
||||
}
|
||||
|
||||
invalidRoute := &alertmanagertypes.RoutePolicy{
|
||||
Expression: "", // empty expression - invalid
|
||||
ExpressionKind: alertmanagertypes.PolicyBasedExpression,
|
||||
Name: "invalid-route",
|
||||
OrgID: "test-org",
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
orgID string
|
||||
routes []*alertmanagertypes.RoutePolicy
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "valid routes",
|
||||
orgID: "test-org",
|
||||
routes: []*alertmanagertypes.RoutePolicy{validRoute1, validRoute2},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "empty routes list",
|
||||
orgID: "test-org",
|
||||
routes: []*alertmanagertypes.RoutePolicy{},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "nil routes list",
|
||||
orgID: "test-org",
|
||||
routes: nil,
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "routes with nil route",
|
||||
orgID: "test-org",
|
||||
routes: []*alertmanagertypes.RoutePolicy{validRoute1, nil},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "routes with invalid route",
|
||||
orgID: "test-org",
|
||||
routes: []*alertmanagertypes.RoutePolicy{validRoute1, invalidRoute},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "single valid route",
|
||||
orgID: "test-org",
|
||||
routes: []*alertmanagertypes.RoutePolicy{validRoute1},
|
||||
wantErr: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if !tt.wantErr && len(tt.routes) > 0 {
|
||||
routeStore.ExpectCreateBatch(tt.routes)
|
||||
}
|
||||
|
||||
err := provider.CreateRoutePolicies(ctx, tt.orgID, tt.routes)
|
||||
|
||||
if tt.wantErr {
|
||||
assert.Error(t, err)
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
assert.NoError(t, routeStore.ExpectationsWereMet())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -4,6 +4,9 @@ import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
"github.com/prometheus/alertmanager/featurecontrol"
|
||||
"github.com/prometheus/alertmanager/matcher/compat"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/alertmanagerserver"
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
@ -61,6 +64,7 @@ func New(
|
||||
}
|
||||
|
||||
func (service *Service) SyncServers(ctx context.Context) error {
|
||||
compat.InitFromFlags(service.settings.Logger(), featurecontrol.NoopFlags{})
|
||||
orgs, err := service.orgGetter.ListByOwnedKeyRange(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -142,7 +146,7 @@ func (service *Service) TestReceiver(ctx context.Context, orgID string, receiver
|
||||
return server.TestReceiver(ctx, receiver)
|
||||
}
|
||||
|
||||
func (service *Service) TestAlert(ctx context.Context, orgID string, alert *alertmanagertypes.PostableAlert, receivers []string) error {
|
||||
func (service *Service) TestAlert(ctx context.Context, orgID string, receiversMap map[*alertmanagertypes.PostableAlert][]string, config *alertmanagertypes.NotificationConfig) error {
|
||||
service.serversMtx.RLock()
|
||||
defer service.serversMtx.RUnlock()
|
||||
|
||||
@ -151,7 +155,7 @@ func (service *Service) TestAlert(ctx context.Context, orgID string, alert *aler
|
||||
return err
|
||||
}
|
||||
|
||||
return server.TestAlert(ctx, alert, receivers)
|
||||
return server.TestAlert(ctx, receiversMap, config)
|
||||
}
|
||||
|
||||
func (service *Service) Stop(ctx context.Context) error {
|
||||
|
||||
@ -2,8 +2,12 @@ package signozalertmanager
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
"github.com/prometheus/common/model"
|
||||
"time"
|
||||
|
||||
amConfig "github.com/prometheus/alertmanager/config"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager"
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/alertmanagerstore/sqlalertmanagerstore"
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
|
||||
@ -11,7 +15,9 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/factory"
|
||||
"github.com/SigNoz/signoz/pkg/modules/organization"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
"github.com/SigNoz/signoz/pkg/types"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/authtypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
@ -94,8 +100,29 @@ func (provider *provider) TestReceiver(ctx context.Context, orgID string, receiv
|
||||
return provider.service.TestReceiver(ctx, orgID, receiver)
|
||||
}
|
||||
|
||||
func (provider *provider) TestAlert(ctx context.Context, orgID string, alert *alertmanagertypes.PostableAlert, receivers []string) error {
|
||||
return provider.service.TestAlert(ctx, orgID, alert, receivers)
|
||||
func (provider *provider) TestAlert(ctx context.Context, orgID string, ruleID string, receiversMap map[*alertmanagertypes.PostableAlert][]string) error {
|
||||
config, err := provider.notificationManager.GetNotificationConfig(orgID, ruleID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if config.UsePolicy {
|
||||
for alert := range receiversMap {
|
||||
set := make(model.LabelSet)
|
||||
for k, v := range alert.Labels {
|
||||
set[model.LabelName(k)] = model.LabelValue(v)
|
||||
}
|
||||
match, err := provider.notificationManager.Match(ctx, orgID, alert.Labels[labels.AlertRuleIdLabel], set)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(match) == 0 {
|
||||
delete(receiversMap, alert)
|
||||
} else {
|
||||
receiversMap[alert] = match
|
||||
}
|
||||
}
|
||||
}
|
||||
return provider.service.TestAlert(ctx, orgID, receiversMap, config)
|
||||
}
|
||||
|
||||
func (provider *provider) ListChannels(ctx context.Context, orgID string) ([]*alertmanagertypes.Channel, error) {
|
||||
@ -211,3 +238,316 @@ func (provider *provider) DeleteNotificationConfig(ctx context.Context, orgID va
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (provider *provider) CreateRoutePolicy(ctx context.Context, routeRequest *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error) {
|
||||
claims, err := authtypes.ClaimsFromContext(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
orgID, err := valuer.NewUUID(claims.OrgID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := routeRequest.Validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
route := alertmanagertypes.RoutePolicy{
|
||||
Expression: routeRequest.Expression,
|
||||
ExpressionKind: routeRequest.ExpressionKind,
|
||||
Name: routeRequest.Name,
|
||||
Description: routeRequest.Description,
|
||||
Enabled: true,
|
||||
Tags: routeRequest.Tags,
|
||||
Channels: routeRequest.Channels,
|
||||
OrgID: claims.OrgID,
|
||||
Identifiable: types.Identifiable{
|
||||
ID: valuer.GenerateUUID(),
|
||||
},
|
||||
UserAuditable: types.UserAuditable{
|
||||
CreatedBy: claims.Email,
|
||||
UpdatedBy: claims.Email,
|
||||
},
|
||||
TimeAuditable: types.TimeAuditable{
|
||||
CreatedAt: time.Now(),
|
||||
UpdatedAt: time.Now(),
|
||||
},
|
||||
}
|
||||
|
||||
err = provider.notificationManager.CreateRoutePolicy(ctx, orgID.String(), &route)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &alertmanagertypes.GettableRoutePolicy{
|
||||
PostableRoutePolicy: *routeRequest,
|
||||
ID: route.ID.StringValue(),
|
||||
CreatedAt: &route.CreatedAt,
|
||||
UpdatedAt: &route.UpdatedAt,
|
||||
CreatedBy: &route.CreatedBy,
|
||||
UpdatedBy: &route.UpdatedBy,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (provider *provider) CreateRoutePolicies(ctx context.Context, routeRequests []*alertmanagertypes.PostableRoutePolicy) ([]*alertmanagertypes.GettableRoutePolicy, error) {
|
||||
claims, err := authtypes.ClaimsFromContext(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
orgID, err := valuer.NewUUID(claims.OrgID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(routeRequests) == 0 {
|
||||
return []*alertmanagertypes.GettableRoutePolicy{}, nil
|
||||
}
|
||||
|
||||
routes := make([]*alertmanagertypes.RoutePolicy, 0, len(routeRequests))
|
||||
results := make([]*alertmanagertypes.GettableRoutePolicy, 0, len(routeRequests))
|
||||
|
||||
for _, routeRequest := range routeRequests {
|
||||
if err := routeRequest.Validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
route := &alertmanagertypes.RoutePolicy{
|
||||
Expression: routeRequest.Expression,
|
||||
ExpressionKind: routeRequest.ExpressionKind,
|
||||
Name: routeRequest.Name,
|
||||
Description: routeRequest.Description,
|
||||
Enabled: true,
|
||||
Tags: routeRequest.Tags,
|
||||
Channels: routeRequest.Channels,
|
||||
OrgID: claims.OrgID,
|
||||
Identifiable: types.Identifiable{
|
||||
ID: valuer.GenerateUUID(),
|
||||
},
|
||||
UserAuditable: types.UserAuditable{
|
||||
CreatedBy: claims.Email,
|
||||
UpdatedBy: claims.Email,
|
||||
},
|
||||
TimeAuditable: types.TimeAuditable{
|
||||
CreatedAt: time.Now(),
|
||||
UpdatedAt: time.Now(),
|
||||
},
|
||||
}
|
||||
|
||||
routes = append(routes, route)
|
||||
results = append(results, &alertmanagertypes.GettableRoutePolicy{
|
||||
PostableRoutePolicy: *routeRequest,
|
||||
ID: route.ID.StringValue(),
|
||||
CreatedAt: &route.CreatedAt,
|
||||
UpdatedAt: &route.UpdatedAt,
|
||||
CreatedBy: &route.CreatedBy,
|
||||
UpdatedBy: &route.UpdatedBy,
|
||||
})
|
||||
}
|
||||
|
||||
err = provider.notificationManager.CreateRoutePolicies(ctx, orgID.String(), routes)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (provider *provider) GetRoutePolicyByID(ctx context.Context, routeID string) (*alertmanagertypes.GettableRoutePolicy, error) {
|
||||
claims, err := authtypes.ClaimsFromContext(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
orgID, err := valuer.NewUUID(claims.OrgID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
route, err := provider.notificationManager.GetRoutePolicyByID(ctx, orgID.String(), routeID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &alertmanagertypes.GettableRoutePolicy{
|
||||
PostableRoutePolicy: alertmanagertypes.PostableRoutePolicy{
|
||||
Expression: route.Expression,
|
||||
ExpressionKind: route.ExpressionKind,
|
||||
Channels: route.Channels,
|
||||
Name: route.Name,
|
||||
Description: route.Description,
|
||||
Tags: route.Tags,
|
||||
},
|
||||
ID: route.ID.StringValue(),
|
||||
CreatedAt: &route.CreatedAt,
|
||||
UpdatedAt: &route.UpdatedAt,
|
||||
CreatedBy: &route.CreatedBy,
|
||||
UpdatedBy: &route.UpdatedBy,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (provider *provider) GetAllRoutePolicies(ctx context.Context) ([]*alertmanagertypes.GettableRoutePolicy, error) {
|
||||
claims, err := authtypes.ClaimsFromContext(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
orgID, err := valuer.NewUUID(claims.OrgID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
routes, err := provider.notificationManager.GetAllRoutePolicies(ctx, orgID.String())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
results := make([]*alertmanagertypes.GettableRoutePolicy, 0, len(routes))
|
||||
for _, route := range routes {
|
||||
results = append(results, &alertmanagertypes.GettableRoutePolicy{
|
||||
PostableRoutePolicy: alertmanagertypes.PostableRoutePolicy{
|
||||
Expression: route.Expression,
|
||||
ExpressionKind: route.ExpressionKind,
|
||||
Channels: route.Channels,
|
||||
Name: route.Name,
|
||||
Description: route.Description,
|
||||
Tags: route.Tags,
|
||||
},
|
||||
ID: route.ID.StringValue(),
|
||||
CreatedAt: &route.CreatedAt,
|
||||
UpdatedAt: &route.UpdatedAt,
|
||||
CreatedBy: &route.CreatedBy,
|
||||
UpdatedBy: &route.UpdatedBy,
|
||||
})
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (provider *provider) UpdateRoutePolicyByID(ctx context.Context, routeID string, route *alertmanagertypes.PostableRoutePolicy) (*alertmanagertypes.GettableRoutePolicy, error) {
|
||||
claims, err := authtypes.ClaimsFromContext(ctx)
|
||||
if err != nil {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeUnauthenticated, "invalid claims: %v", err)
|
||||
}
|
||||
orgID, err := valuer.NewUUID(claims.OrgID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if routeID == "" {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
|
||||
}
|
||||
|
||||
if route == nil {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "route cannot be nil")
|
||||
}
|
||||
|
||||
if err := route.Validate(); err != nil {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid route: %v", err)
|
||||
}
|
||||
|
||||
existingRoute, err := provider.notificationManager.GetRoutePolicyByID(ctx, claims.OrgID, routeID)
|
||||
if err != nil {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeNotFound, "route not found: %v", err)
|
||||
}
|
||||
|
||||
updatedRoute := &alertmanagertypes.RoutePolicy{
|
||||
Expression: route.Expression,
|
||||
ExpressionKind: route.ExpressionKind,
|
||||
Name: route.Name,
|
||||
Description: route.Description,
|
||||
Tags: route.Tags,
|
||||
Channels: route.Channels,
|
||||
OrgID: claims.OrgID,
|
||||
Identifiable: existingRoute.Identifiable,
|
||||
UserAuditable: types.UserAuditable{
|
||||
CreatedBy: existingRoute.CreatedBy,
|
||||
UpdatedBy: claims.Email,
|
||||
},
|
||||
TimeAuditable: types.TimeAuditable{
|
||||
CreatedAt: existingRoute.CreatedAt,
|
||||
UpdatedAt: time.Now(),
|
||||
},
|
||||
}
|
||||
|
||||
err = provider.notificationManager.DeleteRoutePolicy(ctx, orgID.String(), routeID)
|
||||
if err != nil {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInternal, "error deleting existing route: %v", err)
|
||||
}
|
||||
|
||||
err = provider.notificationManager.CreateRoutePolicy(ctx, orgID.String(), updatedRoute)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &alertmanagertypes.GettableRoutePolicy{
|
||||
PostableRoutePolicy: *route,
|
||||
ID: updatedRoute.ID.StringValue(),
|
||||
CreatedAt: &updatedRoute.CreatedAt,
|
||||
UpdatedAt: &updatedRoute.UpdatedAt,
|
||||
CreatedBy: &updatedRoute.CreatedBy,
|
||||
UpdatedBy: &updatedRoute.UpdatedBy,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (provider *provider) DeleteRoutePolicyByID(ctx context.Context, routeID string) error {
|
||||
claims, err := authtypes.ClaimsFromContext(ctx)
|
||||
if err != nil {
|
||||
return errors.NewInvalidInputf(errors.CodeUnauthenticated, "invalid claims: %v", err)
|
||||
}
|
||||
orgID, err := valuer.NewUUID(claims.OrgID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if routeID == "" {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "routeID cannot be empty")
|
||||
}
|
||||
|
||||
return provider.notificationManager.DeleteRoutePolicy(ctx, orgID.String(), routeID)
|
||||
}
|
||||
|
||||
func (provider *provider) CreateInhibitRules(ctx context.Context, orgID valuer.UUID, rules []amConfig.InhibitRule) error {
|
||||
config, err := provider.configStore.Get(ctx, orgID.String())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := config.AddInhibitRules(rules); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return provider.configStore.Set(ctx, config)
|
||||
}
|
||||
|
||||
func (provider *provider) DeleteAllRoutePoliciesByRuleId(ctx context.Context, names string) error {
|
||||
claims, err := authtypes.ClaimsFromContext(ctx)
|
||||
if err != nil {
|
||||
return errors.NewInvalidInputf(errors.CodeUnauthenticated, "invalid claims: %v", err)
|
||||
}
|
||||
orgID, err := valuer.NewUUID(claims.OrgID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return provider.notificationManager.DeleteAllRoutePoliciesByName(ctx, orgID.String(), names)
|
||||
}
|
||||
|
||||
func (provider *provider) UpdateAllRoutePoliciesByRuleId(ctx context.Context, names string, routes []*alertmanagertypes.PostableRoutePolicy) error {
|
||||
err := provider.DeleteAllRoutePoliciesByRuleId(ctx, names)
|
||||
if err != nil {
|
||||
return errors.NewInvalidInputf(errors.CodeInternal, "error deleting the routes: %v", err)
|
||||
}
|
||||
_, err = provider.CreateRoutePolicies(ctx, routes)
|
||||
return err
|
||||
}
|
||||
|
||||
func (provider *provider) DeleteAllInhibitRulesByRuleId(ctx context.Context, orgID valuer.UUID, ruleId string) error {
|
||||
config, err := provider.configStore.Get(ctx, orgID.String())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := config.DeleteRuleIDInhibitor(ruleId); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return provider.configStore.Set(ctx, config)
|
||||
}
|
||||
|
||||
@ -10,7 +10,6 @@ import (
|
||||
"fmt"
|
||||
"github.com/SigNoz/signoz/pkg/modules/thirdpartyapi"
|
||||
|
||||
//qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"io"
|
||||
"math"
|
||||
"net/http"
|
||||
@ -492,6 +491,12 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router, am *middleware.AuthZ) {
|
||||
router.HandleFunc("/api/v1/channels", am.EditAccess(aH.AlertmanagerAPI.CreateChannel)).Methods(http.MethodPost)
|
||||
router.HandleFunc("/api/v1/testChannel", am.EditAccess(aH.AlertmanagerAPI.TestReceiver)).Methods(http.MethodPost)
|
||||
|
||||
router.HandleFunc("/api/v1/route_policies", am.ViewAccess(aH.AlertmanagerAPI.GetAllRoutePolicies)).Methods(http.MethodGet)
|
||||
router.HandleFunc("/api/v1/route_policies/{id}", am.ViewAccess(aH.AlertmanagerAPI.GetRoutePolicyByID)).Methods(http.MethodGet)
|
||||
router.HandleFunc("/api/v1/route_policies", am.AdminAccess(aH.AlertmanagerAPI.CreateRoutePolicy)).Methods(http.MethodPost)
|
||||
router.HandleFunc("/api/v1/route_policies/{id}", am.AdminAccess(aH.AlertmanagerAPI.DeleteRoutePolicyByID)).Methods(http.MethodDelete)
|
||||
router.HandleFunc("/api/v1/route_policies/{id}", am.AdminAccess(aH.AlertmanagerAPI.UpdateRoutePolicy)).Methods(http.MethodPut)
|
||||
|
||||
router.HandleFunc("/api/v1/alerts", am.ViewAccess(aH.AlertmanagerAPI.GetAlerts)).Methods(http.MethodGet)
|
||||
|
||||
router.HandleFunc("/api/v1/rules", am.ViewAccess(aH.listRules)).Methods(http.MethodGet)
|
||||
@ -616,6 +621,7 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router, am *middleware.AuthZ) {
|
||||
|
||||
// Export
|
||||
router.HandleFunc("/api/v1/export_raw_data", am.ViewAccess(aH.Signoz.Handlers.RawDataExport.ExportRawData)).Methods(http.MethodGet)
|
||||
|
||||
}
|
||||
|
||||
func (ah *APIHandler) MetricExplorerRoutes(router *mux.Router, am *middleware.AuthZ) {
|
||||
|
||||
@ -4,13 +4,11 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"math"
|
||||
"net/url"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/converter"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/model"
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
@ -167,22 +165,6 @@ func NewBaseRule(id string, orgID valuer.UUID, p *ruletypes.PostableRule, reader
|
||||
return baseRule, nil
|
||||
}
|
||||
|
||||
func (r *BaseRule) targetVal() float64 {
|
||||
if r.ruleCondition == nil || r.ruleCondition.Target == nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
// get the converter for the target unit
|
||||
unitConverter := converter.FromUnit(converter.Unit(r.ruleCondition.TargetUnit))
|
||||
// convert the target value to the y-axis unit
|
||||
value := unitConverter.Convert(converter.Value{
|
||||
F: *r.ruleCondition.Target,
|
||||
U: converter.Unit(r.ruleCondition.TargetUnit),
|
||||
}, converter.Unit(r.Unit()))
|
||||
|
||||
return value.F
|
||||
}
|
||||
|
||||
func (r *BaseRule) matchType() ruletypes.MatchType {
|
||||
if r.ruleCondition == nil {
|
||||
return ruletypes.AtleastOnce
|
||||
@ -221,10 +203,6 @@ func (r *BaseRule) HoldDuration() time.Duration {
|
||||
return r.holdDuration
|
||||
}
|
||||
|
||||
func (r *BaseRule) TargetVal() float64 {
|
||||
return r.targetVal()
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) hostFromSource() string {
|
||||
parsedUrl, err := url.Parse(r.source)
|
||||
if err != nil {
|
||||
@ -380,232 +358,6 @@ func (r *BaseRule) ForEachActiveAlert(f func(*ruletypes.Alert)) {
|
||||
}
|
||||
}
|
||||
|
||||
func (r *BaseRule) ShouldAlert(series v3.Series) (ruletypes.Sample, bool) {
|
||||
var alertSmpl ruletypes.Sample
|
||||
var shouldAlert bool
|
||||
var lbls qslabels.Labels
|
||||
|
||||
for name, value := range series.Labels {
|
||||
lbls = append(lbls, qslabels.Label{Name: name, Value: value})
|
||||
}
|
||||
|
||||
series.Points = removeGroupinSetPoints(series)
|
||||
|
||||
// nothing to evaluate
|
||||
if len(series.Points) == 0 {
|
||||
return alertSmpl, false
|
||||
}
|
||||
|
||||
if r.ruleCondition.RequireMinPoints {
|
||||
if len(series.Points) < r.ruleCondition.RequiredNumPoints {
|
||||
zap.L().Info("not enough data points to evaluate series, skipping", zap.String("ruleid", r.ID()), zap.Int("numPoints", len(series.Points)), zap.Int("requiredPoints", r.ruleCondition.RequiredNumPoints))
|
||||
return alertSmpl, false
|
||||
}
|
||||
}
|
||||
|
||||
switch r.matchType() {
|
||||
case ruletypes.AtleastOnce:
|
||||
// If any sample matches the condition, the rule is firing.
|
||||
if r.compareOp() == ruletypes.ValueIsAbove {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value > r.targetVal() {
|
||||
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = true
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueIsBelow {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value < r.targetVal() {
|
||||
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = true
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueIsEq {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value == r.targetVal() {
|
||||
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = true
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueIsNotEq {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value != r.targetVal() {
|
||||
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = true
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueOutsideBounds {
|
||||
for _, smpl := range series.Points {
|
||||
if math.Abs(smpl.Value) >= r.targetVal() {
|
||||
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
case ruletypes.AllTheTimes:
|
||||
// If all samples match the condition, the rule is firing.
|
||||
shouldAlert = true
|
||||
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: r.targetVal()}, Metric: lbls}
|
||||
if r.compareOp() == ruletypes.ValueIsAbove {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value <= r.targetVal() {
|
||||
shouldAlert = false
|
||||
break
|
||||
}
|
||||
}
|
||||
// use min value from the series
|
||||
if shouldAlert {
|
||||
var minValue float64 = math.Inf(1)
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value < minValue {
|
||||
minValue = smpl.Value
|
||||
}
|
||||
}
|
||||
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: minValue}, Metric: lbls}
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueIsBelow {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value >= r.targetVal() {
|
||||
shouldAlert = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if shouldAlert {
|
||||
var maxValue float64 = math.Inf(-1)
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value > maxValue {
|
||||
maxValue = smpl.Value
|
||||
}
|
||||
}
|
||||
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: maxValue}, Metric: lbls}
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueIsEq {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value != r.targetVal() {
|
||||
shouldAlert = false
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueIsNotEq {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value == r.targetVal() {
|
||||
shouldAlert = false
|
||||
break
|
||||
}
|
||||
}
|
||||
// use any non-inf or nan value from the series
|
||||
if shouldAlert {
|
||||
for _, smpl := range series.Points {
|
||||
if !math.IsInf(smpl.Value, 0) && !math.IsNaN(smpl.Value) {
|
||||
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueOutsideBounds {
|
||||
for _, smpl := range series.Points {
|
||||
if math.Abs(smpl.Value) < r.targetVal() {
|
||||
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = false
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
case ruletypes.OnAverage:
|
||||
// If the average of all samples matches the condition, the rule is firing.
|
||||
var sum, count float64
|
||||
for _, smpl := range series.Points {
|
||||
if math.IsNaN(smpl.Value) || math.IsInf(smpl.Value, 0) {
|
||||
continue
|
||||
}
|
||||
sum += smpl.Value
|
||||
count++
|
||||
}
|
||||
avg := sum / count
|
||||
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: avg}, Metric: lbls}
|
||||
if r.compareOp() == ruletypes.ValueIsAbove {
|
||||
if avg > r.targetVal() {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueIsBelow {
|
||||
if avg < r.targetVal() {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueIsEq {
|
||||
if avg == r.targetVal() {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueIsNotEq {
|
||||
if avg != r.targetVal() {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueOutsideBounds {
|
||||
if math.Abs(avg) >= r.targetVal() {
|
||||
shouldAlert = true
|
||||
}
|
||||
}
|
||||
case ruletypes.InTotal:
|
||||
// If the sum of all samples matches the condition, the rule is firing.
|
||||
var sum float64
|
||||
|
||||
for _, smpl := range series.Points {
|
||||
if math.IsNaN(smpl.Value) || math.IsInf(smpl.Value, 0) {
|
||||
continue
|
||||
}
|
||||
sum += smpl.Value
|
||||
}
|
||||
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: sum}, Metric: lbls}
|
||||
if r.compareOp() == ruletypes.ValueIsAbove {
|
||||
if sum > r.targetVal() {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueIsBelow {
|
||||
if sum < r.targetVal() {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueIsEq {
|
||||
if sum == r.targetVal() {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueIsNotEq {
|
||||
if sum != r.targetVal() {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueOutsideBounds {
|
||||
if math.Abs(sum) >= r.targetVal() {
|
||||
shouldAlert = true
|
||||
}
|
||||
}
|
||||
case ruletypes.Last:
|
||||
// If the last sample matches the condition, the rule is firing.
|
||||
shouldAlert = false
|
||||
alertSmpl = ruletypes.Sample{Point: ruletypes.Point{V: series.Points[len(series.Points)-1].Value}, Metric: lbls}
|
||||
if r.compareOp() == ruletypes.ValueIsAbove {
|
||||
if series.Points[len(series.Points)-1].Value > r.targetVal() {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueIsBelow {
|
||||
if series.Points[len(series.Points)-1].Value < r.targetVal() {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueIsEq {
|
||||
if series.Points[len(series.Points)-1].Value == r.targetVal() {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if r.compareOp() == ruletypes.ValueIsNotEq {
|
||||
if series.Points[len(series.Points)-1].Value != r.targetVal() {
|
||||
shouldAlert = true
|
||||
}
|
||||
}
|
||||
}
|
||||
return alertSmpl, shouldAlert
|
||||
}
|
||||
|
||||
func (r *BaseRule) RecordRuleStateHistory(ctx context.Context, prevState, currentState model.AlertState, itemsToAdd []model.RuleStateHistory) error {
|
||||
zap.L().Debug("recording rule state history", zap.String("ruleid", r.ID()), zap.Any("prevState", prevState), zap.Any("currentState", currentState), zap.Any("itemsToAdd", itemsToAdd))
|
||||
revisedItemsToAdd := map[uint64]model.RuleStateHistory{}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package rules
|
||||
|
||||
import (
|
||||
"github.com/stretchr/testify/require"
|
||||
"testing"
|
||||
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
@ -22,6 +23,15 @@ func TestBaseRule_RequireMinPoints(t *testing.T) {
|
||||
RequireMinPoints: true,
|
||||
RequiredNumPoints: 4,
|
||||
},
|
||||
|
||||
Threshold: ruletypes.BasicRuleThresholds{
|
||||
{
|
||||
Name: "test-threshold",
|
||||
TargetValue: &threshold,
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
},
|
||||
},
|
||||
},
|
||||
series: &v3.Series{
|
||||
Points: []v3.Point{
|
||||
@ -41,6 +51,14 @@ func TestBaseRule_RequireMinPoints(t *testing.T) {
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
Target: &threshold,
|
||||
},
|
||||
Threshold: ruletypes.BasicRuleThresholds{
|
||||
{
|
||||
Name: "test-threshold",
|
||||
TargetValue: &threshold,
|
||||
CompareOp: ruletypes.ValueIsAbove,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
},
|
||||
},
|
||||
},
|
||||
series: &v3.Series{
|
||||
Points: []v3.Point{
|
||||
@ -56,10 +74,9 @@ func TestBaseRule_RequireMinPoints(t *testing.T) {
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
_, shouldAlert := test.rule.ShouldAlert(*test.series)
|
||||
if shouldAlert != test.shouldAlert {
|
||||
t.Errorf("expected shouldAlert to be %v, got %v", test.shouldAlert, shouldAlert)
|
||||
}
|
||||
_, err := test.rule.Threshold.ShouldAlert(*test.series, "")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, len(test.series.Points) >= test.rule.ruleCondition.RequiredNumPoints, test.shouldAlert)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
|
||||
"log/slog"
|
||||
"sort"
|
||||
"strings"
|
||||
@ -350,39 +351,35 @@ func (m *Manager) EditRule(ctx context.Context, ruleStr string, id valuer.UUID)
|
||||
existingRule.Data = ruleStr
|
||||
|
||||
return m.ruleStore.EditRule(ctx, existingRule, func(ctx context.Context) error {
|
||||
cfg, err := m.alertmanager.GetConfig(ctx, claims.OrgID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var preferredChannels []string
|
||||
if len(parsedRule.PreferredChannels) == 0 {
|
||||
channels, err := m.alertmanager.ListChannels(ctx, claims.OrgID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, channel := range channels {
|
||||
preferredChannels = append(preferredChannels, channel.Name)
|
||||
}
|
||||
} else {
|
||||
preferredChannels = parsedRule.PreferredChannels
|
||||
}
|
||||
err = cfg.UpdateRuleIDMatcher(id.StringValue(), preferredChannels)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if parsedRule.NotificationSettings != nil {
|
||||
config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig()
|
||||
err = m.alertmanager.SetNotificationConfig(ctx, orgID, existingRule.ID.StringValue(), &config)
|
||||
err = m.alertmanager.SetNotificationConfig(ctx, orgID, id.StringValue(), &config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if !parsedRule.NotificationSettings.UsePolicy {
|
||||
request, err := parsedRule.GetRuleRouteRequest(id.StringValue())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = m.alertmanager.UpdateAllRoutePoliciesByRuleId(ctx, id.StringValue(), request)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = m.alertmanager.DeleteAllInhibitRulesByRuleId(ctx, orgID, id.StringValue())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = m.alertmanager.SetConfig(ctx, cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
inhibitRules, err := parsedRule.GetInhibitRules(id.StringValue())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = m.alertmanager.CreateInhibitRules(ctx, orgID, inhibitRules)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
err = m.syncRuleStateWithTask(ctx, orgID, prepareTaskName(existingRule.ID.StringValue()), &parsedRule)
|
||||
if err != nil {
|
||||
@ -488,6 +485,19 @@ func (m *Manager) DeleteRule(ctx context.Context, idStr string) error {
|
||||
}
|
||||
|
||||
err = m.alertmanager.DeleteNotificationConfig(ctx, orgID, id.String())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = m.alertmanager.DeleteAllRoutePoliciesByRuleId(ctx, id.String())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = m.alertmanager.DeleteAllInhibitRulesByRuleId(ctx, orgID, id.String())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
taskName := prepareTaskName(id.StringValue())
|
||||
m.deleteTask(taskName)
|
||||
@ -548,41 +558,30 @@ func (m *Manager) CreateRule(ctx context.Context, ruleStr string) (*ruletypes.Ge
|
||||
}
|
||||
|
||||
id, err := m.ruleStore.CreateRule(ctx, storedRule, func(ctx context.Context, id valuer.UUID) error {
|
||||
cfg, err := m.alertmanager.GetConfig(ctx, claims.OrgID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var preferredChannels []string
|
||||
if len(parsedRule.PreferredChannels) == 0 {
|
||||
channels, err := m.alertmanager.ListChannels(ctx, claims.OrgID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, channel := range channels {
|
||||
preferredChannels = append(preferredChannels, channel.Name)
|
||||
}
|
||||
} else {
|
||||
preferredChannels = parsedRule.PreferredChannels
|
||||
}
|
||||
|
||||
if parsedRule.NotificationSettings != nil {
|
||||
config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig()
|
||||
err = m.alertmanager.SetNotificationConfig(ctx, orgID, storedRule.ID.StringValue(), &config)
|
||||
err = m.alertmanager.SetNotificationConfig(ctx, orgID, id.StringValue(), &config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
err = cfg.CreateRuleIDMatcher(id.StringValue(), preferredChannels)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = m.alertmanager.SetConfig(ctx, cfg)
|
||||
if err != nil {
|
||||
return err
|
||||
if !parsedRule.NotificationSettings.UsePolicy {
|
||||
request, err := parsedRule.GetRuleRouteRequest(id.StringValue())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = m.alertmanager.CreateRoutePolicies(ctx, request)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
inhibitRules, err := parsedRule.GetInhibitRules(id.StringValue())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = m.alertmanager.CreateInhibitRules(ctx, orgID, inhibitRules)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
taskName := prepareTaskName(id.StringValue())
|
||||
@ -756,36 +755,30 @@ func (m *Manager) prepareTestNotifyFunc() NotifyFunc {
|
||||
if len(alerts) == 0 {
|
||||
return
|
||||
}
|
||||
ruleID := alerts[0].Labels.Map()[labels.AlertRuleIdLabel]
|
||||
receiverMap := make(map[*alertmanagertypes.PostableAlert][]string)
|
||||
for _, alert := range alerts {
|
||||
generatorURL := alert.GeneratorURL
|
||||
|
||||
alert := alerts[0]
|
||||
generatorURL := alert.GeneratorURL
|
||||
|
||||
a := &alertmanagertypes.PostableAlert{}
|
||||
a.Annotations = alert.Annotations.Map()
|
||||
a.StartsAt = strfmt.DateTime(alert.FiredAt)
|
||||
a.Alert = alertmanagertypes.AlertModel{
|
||||
Labels: alert.Labels.Map(),
|
||||
GeneratorURL: strfmt.URI(generatorURL),
|
||||
}
|
||||
if !alert.ResolvedAt.IsZero() {
|
||||
a.EndsAt = strfmt.DateTime(alert.ResolvedAt)
|
||||
} else {
|
||||
a.EndsAt = strfmt.DateTime(alert.ValidUntil)
|
||||
}
|
||||
|
||||
if len(alert.Receivers) == 0 {
|
||||
channels, err := m.alertmanager.ListChannels(ctx, orgID)
|
||||
if err != nil {
|
||||
zap.L().Error("failed to list channels while sending test notification", zap.Error(err))
|
||||
return
|
||||
a := &alertmanagertypes.PostableAlert{}
|
||||
a.Annotations = alert.Annotations.Map()
|
||||
a.StartsAt = strfmt.DateTime(alert.FiredAt)
|
||||
a.Alert = alertmanagertypes.AlertModel{
|
||||
Labels: alert.Labels.Map(),
|
||||
GeneratorURL: strfmt.URI(generatorURL),
|
||||
}
|
||||
|
||||
for _, channel := range channels {
|
||||
alert.Receivers = append(alert.Receivers, channel.Name)
|
||||
if !alert.ResolvedAt.IsZero() {
|
||||
a.EndsAt = strfmt.DateTime(alert.ResolvedAt)
|
||||
} else {
|
||||
a.EndsAt = strfmt.DateTime(alert.ValidUntil)
|
||||
}
|
||||
receiverMap[a] = alert.Receivers
|
||||
}
|
||||
err := m.alertmanager.TestAlert(ctx, orgID, ruleID, receiverMap)
|
||||
if err != nil {
|
||||
zap.L().Error("failed to send test notification", zap.Error(err))
|
||||
return
|
||||
}
|
||||
|
||||
m.alertmanager.TestAlert(ctx, orgID, a, alert.Receivers)
|
||||
}
|
||||
}
|
||||
|
||||
@ -983,6 +976,17 @@ func (m *Manager) TestNotification(ctx context.Context, orgID valuer.UUID, ruleS
|
||||
if err != nil {
|
||||
return 0, model.BadRequest(err)
|
||||
}
|
||||
if !parsedRule.NotificationSettings.UsePolicy {
|
||||
parsedRule.NotificationSettings.GroupBy = append(parsedRule.NotificationSettings.GroupBy, ruletypes.LabelThresholdName)
|
||||
}
|
||||
config := parsedRule.NotificationSettings.GetAlertManagerNotificationConfig()
|
||||
err = m.alertmanager.SetNotificationConfig(ctx, orgID, parsedRule.AlertName, &config)
|
||||
if err != nil {
|
||||
return 0, &model.ApiError{
|
||||
Typ: model.ErrorBadData,
|
||||
Err: err,
|
||||
}
|
||||
}
|
||||
|
||||
alertCount, apiErr := m.prepareTestRuleFunc(PrepareTestRuleOptions{
|
||||
Rule: &parsedRule,
|
||||
|
||||
@ -2,10 +2,15 @@ package rules
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/nfroutingstoretest"
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/rulebasednotification"
|
||||
"github.com/prometheus/common/model"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfmanagertest"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"go.uber.org/zap"
|
||||
|
||||
@ -32,19 +37,38 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
|
||||
Email: "test@example.com",
|
||||
Role: "admin",
|
||||
}
|
||||
manager, mockSQLRuleStore, orgId := setupTestManager(t)
|
||||
manager, mockSQLRuleStore, mockRouteStore, nfmanager, orgId := setupTestManager(t)
|
||||
claims.OrgID = orgId
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
originalData string
|
||||
patchData string
|
||||
Route []*alertmanagertypes.RoutePolicy
|
||||
Config *alertmanagertypes.NotificationConfig
|
||||
expectedResult func(*ruletypes.GettableRule) bool
|
||||
expectError bool
|
||||
description string
|
||||
}{
|
||||
{
|
||||
name: "patch complete rule with task sync validation",
|
||||
Route: []*alertmanagertypes.RoutePolicy{
|
||||
{
|
||||
Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
|
||||
ExpressionKind: alertmanagertypes.RuleBasedExpression,
|
||||
Channels: []string{"test-alerts"},
|
||||
Name: "{{.ruleId}}",
|
||||
Enabled: true,
|
||||
},
|
||||
},
|
||||
Config: &alertmanagertypes.NotificationConfig{
|
||||
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
|
||||
Renotify: alertmanagertypes.ReNotificationConfig{
|
||||
RenotifyInterval: 4 * time.Hour,
|
||||
NoDataInterval: 4 * time.Hour,
|
||||
},
|
||||
UsePolicy: false,
|
||||
},
|
||||
originalData: `{
|
||||
"schemaVersion":"v1",
|
||||
"alert": "test-original-alert",
|
||||
@ -95,6 +119,23 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "patch rule to disabled state",
|
||||
Route: []*alertmanagertypes.RoutePolicy{
|
||||
{
|
||||
Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
|
||||
ExpressionKind: alertmanagertypes.RuleBasedExpression,
|
||||
Channels: []string{"test-alerts"},
|
||||
Name: "{{.ruleId}}",
|
||||
Enabled: true,
|
||||
},
|
||||
},
|
||||
Config: &alertmanagertypes.NotificationConfig{
|
||||
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
|
||||
Renotify: alertmanagertypes.ReNotificationConfig{
|
||||
RenotifyInterval: 4 * time.Hour,
|
||||
NoDataInterval: 4 * time.Hour,
|
||||
},
|
||||
UsePolicy: false,
|
||||
},
|
||||
originalData: `{
|
||||
"schemaVersion":"v2",
|
||||
"alert": "test-disable-alert",
|
||||
@ -179,6 +220,20 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
|
||||
OrgID: claims.OrgID,
|
||||
}
|
||||
|
||||
// Update route expectations with actual rule ID
|
||||
routesWithRuleID := make([]*alertmanagertypes.RoutePolicy, len(tc.Route))
|
||||
for i, route := range tc.Route {
|
||||
routesWithRuleID[i] = &alertmanagertypes.RoutePolicy{
|
||||
Expression: strings.Replace(route.Expression, "{{.ruleId}}", ruleID.String(), -1),
|
||||
ExpressionKind: route.ExpressionKind,
|
||||
Channels: route.Channels,
|
||||
Name: strings.Replace(route.Name, "{{.ruleId}}", ruleID.String(), -1),
|
||||
Enabled: route.Enabled,
|
||||
}
|
||||
}
|
||||
|
||||
mockRouteStore.ExpectDeleteRouteByName(existingRule.OrgID, ruleID.String())
|
||||
mockRouteStore.ExpectCreateBatch(routesWithRuleID)
|
||||
mockSQLRuleStore.ExpectGetStoredRule(ruleID, existingRule)
|
||||
mockSQLRuleStore.ExpectEditRule(existingRule)
|
||||
|
||||
@ -200,6 +255,12 @@ func TestManager_PatchRule_PayloadVariations(t *testing.T) {
|
||||
assert.Nil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be removed for disabled rule")
|
||||
} else {
|
||||
syncCompleted := waitForTaskSync(manager, taskName, true, 2*time.Second)
|
||||
|
||||
// Verify notification config
|
||||
config, err := nfmanager.GetNotificationConfig(orgId, result.Id)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tc.Config, config)
|
||||
|
||||
assert.True(t, syncCompleted, "Task synchronization should complete within timeout")
|
||||
assert.NotNil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be created/updated for enabled rule")
|
||||
assert.Greater(t, len(manager.Rules()), 0, "Rules should be updated in manager")
|
||||
@ -234,7 +295,7 @@ func findTaskByName(tasks []Task, taskName string) Task {
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore, string) {
|
||||
func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore, *nfroutingstoretest.MockSQLRouteStore, nfmanager.NotificationManager, string) {
|
||||
settings := instrumentationtest.New().ToProviderSettings()
|
||||
testDB := utils.NewQueryServiceDBForTests(t)
|
||||
|
||||
@ -266,7 +327,11 @@ func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore,
|
||||
t.Fatalf("Failed to create noop sharder: %v", err)
|
||||
}
|
||||
orgGetter := implorganization.NewGetter(implorganization.NewStore(testDB), noopSharder)
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
routeStore := nfroutingstoretest.NewMockSQLRouteStore()
|
||||
notificationManager, err := rulebasednotification.New(t.Context(), settings, nfmanager.Config{}, routeStore)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create alert manager: %v", err)
|
||||
}
|
||||
alertManager, err := signozalertmanager.New(context.TODO(), settings, alertmanager.Config{Provider: "signoz", Signoz: alertmanager.Signoz{PollInterval: 10 * time.Second, Config: alertmanagerserver.NewConfig()}}, testDB, orgGetter, notificationManager)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create alert manager: %v", err)
|
||||
@ -290,21 +355,40 @@ func setupTestManager(t *testing.T) (*Manager, *rulestoretest.MockSQLRuleStore,
|
||||
}
|
||||
|
||||
close(manager.block)
|
||||
return manager, mockSQLRuleStore, testOrgID.StringValue()
|
||||
return manager, mockSQLRuleStore, routeStore, notificationManager, testOrgID.StringValue()
|
||||
}
|
||||
|
||||
func TestCreateRule(t *testing.T) {
|
||||
claims := &authtypes.Claims{
|
||||
Email: "test@example.com",
|
||||
}
|
||||
manager, mockSQLRuleStore, orgId := setupTestManager(t)
|
||||
manager, mockSQLRuleStore, mockRouteStore, nfmanager, orgId := setupTestManager(t)
|
||||
claims.OrgID = orgId
|
||||
testCases := []struct {
|
||||
name string
|
||||
Route []*alertmanagertypes.RoutePolicy
|
||||
Config *alertmanagertypes.NotificationConfig
|
||||
ruleStr string
|
||||
}{
|
||||
{
|
||||
name: "validate stored rule data structure",
|
||||
Route: []*alertmanagertypes.RoutePolicy{
|
||||
{
|
||||
Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
|
||||
ExpressionKind: alertmanagertypes.RuleBasedExpression,
|
||||
Channels: []string{"test-alerts"},
|
||||
Name: "{{.ruleId}}",
|
||||
Enabled: true,
|
||||
},
|
||||
},
|
||||
Config: &alertmanagertypes.NotificationConfig{
|
||||
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
|
||||
Renotify: alertmanagertypes.ReNotificationConfig{
|
||||
RenotifyInterval: 4 * time.Hour,
|
||||
NoDataInterval: 4 * time.Hour,
|
||||
},
|
||||
UsePolicy: false,
|
||||
},
|
||||
ruleStr: `{
|
||||
"alert": "cpu usage",
|
||||
"ruleType": "threshold_rule",
|
||||
@ -341,6 +425,30 @@ func TestCreateRule(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "create complete v2 rule with thresholds",
|
||||
Route: []*alertmanagertypes.RoutePolicy{
|
||||
{
|
||||
Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"critical\""),
|
||||
ExpressionKind: alertmanagertypes.RuleBasedExpression,
|
||||
Channels: []string{"test-alerts"},
|
||||
Name: "{{.ruleId}}",
|
||||
Enabled: true,
|
||||
},
|
||||
{
|
||||
Expression: fmt.Sprintf("ruleId == \"{{.ruleId}}\" && threshold.name == \"warning\""),
|
||||
ExpressionKind: alertmanagertypes.RuleBasedExpression,
|
||||
Channels: []string{"test-alerts"},
|
||||
Name: "{{.ruleId}}",
|
||||
Enabled: true,
|
||||
},
|
||||
},
|
||||
Config: &alertmanagertypes.NotificationConfig{
|
||||
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("k8s.node.name"): {}, model.LabelName("ruleId"): {}},
|
||||
Renotify: alertmanagertypes.ReNotificationConfig{
|
||||
RenotifyInterval: 10 * time.Minute,
|
||||
NoDataInterval: 4 * time.Hour,
|
||||
},
|
||||
UsePolicy: false,
|
||||
},
|
||||
ruleStr: `{
|
||||
"schemaVersion":"v2",
|
||||
"state": "firing",
|
||||
@ -399,6 +507,18 @@ func TestCreateRule(t *testing.T) {
|
||||
"frequency": "1m"
|
||||
}
|
||||
},
|
||||
"notificationSettings": {
|
||||
"GroupBy": [
|
||||
"k8s.node.name"
|
||||
],
|
||||
"renotify": {
|
||||
"interval": "10m",
|
||||
"enabled": true,
|
||||
"alertStates": [
|
||||
"firing"
|
||||
]
|
||||
}
|
||||
},
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
},
|
||||
@ -429,6 +549,20 @@ func TestCreateRule(t *testing.T) {
|
||||
},
|
||||
OrgID: claims.OrgID,
|
||||
}
|
||||
|
||||
// Update route expectations with actual rule ID
|
||||
routesWithRuleID := make([]*alertmanagertypes.RoutePolicy, len(tc.Route))
|
||||
for i, route := range tc.Route {
|
||||
routesWithRuleID[i] = &alertmanagertypes.RoutePolicy{
|
||||
Expression: strings.Replace(route.Expression, "{{.ruleId}}", rule.ID.String(), -1),
|
||||
ExpressionKind: route.ExpressionKind,
|
||||
Channels: route.Channels,
|
||||
Name: strings.Replace(route.Name, "{{.ruleId}}", rule.ID.String(), -1),
|
||||
Enabled: route.Enabled,
|
||||
}
|
||||
}
|
||||
|
||||
mockRouteStore.ExpectCreateBatch(routesWithRuleID)
|
||||
mockSQLRuleStore.ExpectCreateRule(rule)
|
||||
|
||||
ctx := authtypes.NewContextWithClaims(context.Background(), *claims)
|
||||
@ -441,6 +575,12 @@ func TestCreateRule(t *testing.T) {
|
||||
// Wait for task creation with proper synchronization
|
||||
taskName := prepareTaskName(result.Id)
|
||||
syncCompleted := waitForTaskSync(manager, taskName, true, 2*time.Second)
|
||||
|
||||
// Verify notification config
|
||||
config, err := nfmanager.GetNotificationConfig(orgId, result.Id)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tc.Config, config)
|
||||
|
||||
assert.True(t, syncCompleted, "Task creation should complete within timeout")
|
||||
assert.NotNil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be created with correct name")
|
||||
assert.Greater(t, len(manager.Rules()), 0, "Rules should be added to manager")
|
||||
@ -455,14 +595,35 @@ func TestEditRule(t *testing.T) {
|
||||
claims := &authtypes.Claims{
|
||||
Email: "test@example.com",
|
||||
}
|
||||
manager, mockSQLRuleStore, orgId := setupTestManager(t)
|
||||
manager, mockSQLRuleStore, mockRouteStore, nfmanager, orgId := setupTestManager(t)
|
||||
claims.OrgID = orgId
|
||||
testCases := []struct {
|
||||
ruleID string
|
||||
name string
|
||||
Route []*alertmanagertypes.RoutePolicy
|
||||
Config *alertmanagertypes.NotificationConfig
|
||||
ruleStr string
|
||||
}{
|
||||
{
|
||||
name: "validate edit rule functionality",
|
||||
ruleID: "12345678-1234-1234-1234-123456789012",
|
||||
name: "validate edit rule functionality",
|
||||
Route: []*alertmanagertypes.RoutePolicy{
|
||||
{
|
||||
Expression: fmt.Sprintf("ruleId == \"rule1\" && threshold.name == \"critical\""),
|
||||
ExpressionKind: alertmanagertypes.RuleBasedExpression,
|
||||
Channels: []string{"critical-alerts"},
|
||||
Name: "12345678-1234-1234-1234-123456789012",
|
||||
Enabled: true,
|
||||
},
|
||||
},
|
||||
Config: &alertmanagertypes.NotificationConfig{
|
||||
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}},
|
||||
Renotify: alertmanagertypes.ReNotificationConfig{
|
||||
RenotifyInterval: 4 * time.Hour,
|
||||
NoDataInterval: 4 * time.Hour,
|
||||
},
|
||||
UsePolicy: false,
|
||||
},
|
||||
ruleStr: `{
|
||||
"alert": "updated cpu usage",
|
||||
"ruleType": "threshold_rule",
|
||||
@ -498,7 +659,32 @@ func TestEditRule(t *testing.T) {
|
||||
}`,
|
||||
},
|
||||
{
|
||||
name: "edit complete v2 rule with thresholds",
|
||||
ruleID: "12345678-1234-1234-1234-123456789013",
|
||||
name: "edit complete v2 rule with thresholds",
|
||||
Route: []*alertmanagertypes.RoutePolicy{
|
||||
{
|
||||
Expression: fmt.Sprintf("ruleId == \"rule2\" && threshold.name == \"critical\""),
|
||||
ExpressionKind: alertmanagertypes.RuleBasedExpression,
|
||||
Channels: []string{"test-alerts"},
|
||||
Name: "12345678-1234-1234-1234-123456789013",
|
||||
Enabled: true,
|
||||
},
|
||||
{
|
||||
Expression: fmt.Sprintf("ruleId == \"rule2\" && threshold.name == \"warning\""),
|
||||
ExpressionKind: alertmanagertypes.RuleBasedExpression,
|
||||
Channels: []string{"test-alerts"},
|
||||
Name: "12345678-1234-1234-1234-123456789013",
|
||||
Enabled: true,
|
||||
},
|
||||
},
|
||||
Config: &alertmanagertypes.NotificationConfig{
|
||||
NotificationGroup: map[model.LabelName]struct{}{model.LabelName("ruleId"): {}, model.LabelName("k8s.node.name"): {}},
|
||||
Renotify: alertmanagertypes.ReNotificationConfig{
|
||||
RenotifyInterval: 10 * time.Minute,
|
||||
NoDataInterval: 4 * time.Hour,
|
||||
},
|
||||
UsePolicy: false,
|
||||
},
|
||||
ruleStr: `{
|
||||
"schemaVersion":"v2",
|
||||
"state": "firing",
|
||||
@ -560,6 +746,18 @@ func TestEditRule(t *testing.T) {
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
},
|
||||
"notificationSettings": {
|
||||
"GroupBy": [
|
||||
"k8s.node.name"
|
||||
],
|
||||
"renotify": {
|
||||
"interval": "10m",
|
||||
"enabled": true,
|
||||
"alertStates": [
|
||||
"firing"
|
||||
]
|
||||
}
|
||||
},
|
||||
"annotations": {
|
||||
"description": "This alert is fired when memory usage crosses the threshold",
|
||||
"summary": "Memory usage threshold exceeded"
|
||||
@ -573,11 +771,13 @@ func TestEditRule(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
ruleID := valuer.GenerateUUID()
|
||||
|
||||
ruleId, err := valuer.NewUUID(tc.ruleID)
|
||||
if err != nil {
|
||||
t.Errorf("error creating ruleId: %s", err)
|
||||
}
|
||||
existingRule := &ruletypes.Rule{
|
||||
Identifiable: types.Identifiable{
|
||||
ID: ruleID,
|
||||
ID: ruleId,
|
||||
},
|
||||
TimeAuditable: types.TimeAuditable{
|
||||
CreatedAt: time.Now(),
|
||||
@ -590,18 +790,24 @@ func TestEditRule(t *testing.T) {
|
||||
Data: `{"alert": "original cpu usage", "disabled": false}`,
|
||||
OrgID: claims.OrgID,
|
||||
}
|
||||
|
||||
mockSQLRuleStore.ExpectGetStoredRule(ruleID, existingRule)
|
||||
mockRouteStore.ExpectDeleteRouteByName(existingRule.OrgID, ruleId.String())
|
||||
mockRouteStore.ExpectCreateBatch(tc.Route)
|
||||
mockSQLRuleStore.ExpectGetStoredRule(ruleId, existingRule)
|
||||
mockSQLRuleStore.ExpectEditRule(existingRule)
|
||||
|
||||
ctx := authtypes.NewContextWithClaims(context.Background(), *claims)
|
||||
err := manager.EditRule(ctx, tc.ruleStr, ruleID)
|
||||
err = manager.EditRule(ctx, tc.ruleStr, ruleId)
|
||||
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Wait for task update with proper synchronization
|
||||
taskName := prepareTaskName(ruleID.StringValue())
|
||||
|
||||
taskName := prepareTaskName(ruleId.String())
|
||||
syncCompleted := waitForTaskSync(manager, taskName, true, 2*time.Second)
|
||||
|
||||
config, err := nfmanager.GetNotificationConfig(orgId, ruleId.String())
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tc.Config, config)
|
||||
assert.True(t, syncCompleted, "Task update should complete within timeout")
|
||||
assert.NotNil(t, findTaskByName(manager.RuleTasks(), taskName), "Task should be updated with correct name")
|
||||
assert.Greater(t, len(manager.Rules()), 0, "Rules should be updated in manager")
|
||||
|
||||
@ -147,13 +147,19 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
|
||||
|
||||
var alerts = make(map[uint64]*ruletypes.Alert, len(res))
|
||||
|
||||
ruleReceivers := r.Threshold.GetRuleReceivers()
|
||||
ruleReceiverMap := make(map[string][]string)
|
||||
for _, value := range ruleReceivers {
|
||||
ruleReceiverMap[value.Name] = value.Channels
|
||||
}
|
||||
|
||||
for _, series := range res {
|
||||
|
||||
if len(series.Floats) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
results, err := r.Threshold.ShouldAlert(toCommonSeries(series))
|
||||
results, err := r.Threshold.ShouldAlert(toCommonSeries(series), r.Unit())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -165,7 +171,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
|
||||
}
|
||||
r.logger.DebugContext(ctx, "alerting for series", "rule_name", r.Name(), "series", series)
|
||||
|
||||
threshold := valueFormatter.Format(r.targetVal(), r.Unit())
|
||||
threshold := valueFormatter.Format(result.Target, result.TargetUnit)
|
||||
|
||||
tmplData := ruletypes.AlertTemplateData(l, valueFormatter.Format(result.V, r.Unit()), threshold)
|
||||
// Inject some convenience variables that are easier to remember for users
|
||||
@ -218,7 +224,6 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
|
||||
r.lastError = err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
alerts[h] = &ruletypes.Alert{
|
||||
Labels: lbs,
|
||||
QueryResultLables: resultLabels,
|
||||
@ -227,13 +232,12 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
|
||||
State: model.StatePending,
|
||||
Value: result.V,
|
||||
GeneratorURL: r.GeneratorURL(),
|
||||
Receivers: r.preferredChannels,
|
||||
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
|
||||
|
||||
// alerts[h] is ready, add or update active list now
|
||||
for h, a := range alerts {
|
||||
// Check whether we already have alerting state for the identifying label set.
|
||||
@ -241,7 +245,9 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
|
||||
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
|
||||
alert.Value = a.Value
|
||||
alert.Annotations = a.Annotations
|
||||
alert.Receivers = r.preferredChannels
|
||||
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
|
||||
alert.Receivers = ruleReceiverMap[v]
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
@ -696,7 +696,7 @@ func TestPromRuleShouldAlert(t *testing.T) {
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
resultVectors, err := rule.Threshold.ShouldAlert(toCommonSeries(c.values))
|
||||
resultVectors, err := rule.Threshold.ShouldAlert(toCommonSeries(c.values), rule.Unit())
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Compare full result vector with expected vector
|
||||
|
||||
@ -38,7 +38,6 @@ func defaultTestNotification(opts PrepareTestRuleOptions) (int, *model.ApiError)
|
||||
if parsedRule.RuleType == ruletypes.RuleTypeThreshold {
|
||||
|
||||
// add special labels for test alerts
|
||||
parsedRule.Annotations[labels.AlertSummaryLabel] = fmt.Sprintf("The rule threshold is set to %.4f, and the observed metric value is {{$value}}.", *parsedRule.RuleCondition.Target)
|
||||
parsedRule.Labels[labels.RuleSourceLabel] = ""
|
||||
parsedRule.Labels[labels.AlertRuleIdLabel] = ""
|
||||
|
||||
|
||||
@ -488,7 +488,7 @@ func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID,
|
||||
continue
|
||||
}
|
||||
}
|
||||
resultSeries, err := r.Threshold.ShouldAlert(*series)
|
||||
resultSeries, err := r.Threshold.ShouldAlert(*series, r.Unit())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -565,7 +565,7 @@ func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUI
|
||||
continue
|
||||
}
|
||||
}
|
||||
resultSeries, err := r.Threshold.ShouldAlert(*series)
|
||||
resultSeries, err := r.Threshold.ShouldAlert(*series, r.Unit())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -602,6 +602,12 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
|
||||
resultFPs := map[uint64]struct{}{}
|
||||
var alerts = make(map[uint64]*ruletypes.Alert, len(res))
|
||||
|
||||
ruleReceivers := r.Threshold.GetRuleReceivers()
|
||||
ruleReceiverMap := make(map[string][]string)
|
||||
for _, value := range ruleReceivers {
|
||||
ruleReceiverMap[value.Name] = value.Channels
|
||||
}
|
||||
|
||||
for _, smpl := range res {
|
||||
l := make(map[string]string, len(smpl.Metric))
|
||||
for _, lbl := range smpl.Metric {
|
||||
@ -610,7 +616,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
|
||||
|
||||
value := valueFormatter.Format(smpl.V, r.Unit())
|
||||
//todo(aniket): handle different threshold
|
||||
threshold := valueFormatter.Format(r.targetVal(), r.Unit())
|
||||
threshold := valueFormatter.Format(smpl.Target, smpl.TargetUnit)
|
||||
r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold)
|
||||
|
||||
tmplData := ruletypes.AlertTemplateData(l, value, threshold)
|
||||
@ -690,7 +696,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
|
||||
State: model.StatePending,
|
||||
Value: smpl.V,
|
||||
GeneratorURL: r.GeneratorURL(),
|
||||
Receivers: r.preferredChannels,
|
||||
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
|
||||
Missing: smpl.IsMissing,
|
||||
}
|
||||
}
|
||||
@ -705,7 +711,9 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
|
||||
|
||||
alert.Value = a.Value
|
||||
alert.Annotations = a.Annotations
|
||||
alert.Receivers = r.preferredChannels
|
||||
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
|
||||
alert.Receivers = ruleReceiverMap[v]
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
@ -824,7 +824,7 @@ func TestThresholdRuleShouldAlert(t *testing.T) {
|
||||
values.Points[i].Timestamp = time.Now().UnixMilli()
|
||||
}
|
||||
|
||||
resultVectors, err := rule.Threshold.ShouldAlert(c.values)
|
||||
resultVectors, err := rule.Threshold.ShouldAlert(c.values, rule.Unit())
|
||||
assert.NoError(t, err, "Test case %d", idx)
|
||||
|
||||
// Compare result vectors with expected behavior
|
||||
@ -1201,7 +1201,7 @@ func TestThresholdRuleLabelNormalization(t *testing.T) {
|
||||
values.Points[i].Timestamp = time.Now().UnixMilli()
|
||||
}
|
||||
|
||||
vector, err := rule.Threshold.ShouldAlert(c.values)
|
||||
vector, err := rule.Threshold.ShouldAlert(c.values, rule.Unit())
|
||||
assert.NoError(t, err)
|
||||
|
||||
for name, value := range c.values.Labels {
|
||||
@ -1211,7 +1211,7 @@ func TestThresholdRuleLabelNormalization(t *testing.T) {
|
||||
}
|
||||
|
||||
// Get result vectors from threshold evaluation
|
||||
resultVectors, err := rule.Threshold.ShouldAlert(c.values)
|
||||
resultVectors, err := rule.Threshold.ShouldAlert(c.values, rule.Unit())
|
||||
assert.NoError(t, err, "Test case %d", idx)
|
||||
|
||||
// Compare result vectors with expected behavior
|
||||
@ -1501,13 +1501,11 @@ func TestThresholdRuleUnitCombinations(t *testing.T) {
|
||||
Kind: ruletypes.BasicThresholdKind,
|
||||
Spec: ruletypes.BasicRuleThresholds{
|
||||
{
|
||||
Name: postableRule.AlertName,
|
||||
TargetValue: &c.target,
|
||||
TargetUnit: c.targetUnit,
|
||||
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
|
||||
MatchType: ruletypes.MatchType(c.matchType),
|
||||
CompareOp: ruletypes.CompareOp(c.compareOp),
|
||||
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
|
||||
Name: postableRule.AlertName,
|
||||
TargetValue: &c.target,
|
||||
TargetUnit: c.targetUnit,
|
||||
MatchType: ruletypes.MatchType(c.matchType),
|
||||
CompareOp: ruletypes.CompareOp(c.compareOp),
|
||||
},
|
||||
},
|
||||
}
|
||||
@ -1612,12 +1610,10 @@ func TestThresholdRuleNoData(t *testing.T) {
|
||||
Kind: ruletypes.BasicThresholdKind,
|
||||
Spec: ruletypes.BasicRuleThresholds{
|
||||
{
|
||||
Name: postableRule.AlertName,
|
||||
TargetValue: &target,
|
||||
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompareOp: ruletypes.ValueIsEq,
|
||||
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
|
||||
Name: postableRule.AlertName,
|
||||
TargetValue: &target,
|
||||
MatchType: ruletypes.AtleastOnce,
|
||||
CompareOp: ruletypes.ValueIsEq,
|
||||
},
|
||||
},
|
||||
}
|
||||
@ -1734,13 +1730,11 @@ func TestThresholdRuleTracesLink(t *testing.T) {
|
||||
Kind: ruletypes.BasicThresholdKind,
|
||||
Spec: ruletypes.BasicRuleThresholds{
|
||||
{
|
||||
Name: postableRule.AlertName,
|
||||
TargetValue: &c.target,
|
||||
TargetUnit: c.targetUnit,
|
||||
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
|
||||
MatchType: ruletypes.MatchType(c.matchType),
|
||||
CompareOp: ruletypes.CompareOp(c.compareOp),
|
||||
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
|
||||
Name: postableRule.AlertName,
|
||||
TargetValue: &c.target,
|
||||
TargetUnit: c.targetUnit,
|
||||
MatchType: ruletypes.MatchType(c.matchType),
|
||||
CompareOp: ruletypes.CompareOp(c.compareOp),
|
||||
},
|
||||
},
|
||||
}
|
||||
@ -1873,13 +1867,11 @@ func TestThresholdRuleLogsLink(t *testing.T) {
|
||||
Kind: ruletypes.BasicThresholdKind,
|
||||
Spec: ruletypes.BasicRuleThresholds{
|
||||
{
|
||||
Name: postableRule.AlertName,
|
||||
TargetValue: &c.target,
|
||||
TargetUnit: c.targetUnit,
|
||||
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
|
||||
MatchType: ruletypes.MatchType(c.matchType),
|
||||
CompareOp: ruletypes.CompareOp(c.compareOp),
|
||||
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
|
||||
Name: postableRule.AlertName,
|
||||
TargetValue: &c.target,
|
||||
TargetUnit: c.targetUnit,
|
||||
MatchType: ruletypes.MatchType(c.matchType),
|
||||
CompareOp: ruletypes.CompareOp(c.compareOp),
|
||||
},
|
||||
},
|
||||
}
|
||||
@ -2125,22 +2117,18 @@ func TestMultipleThresholdRule(t *testing.T) {
|
||||
Kind: ruletypes.BasicThresholdKind,
|
||||
Spec: ruletypes.BasicRuleThresholds{
|
||||
{
|
||||
Name: "first_threshold",
|
||||
TargetValue: &c.target,
|
||||
TargetUnit: c.targetUnit,
|
||||
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
|
||||
MatchType: ruletypes.MatchType(c.matchType),
|
||||
CompareOp: ruletypes.CompareOp(c.compareOp),
|
||||
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
|
||||
Name: "first_threshold",
|
||||
TargetValue: &c.target,
|
||||
TargetUnit: c.targetUnit,
|
||||
MatchType: ruletypes.MatchType(c.matchType),
|
||||
CompareOp: ruletypes.CompareOp(c.compareOp),
|
||||
},
|
||||
{
|
||||
Name: "second_threshold",
|
||||
TargetValue: &c.secondTarget,
|
||||
TargetUnit: c.targetUnit,
|
||||
RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit,
|
||||
MatchType: ruletypes.MatchType(c.matchType),
|
||||
CompareOp: ruletypes.CompareOp(c.compareOp),
|
||||
SelectedQuery: postableRule.RuleCondition.SelectedQuery,
|
||||
Name: "second_threshold",
|
||||
TargetValue: &c.secondTarget,
|
||||
TargetUnit: c.targetUnit,
|
||||
MatchType: ruletypes.MatchType(c.matchType),
|
||||
CompareOp: ruletypes.CompareOp(c.compareOp),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@ -38,6 +38,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/telemetrystore"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrystore/clickhousetelemetrystore"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrystore/telemetrystorehook"
|
||||
routeTypes "github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
"github.com/SigNoz/signoz/pkg/version"
|
||||
"github.com/SigNoz/signoz/pkg/web"
|
||||
"github.com/SigNoz/signoz/pkg/web/noopweb"
|
||||
@ -133,6 +134,7 @@ func NewSQLMigrationProviderFactories(
|
||||
sqlmigration.NewQueryBuilderV5MigrationFactory(sqlstore, telemetryStore),
|
||||
sqlmigration.NewAddMeterQuickFiltersFactory(sqlstore, sqlschema),
|
||||
sqlmigration.NewUpdateTTLSettingForCustomRetentionFactory(sqlstore, sqlschema),
|
||||
sqlmigration.NewAddRoutePolicyFactory(sqlstore, sqlschema),
|
||||
)
|
||||
}
|
||||
|
||||
@ -155,9 +157,9 @@ func NewPrometheusProviderFactories(telemetryStore telemetrystore.TelemetryStore
|
||||
)
|
||||
}
|
||||
|
||||
func NewNotificationManagerProviderFactories() factory.NamedMap[factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config]] {
|
||||
func NewNotificationManagerProviderFactories(routeStore routeTypes.RouteStore) factory.NamedMap[factory.ProviderFactory[nfmanager.NotificationManager, nfmanager.Config]] {
|
||||
return factory.MustNewNamedMap(
|
||||
rulebasednotification.NewFactory(),
|
||||
rulebasednotification.NewFactory(routeStore),
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager"
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/sqlroutingstore"
|
||||
"github.com/SigNoz/signoz/pkg/analytics"
|
||||
"github.com/SigNoz/signoz/pkg/cache"
|
||||
"github.com/SigNoz/signoz/pkg/emailing"
|
||||
@ -230,12 +231,14 @@ func New(
|
||||
// Initialize user getter
|
||||
userGetter := impluser.NewGetter(impluser.NewStore(sqlstore, providerSettings))
|
||||
|
||||
// will need to create factory for all stores
|
||||
routeStore := sqlroutingstore.NewStore(sqlstore)
|
||||
// shared NotificationManager instance for both alertmanager and rules
|
||||
notificationManager, err := factory.NewProviderFromNamedMap(
|
||||
ctx,
|
||||
providerSettings,
|
||||
nfmanager.Config{},
|
||||
NewNotificationManagerProviderFactories(),
|
||||
NewNotificationManagerProviderFactories(routeStore),
|
||||
"rulebased",
|
||||
)
|
||||
if err != nil {
|
||||
|
||||
260
pkg/sqlmigration/049_add_route_policy.go
Normal file
260
pkg/sqlmigration/049_add_route_policy.go
Normal file
@ -0,0 +1,260 @@
|
||||
package sqlmigration
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/factory"
|
||||
"github.com/SigNoz/signoz/pkg/sqlschema"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
"github.com/SigNoz/signoz/pkg/types"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
"github.com/uptrace/bun"
|
||||
"github.com/uptrace/bun/migrate"
|
||||
"log/slog"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Shared types for migration
|
||||
|
||||
type expressionRoute struct {
|
||||
bun.BaseModel `bun:"table:route_policy"`
|
||||
types.Identifiable
|
||||
types.TimeAuditable
|
||||
types.UserAuditable
|
||||
|
||||
Expression string `bun:"expression,type:text"`
|
||||
ExpressionKind string `bun:"kind,type:text"`
|
||||
|
||||
Channels []string `bun:"channels,type:text"`
|
||||
|
||||
Name string `bun:"name,type:text"`
|
||||
Description string `bun:"description,type:text"`
|
||||
Enabled bool `bun:"enabled,type:boolean,default:true"`
|
||||
Tags []string `bun:"tags,type:text"`
|
||||
|
||||
OrgID string `bun:"org_id,type:text"`
|
||||
}
|
||||
|
||||
type rule struct {
|
||||
bun.BaseModel `bun:"table:rule"`
|
||||
types.Identifiable
|
||||
types.TimeAuditable
|
||||
types.UserAuditable
|
||||
Deleted int `bun:"deleted,default:0"`
|
||||
Data string `bun:"data,type:text"`
|
||||
OrgID string `bun:"org_id,type:text"`
|
||||
}
|
||||
|
||||
type addRoutePolicies struct {
|
||||
sqlstore sqlstore.SQLStore
|
||||
sqlschema sqlschema.SQLSchema
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
func NewAddRoutePolicyFactory(sqlstore sqlstore.SQLStore, sqlschema sqlschema.SQLSchema) factory.ProviderFactory[SQLMigration, Config] {
|
||||
return factory.NewProviderFactory(factory.MustNewName("add_route_policy"), func(ctx context.Context, providerSettings factory.ProviderSettings, config Config) (SQLMigration, error) {
|
||||
return newAddRoutePolicy(ctx, providerSettings, config, sqlstore, sqlschema)
|
||||
})
|
||||
}
|
||||
|
||||
func newAddRoutePolicy(_ context.Context, settings factory.ProviderSettings, _ Config, sqlstore sqlstore.SQLStore, sqlschema sqlschema.SQLSchema) (SQLMigration, error) {
|
||||
return &addRoutePolicies{
|
||||
sqlstore: sqlstore,
|
||||
sqlschema: sqlschema,
|
||||
logger: settings.Logger,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (migration *addRoutePolicies) Register(migrations *migrate.Migrations) error {
|
||||
if err := migrations.Register(migration.Up, migration.Down); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (migration *addRoutePolicies) Up(ctx context.Context, db *bun.DB) error {
|
||||
_, _, err := migration.sqlschema.GetTable(ctx, sqlschema.TableName("route_policy"))
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
tx, err := db.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
_ = tx.Rollback()
|
||||
}()
|
||||
|
||||
sqls := [][]byte{}
|
||||
|
||||
// Create the route_policy table
|
||||
table := &sqlschema.Table{
|
||||
Name: "route_policy",
|
||||
Columns: []*sqlschema.Column{
|
||||
{Name: "id", DataType: sqlschema.DataTypeText, Nullable: false},
|
||||
{Name: "created_at", DataType: sqlschema.DataTypeTimestamp, Nullable: false},
|
||||
{Name: "updated_at", DataType: sqlschema.DataTypeTimestamp, Nullable: false},
|
||||
{Name: "created_by", DataType: sqlschema.DataTypeText, Nullable: false},
|
||||
{Name: "updated_by", DataType: sqlschema.DataTypeText, Nullable: false},
|
||||
{Name: "expression", DataType: sqlschema.DataTypeText, Nullable: false},
|
||||
{Name: "kind", DataType: sqlschema.DataTypeText, Nullable: false},
|
||||
{Name: "channels", DataType: sqlschema.DataTypeText, Nullable: false},
|
||||
{Name: "name", DataType: sqlschema.DataTypeText, Nullable: false},
|
||||
{Name: "description", DataType: sqlschema.DataTypeText, Nullable: true},
|
||||
{Name: "enabled", DataType: sqlschema.DataTypeBoolean, Nullable: false, Default: "true"},
|
||||
{Name: "tags", DataType: sqlschema.DataTypeText, Nullable: true},
|
||||
{Name: "org_id", DataType: sqlschema.DataTypeText, Nullable: false},
|
||||
},
|
||||
PrimaryKeyConstraint: &sqlschema.PrimaryKeyConstraint{
|
||||
ColumnNames: []sqlschema.ColumnName{"id"},
|
||||
},
|
||||
ForeignKeyConstraints: []*sqlschema.ForeignKeyConstraint{
|
||||
{
|
||||
ReferencingColumnName: "org_id",
|
||||
ReferencedTableName: "organizations",
|
||||
ReferencedColumnName: "id",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
tableSQLs := migration.sqlschema.Operator().CreateTable(table)
|
||||
sqls = append(sqls, tableSQLs...)
|
||||
|
||||
for _, sqlStmt := range sqls {
|
||||
if _, err := tx.ExecContext(ctx, string(sqlStmt)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
err = migration.migrateRulesToRoutePolicies(ctx, tx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := tx.Commit(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (migration *addRoutePolicies) migrateRulesToRoutePolicies(ctx context.Context, tx bun.Tx) error {
|
||||
var rules []*rule
|
||||
err := tx.NewSelect().
|
||||
Model(&rules).
|
||||
Where("deleted = ?", 0).
|
||||
Scan(ctx)
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return nil // No rules to migrate
|
||||
}
|
||||
return errors.NewInternalf(errors.CodeInternal, "failed to fetch rules")
|
||||
}
|
||||
|
||||
channelsByOrg, err := migration.getAllChannels(ctx, tx)
|
||||
if err != nil {
|
||||
return errors.NewInternalf(errors.CodeInternal, "fetching channels error: %v", err)
|
||||
}
|
||||
|
||||
var routesToInsert []*expressionRoute
|
||||
|
||||
routesToInsert, err = migration.convertRulesToRoutes(rules, channelsByOrg)
|
||||
if err != nil {
|
||||
return errors.NewInternalf(errors.CodeInternal, "converting rules to routes error: %v", err)
|
||||
}
|
||||
|
||||
// Insert all routes in a single batch operation
|
||||
if len(routesToInsert) > 0 {
|
||||
_, err = tx.NewInsert().
|
||||
Model(&routesToInsert).
|
||||
Exec(ctx)
|
||||
if err != nil {
|
||||
return errors.NewInternalf(errors.CodeInternal, "failed to insert notification routes")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (migration *addRoutePolicies) convertRulesToRoutes(rules []*rule, channelsByOrg map[string][]string) ([]*expressionRoute, error) {
|
||||
var routes []*expressionRoute
|
||||
for _, r := range rules {
|
||||
var gettableRule ruletypes.GettableRule
|
||||
if err := json.Unmarshal([]byte(r.Data), &gettableRule); err != nil {
|
||||
return nil, errors.NewInternalf(errors.CodeInternal, "failed to unmarshal rule data for rule ID %s: %v", r.ID, err)
|
||||
}
|
||||
|
||||
if len(gettableRule.PreferredChannels) == 0 {
|
||||
channels, exists := channelsByOrg[r.OrgID]
|
||||
if !exists || len(channels) == 0 {
|
||||
continue
|
||||
}
|
||||
gettableRule.PreferredChannels = channels
|
||||
}
|
||||
severity := "critical"
|
||||
if v, ok := gettableRule.Labels["severity"]; ok {
|
||||
severity = v
|
||||
}
|
||||
expression := fmt.Sprintf(`%s == "%s" && %s == "%s"`, "threshold.name", severity, "ruleId", r.ID.String())
|
||||
route := &expressionRoute{
|
||||
Identifiable: types.Identifiable{
|
||||
ID: valuer.GenerateUUID(),
|
||||
},
|
||||
TimeAuditable: types.TimeAuditable{
|
||||
CreatedAt: time.Now(),
|
||||
UpdatedAt: time.Now(),
|
||||
},
|
||||
UserAuditable: types.UserAuditable{
|
||||
CreatedBy: r.CreatedBy,
|
||||
UpdatedBy: r.UpdatedBy,
|
||||
},
|
||||
Expression: expression,
|
||||
ExpressionKind: "rule",
|
||||
Channels: gettableRule.PreferredChannels,
|
||||
Name: r.ID.StringValue(),
|
||||
Enabled: true,
|
||||
OrgID: r.OrgID,
|
||||
}
|
||||
routes = append(routes, route)
|
||||
}
|
||||
return routes, nil
|
||||
}
|
||||
|
||||
func (migration *addRoutePolicies) getAllChannels(ctx context.Context, tx bun.Tx) (map[string][]string, error) {
|
||||
type channel struct {
|
||||
bun.BaseModel `bun:"table:notification_channel"`
|
||||
types.Identifiable
|
||||
types.TimeAuditable
|
||||
Name string `json:"name" bun:"name"`
|
||||
Type string `json:"type" bun:"type"`
|
||||
Data string `json:"data" bun:"data"`
|
||||
OrgID string `json:"org_id" bun:"org_id"`
|
||||
}
|
||||
|
||||
var channels []*channel
|
||||
err := tx.NewSelect().
|
||||
Model(&channels).
|
||||
Scan(ctx)
|
||||
if err != nil {
|
||||
return nil, errors.NewInternalf(errors.CodeInternal, "failed to fetch all channels")
|
||||
}
|
||||
|
||||
// Group channels by org ID
|
||||
channelsByOrg := make(map[string][]string)
|
||||
for _, ch := range channels {
|
||||
channelsByOrg[ch.OrgID] = append(channelsByOrg[ch.OrgID], ch.Name)
|
||||
}
|
||||
|
||||
return channelsByOrg, nil
|
||||
}
|
||||
|
||||
func (migration *addRoutePolicies) Down(ctx context.Context, db *bun.DB) error {
|
||||
return nil
|
||||
}
|
||||
@ -27,6 +27,8 @@ type (
|
||||
// An alias for the Alert type from the alertmanager package.
|
||||
Alert = types.Alert
|
||||
|
||||
AlertSlice = types.AlertSlice
|
||||
|
||||
PostableAlert = models.PostableAlert
|
||||
|
||||
PostableAlerts = models.PostableAlerts
|
||||
@ -38,6 +40,10 @@ type (
|
||||
GettableAlerts = models.GettableAlerts
|
||||
)
|
||||
|
||||
const (
|
||||
NoDataLabel = model.LabelName("nodata")
|
||||
)
|
||||
|
||||
type DeprecatedGettableAlert struct {
|
||||
*model.Alert
|
||||
Status types.AlertStatus `json:"status"`
|
||||
@ -307,3 +313,11 @@ func receiversMatchFilter(receivers []string, filter *regexp.Regexp) bool {
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func NoDataAlert(alert *types.Alert) bool {
|
||||
if _, ok := alert.Labels[NoDataLabel]; ok {
|
||||
return true
|
||||
} else {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
@ -21,6 +21,7 @@ import (
|
||||
const (
|
||||
DefaultReceiverName string = "default-receiver"
|
||||
DefaultGroupBy string = "ruleId"
|
||||
DefaultGroupByAll string = "__all__"
|
||||
)
|
||||
|
||||
var (
|
||||
@ -193,6 +194,20 @@ func (c *Config) SetRouteConfig(routeConfig RouteConfig) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Config) AddInhibitRules(rules []config.InhibitRule) error {
|
||||
if c.alertmanagerConfig == nil {
|
||||
return errors.New(errors.TypeInvalidInput, ErrCodeAlertmanagerConfigInvalid, "config is nil")
|
||||
}
|
||||
|
||||
c.alertmanagerConfig.InhibitRules = append(c.alertmanagerConfig.InhibitRules, rules...)
|
||||
|
||||
c.storeableConfig.Config = string(newRawFromConfig(c.alertmanagerConfig))
|
||||
c.storeableConfig.Hash = fmt.Sprintf("%x", newConfigHash(c.storeableConfig.Config))
|
||||
c.storeableConfig.UpdatedAt = time.Now()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Config) AlertmanagerConfig() *config.Config {
|
||||
return c.alertmanagerConfig
|
||||
}
|
||||
@ -304,6 +319,27 @@ func (c *Config) CreateRuleIDMatcher(ruleID string, receiverNames []string) erro
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Config) DeleteRuleIDInhibitor(ruleID string) error {
|
||||
if c.alertmanagerConfig.InhibitRules == nil {
|
||||
return nil // already nil
|
||||
}
|
||||
|
||||
var filteredRules []config.InhibitRule
|
||||
for _, inhibitor := range c.alertmanagerConfig.InhibitRules {
|
||||
sourceContainsRuleID := matcherContainsRuleID(inhibitor.SourceMatchers, ruleID)
|
||||
targetContainsRuleID := matcherContainsRuleID(inhibitor.TargetMatchers, ruleID)
|
||||
if !sourceContainsRuleID && !targetContainsRuleID {
|
||||
filteredRules = append(filteredRules, inhibitor)
|
||||
}
|
||||
}
|
||||
c.alertmanagerConfig.InhibitRules = filteredRules
|
||||
c.storeableConfig.Config = string(newRawFromConfig(c.alertmanagerConfig))
|
||||
c.storeableConfig.Hash = fmt.Sprintf("%x", newConfigHash(c.storeableConfig.Config))
|
||||
c.storeableConfig.UpdatedAt = time.Now()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Config) UpdateRuleIDMatcher(ruleID string, receiverNames []string) error {
|
||||
err := c.DeleteRuleIDMatcher(ruleID)
|
||||
if err != nil {
|
||||
@ -405,6 +441,8 @@ func init() {
|
||||
type NotificationConfig struct {
|
||||
NotificationGroup map[model.LabelName]struct{}
|
||||
Renotify ReNotificationConfig
|
||||
UsePolicy bool
|
||||
GroupByAll bool
|
||||
}
|
||||
|
||||
func (nc *NotificationConfig) DeepCopy() NotificationConfig {
|
||||
@ -415,6 +453,7 @@ func (nc *NotificationConfig) DeepCopy() NotificationConfig {
|
||||
for k, v := range nc.NotificationGroup {
|
||||
deepCopy.NotificationGroup[k] = v
|
||||
}
|
||||
deepCopy.UsePolicy = nc.UsePolicy
|
||||
return deepCopy
|
||||
}
|
||||
|
||||
@ -423,7 +462,7 @@ type ReNotificationConfig struct {
|
||||
RenotifyInterval time.Duration
|
||||
}
|
||||
|
||||
func NewNotificationConfig(groups []string, renotifyInterval time.Duration, noDataRenotifyInterval time.Duration) NotificationConfig {
|
||||
func NewNotificationConfig(groups []string, renotifyInterval time.Duration, noDataRenotifyInterval time.Duration, policy bool) NotificationConfig {
|
||||
notificationConfig := GetDefaultNotificationConfig()
|
||||
|
||||
if renotifyInterval != 0 {
|
||||
@ -435,8 +474,13 @@ func NewNotificationConfig(groups []string, renotifyInterval time.Duration, noDa
|
||||
}
|
||||
for _, group := range groups {
|
||||
notificationConfig.NotificationGroup[model.LabelName(group)] = struct{}{}
|
||||
if group == DefaultGroupByAll {
|
||||
notificationConfig.GroupByAll = true
|
||||
}
|
||||
}
|
||||
|
||||
notificationConfig.UsePolicy = policy
|
||||
|
||||
return notificationConfig
|
||||
}
|
||||
|
||||
|
||||
139
pkg/types/alertmanagertypes/expressionroute.go
Normal file
139
pkg/types/alertmanagertypes/expressionroute.go
Normal file
@ -0,0 +1,139 @@
|
||||
package alertmanagertypes
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/expr-lang/expr"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/types"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
"github.com/uptrace/bun"
|
||||
)
|
||||
|
||||
type PostableRoutePolicy struct {
|
||||
Expression string `json:"expression"`
|
||||
ExpressionKind ExpressionKind `json:"kind"`
|
||||
Channels []string `json:"channels"`
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description"`
|
||||
Tags []string `json:"tags,omitempty"`
|
||||
}
|
||||
|
||||
func (p *PostableRoutePolicy) Validate() error {
|
||||
if p.Expression == "" {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "expression is required")
|
||||
}
|
||||
|
||||
if p.Name == "" {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "name is required")
|
||||
}
|
||||
|
||||
if len(p.Channels) == 0 {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "at least one channel is required")
|
||||
}
|
||||
|
||||
// Validate channels are not empty
|
||||
for i, channel := range p.Channels {
|
||||
if channel == "" {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "channel at index %d cannot be empty", i)
|
||||
}
|
||||
}
|
||||
|
||||
if p.ExpressionKind != PolicyBasedExpression && p.ExpressionKind != RuleBasedExpression {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported expression kind: %s", p.ExpressionKind.StringValue())
|
||||
}
|
||||
|
||||
_, err := expr.Compile(p.Expression)
|
||||
if err != nil {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid expression syntax: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type GettableRoutePolicy struct {
|
||||
PostableRoutePolicy // Embedded
|
||||
|
||||
ID string `json:"id"`
|
||||
|
||||
// Audit fields
|
||||
CreatedAt *time.Time `json:"createdAt"`
|
||||
UpdatedAt *time.Time `json:"updatedAt"`
|
||||
CreatedBy *string `json:"createdBy"`
|
||||
UpdatedBy *string `json:"updatedBy"`
|
||||
}
|
||||
|
||||
type ExpressionKind struct {
|
||||
valuer.String
|
||||
}
|
||||
|
||||
var (
|
||||
RuleBasedExpression = ExpressionKind{valuer.NewString("rule")}
|
||||
PolicyBasedExpression = ExpressionKind{valuer.NewString("policy")}
|
||||
)
|
||||
|
||||
// RoutePolicy represents the database model for expression routes
|
||||
type RoutePolicy struct {
|
||||
bun.BaseModel `bun:"table:route_policy"`
|
||||
types.Identifiable
|
||||
types.TimeAuditable
|
||||
types.UserAuditable
|
||||
|
||||
Expression string `bun:"expression,type:text,notnull" json:"expression"`
|
||||
ExpressionKind ExpressionKind `bun:"kind,type:text" json:"kind"`
|
||||
|
||||
Channels []string `bun:"channels,type:jsonb" json:"channels"`
|
||||
|
||||
Name string `bun:"name,type:text" json:"name"`
|
||||
Description string `bun:"description,type:text" json:"description"`
|
||||
Enabled bool `bun:"enabled,type:boolean,default:true" json:"enabled"`
|
||||
Tags []string `bun:"tags,type:jsonb" json:"tags,omitempty"`
|
||||
|
||||
OrgID string `bun:"org_id,type:text,notnull" json:"orgId"`
|
||||
}
|
||||
|
||||
func (er *RoutePolicy) Validate() error {
|
||||
if er == nil {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "route_policy cannot be nil")
|
||||
}
|
||||
|
||||
if er.Expression == "" {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "expression is required")
|
||||
}
|
||||
|
||||
if er.Name == "" {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "name is required")
|
||||
}
|
||||
|
||||
if er.OrgID == "" {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "organization ID is required")
|
||||
}
|
||||
|
||||
if len(er.Channels) == 0 {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "at least one channel is required")
|
||||
}
|
||||
|
||||
// Validate channels are not empty
|
||||
for i, channel := range er.Channels {
|
||||
if channel == "" {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "channel at index %d cannot be empty", i)
|
||||
}
|
||||
}
|
||||
|
||||
if er.ExpressionKind != PolicyBasedExpression && er.ExpressionKind != RuleBasedExpression {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported expression kind: %s", er.ExpressionKind.StringValue())
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type RouteStore interface {
|
||||
GetByID(ctx context.Context, orgId string, id string) (*RoutePolicy, error)
|
||||
Create(ctx context.Context, route *RoutePolicy) error
|
||||
CreateBatch(ctx context.Context, routes []*RoutePolicy) error
|
||||
Delete(ctx context.Context, orgId string, id string) error
|
||||
GetAllByKind(ctx context.Context, orgID string, kind ExpressionKind) ([]*RoutePolicy, error)
|
||||
GetAllByName(ctx context.Context, orgID string, name string) ([]*RoutePolicy, error)
|
||||
DeleteRouteByName(ctx context.Context, orgID string, name string) error
|
||||
}
|
||||
@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/prometheus/common/model"
|
||||
"log/slog"
|
||||
"time"
|
||||
|
||||
@ -49,9 +50,9 @@ func NewReceiver(input string) (Receiver, error) {
|
||||
return receiverWithDefaults, nil
|
||||
}
|
||||
|
||||
func TestReceiver(ctx context.Context, receiver Receiver, receiverIntegrationsFunc ReceiverIntegrationsFunc, config *Config, tmpl *template.Template, logger *slog.Logger, alert *Alert) error {
|
||||
ctx = notify.WithGroupKey(ctx, fmt.Sprintf("%s-%s-%d", receiver.Name, alert.Labels.Fingerprint(), time.Now().Unix()))
|
||||
ctx = notify.WithGroupLabels(ctx, alert.Labels)
|
||||
func TestReceiver(ctx context.Context, receiver Receiver, receiverIntegrationsFunc ReceiverIntegrationsFunc, config *Config, tmpl *template.Template, logger *slog.Logger, lSet model.LabelSet, alert ...*Alert) error {
|
||||
ctx = notify.WithGroupKey(ctx, fmt.Sprintf("%s-%s-%d", receiver.Name, lSet.Fingerprint(), time.Now().Unix()))
|
||||
ctx = notify.WithGroupLabels(ctx, lSet)
|
||||
ctx = notify.WithReceiverName(ctx, receiver.Name)
|
||||
|
||||
// We need to create a new config with the same global and route config but empty receivers and routes
|
||||
@ -80,7 +81,7 @@ func TestReceiver(ctx context.Context, receiver Receiver, receiverIntegrationsFu
|
||||
return errors.Newf(errors.TypeNotFound, errors.CodeNotFound, "no integrations found for receiver %s", receiver.Name)
|
||||
}
|
||||
|
||||
if _, err = integrations[0].Notify(ctx, alert); err != nil {
|
||||
if _, err = integrations[0].Notify(ctx, alert...); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
||||
@ -15,6 +15,8 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
|
||||
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
|
||||
"github.com/prometheus/alertmanager/config"
|
||||
)
|
||||
|
||||
type AlertType string
|
||||
@ -65,21 +67,95 @@ type PostableRule struct {
|
||||
}
|
||||
|
||||
type NotificationSettings struct {
|
||||
NotificationGroupBy []string `json:"notificationGroupBy,omitempty"`
|
||||
ReNotifyInterval Duration `json:"renotify,omitempty"`
|
||||
AlertStates []model.AlertState `json:"alertStates,omitempty"`
|
||||
GroupBy []string `json:"groupBy,omitempty"`
|
||||
Renotify Renotify `json:"renotify,omitempty"`
|
||||
UsePolicy bool `json:"usePolicy,omitempty"`
|
||||
}
|
||||
|
||||
type Renotify struct {
|
||||
Enabled bool `json:"enabled"`
|
||||
ReNotifyInterval Duration `json:"interval,omitempty"`
|
||||
AlertStates []model.AlertState `json:"alertStates,omitempty"`
|
||||
}
|
||||
|
||||
func (ns *NotificationSettings) GetAlertManagerNotificationConfig() alertmanagertypes.NotificationConfig {
|
||||
var renotifyInterval Duration
|
||||
var noDataRenotifyInterval Duration
|
||||
if slices.Contains(ns.AlertStates, model.StateNoData) {
|
||||
noDataRenotifyInterval = ns.ReNotifyInterval
|
||||
var renotifyInterval time.Duration
|
||||
var noDataRenotifyInterval time.Duration
|
||||
if ns.Renotify.Enabled {
|
||||
if slices.Contains(ns.Renotify.AlertStates, model.StateNoData) {
|
||||
noDataRenotifyInterval = time.Duration(ns.Renotify.ReNotifyInterval)
|
||||
}
|
||||
if slices.Contains(ns.Renotify.AlertStates, model.StateFiring) {
|
||||
renotifyInterval = time.Duration(ns.Renotify.ReNotifyInterval)
|
||||
}
|
||||
} else {
|
||||
renotifyInterval = 8760 * time.Hour //1 year for no renotify substitute
|
||||
noDataRenotifyInterval = 8760 * time.Hour
|
||||
}
|
||||
if slices.Contains(ns.AlertStates, model.StateFiring) {
|
||||
renotifyInterval = ns.ReNotifyInterval
|
||||
return alertmanagertypes.NewNotificationConfig(ns.GroupBy, renotifyInterval, noDataRenotifyInterval, ns.UsePolicy)
|
||||
}
|
||||
|
||||
func (r *PostableRule) GetRuleRouteRequest(ruleId string) ([]*alertmanagertypes.PostableRoutePolicy, error) {
|
||||
threshold, err := r.RuleCondition.Thresholds.GetRuleThreshold()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return alertmanagertypes.NewNotificationConfig(ns.NotificationGroupBy, time.Duration(renotifyInterval), time.Duration(noDataRenotifyInterval))
|
||||
receivers := threshold.GetRuleReceivers()
|
||||
routeRequests := make([]*alertmanagertypes.PostableRoutePolicy, 0)
|
||||
for _, receiver := range receivers {
|
||||
expression := fmt.Sprintf(`%s == "%s" && %s == "%s"`, LabelThresholdName, receiver.Name, LabelRuleId, ruleId)
|
||||
routeRequests = append(routeRequests, &alertmanagertypes.PostableRoutePolicy{
|
||||
Expression: expression,
|
||||
ExpressionKind: alertmanagertypes.RuleBasedExpression,
|
||||
Channels: receiver.Channels,
|
||||
Name: ruleId,
|
||||
Description: fmt.Sprintf("Auto-generated route for rule %s", ruleId),
|
||||
Tags: []string{"auto-generated", "rule-based"},
|
||||
})
|
||||
}
|
||||
return routeRequests, nil
|
||||
}
|
||||
|
||||
func (r *PostableRule) GetInhibitRules(ruleId string) ([]config.InhibitRule, error) {
|
||||
threshold, err := r.RuleCondition.Thresholds.GetRuleThreshold()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var groups []string
|
||||
if r.NotificationSettings != nil {
|
||||
for k := range r.NotificationSettings.GetAlertManagerNotificationConfig().NotificationGroup {
|
||||
groups = append(groups, string(k))
|
||||
}
|
||||
}
|
||||
receivers := threshold.GetRuleReceivers()
|
||||
var inhibitRules []config.InhibitRule
|
||||
for i := 0; i < len(receivers)-1; i++ {
|
||||
rule := config.InhibitRule{
|
||||
SourceMatchers: config.Matchers{
|
||||
{
|
||||
Name: LabelThresholdName,
|
||||
Value: receivers[i].Name,
|
||||
},
|
||||
{
|
||||
Name: LabelRuleId,
|
||||
Value: ruleId,
|
||||
},
|
||||
},
|
||||
TargetMatchers: config.Matchers{
|
||||
{
|
||||
Name: LabelThresholdName,
|
||||
Value: receivers[i+1].Name,
|
||||
},
|
||||
{
|
||||
Name: LabelRuleId,
|
||||
Value: ruleId,
|
||||
},
|
||||
},
|
||||
Equal: groups,
|
||||
}
|
||||
inhibitRules = append(inhibitRules, rule)
|
||||
}
|
||||
return inhibitRules, nil
|
||||
}
|
||||
|
||||
func (ns *NotificationSettings) UnmarshalJSON(data []byte) error {
|
||||
@ -95,7 +171,7 @@ func (ns *NotificationSettings) UnmarshalJSON(data []byte) error {
|
||||
}
|
||||
|
||||
// Validate states after unmarshaling
|
||||
for _, state := range ns.AlertStates {
|
||||
for _, state := range ns.Renotify.AlertStates {
|
||||
if state != model.StateFiring && state != model.StateNoData {
|
||||
return fmt.Errorf("invalid alert state: %s", state)
|
||||
}
|
||||
@ -143,15 +219,25 @@ func (r *PostableRule) processRuleDefaults() error {
|
||||
Kind: BasicThresholdKind,
|
||||
Spec: BasicRuleThresholds{{
|
||||
Name: thresholdName,
|
||||
RuleUnit: r.RuleCondition.CompositeQuery.Unit,
|
||||
TargetUnit: r.RuleCondition.TargetUnit,
|
||||
TargetValue: r.RuleCondition.Target,
|
||||
MatchType: r.RuleCondition.MatchType,
|
||||
CompareOp: r.RuleCondition.CompareOp,
|
||||
Channels: r.PreferredChannels,
|
||||
}},
|
||||
}
|
||||
r.RuleCondition.Thresholds = &thresholdData
|
||||
r.Evaluation = &EvaluationEnvelope{RollingEvaluation, RollingWindow{EvalWindow: r.EvalWindow, Frequency: r.Frequency}}
|
||||
r.NotificationSettings = &NotificationSettings{
|
||||
Renotify: Renotify{
|
||||
Enabled: true,
|
||||
ReNotifyInterval: Duration(4 * time.Hour),
|
||||
AlertStates: []model.AlertState{model.StateFiring},
|
||||
},
|
||||
}
|
||||
if r.RuleCondition.AlertOnAbsent {
|
||||
r.NotificationSettings.Renotify.AlertStates = append(r.NotificationSettings.Renotify.AlertStates, model.StateNoData)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -170,6 +256,7 @@ func (r *PostableRule) MarshalJSON() ([]byte, error) {
|
||||
}
|
||||
aux.Evaluation = nil
|
||||
aux.SchemaVersion = ""
|
||||
aux.NotificationSettings = nil
|
||||
return json.Marshal(aux)
|
||||
default:
|
||||
copyStruct := *r
|
||||
@ -192,7 +279,7 @@ func isValidLabelName(ln string) bool {
|
||||
return false
|
||||
}
|
||||
for i, b := range ln {
|
||||
if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || (b >= '0' && b <= '9' && i > 0)) {
|
||||
if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == '.' || (b >= '0' && b <= '9' && i > 0)) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
@ -347,6 +434,7 @@ func (g *GettableRule) MarshalJSON() ([]byte, error) {
|
||||
}
|
||||
aux.Evaluation = nil
|
||||
aux.SchemaVersion = ""
|
||||
aux.NotificationSettings = nil
|
||||
return json.Marshal(aux)
|
||||
default:
|
||||
copyStruct := *g
|
||||
|
||||
@ -2,10 +2,11 @@ package ruletypes
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
|
||||
)
|
||||
|
||||
@ -303,10 +304,6 @@ func TestParseIntoRuleSchemaVersioning(t *testing.T) {
|
||||
t.Errorf("Expected threshold name 'warning' from severity label, got '%s'", spec.Name)
|
||||
}
|
||||
|
||||
// Verify all fields are copied from RuleCondition
|
||||
if spec.RuleUnit != "percent" {
|
||||
t.Errorf("Expected RuleUnit 'percent', got '%s'", spec.RuleUnit)
|
||||
}
|
||||
if spec.TargetUnit != "%" {
|
||||
t.Errorf("Expected TargetUnit '%%', got '%s'", spec.TargetUnit)
|
||||
}
|
||||
@ -455,9 +452,6 @@ func TestParseIntoRuleSchemaVersioning(t *testing.T) {
|
||||
if spec.TargetUnit != "%" {
|
||||
t.Errorf("Expected TargetUnit '%%' (overwritten), got '%s'", spec.TargetUnit)
|
||||
}
|
||||
if spec.RuleUnit != "percent" {
|
||||
t.Errorf("Expected RuleUnit 'percent' (overwritten), got '%s'", spec.RuleUnit)
|
||||
}
|
||||
|
||||
if rule.Evaluation == nil {
|
||||
t.Fatal("Expected Evaluation to be populated")
|
||||
@ -630,9 +624,9 @@ func TestParseIntoRuleThresholdGeneration(t *testing.T) {
|
||||
vector, err := threshold.ShouldAlert(v3.Series{
|
||||
Points: []v3.Point{{Value: 0.15, Timestamp: 1000}}, // 150ms in seconds
|
||||
Labels: map[string]string{"test": "label"},
|
||||
})
|
||||
}, "")
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error in ShouldAlert: %v", err)
|
||||
t.Fatalf("Unexpected error in shouldAlert: %v", err)
|
||||
}
|
||||
|
||||
if len(vector) == 0 {
|
||||
@ -707,9 +701,9 @@ func TestParseIntoRuleMultipleThresholds(t *testing.T) {
|
||||
vector, err := threshold.ShouldAlert(v3.Series{
|
||||
Points: []v3.Point{{Value: 95.0, Timestamp: 1000}}, // 95% CPU usage
|
||||
Labels: map[string]string{"service": "test"},
|
||||
})
|
||||
}, "")
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error in ShouldAlert: %v", err)
|
||||
t.Fatalf("Unexpected error in shouldAlert: %v", err)
|
||||
}
|
||||
|
||||
assert.Equal(t, 2, len(vector))
|
||||
@ -717,9 +711,9 @@ func TestParseIntoRuleMultipleThresholds(t *testing.T) {
|
||||
vector, err = threshold.ShouldAlert(v3.Series{
|
||||
Points: []v3.Point{{Value: 75.0, Timestamp: 1000}}, // 75% CPU usage
|
||||
Labels: map[string]string{"service": "test"},
|
||||
})
|
||||
}, "")
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error in ShouldAlert: %v", err)
|
||||
t.Fatalf("Unexpected error in shouldAlert: %v", err)
|
||||
}
|
||||
|
||||
assert.Equal(t, 1, len(vector))
|
||||
|
||||
@ -2,3 +2,4 @@ package ruletypes
|
||||
|
||||
const CriticalThresholdName = "CRITICAL"
|
||||
const LabelThresholdName = "threshold.name"
|
||||
const LabelRuleId = "ruleId"
|
||||
|
||||
@ -18,6 +18,10 @@ type Sample struct {
|
||||
Metric labels.Labels
|
||||
|
||||
IsMissing bool
|
||||
|
||||
Target float64
|
||||
|
||||
TargetUnit string
|
||||
}
|
||||
|
||||
func (s Sample) String() string {
|
||||
|
||||
@ -51,23 +51,41 @@ func (r *RuleThresholdData) UnmarshalJSON(data []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// RuleReceivers pairs a threshold name with the channels configured for it.
type RuleReceivers struct {
	// Channels are the channel names of the threshold.
	Channels []string `json:"channels"`
	// Name is the threshold name.
	Name string `json:"name"`
}
|
||||
|
||||
type RuleThreshold interface {
|
||||
ShouldAlert(series v3.Series) (Vector, error)
|
||||
ShouldAlert(series v3.Series, unit string) (Vector, error)
|
||||
GetRuleReceivers() []RuleReceivers
|
||||
}
|
||||
|
||||
type BasicRuleThreshold struct {
|
||||
Name string `json:"name"`
|
||||
TargetValue *float64 `json:"target"`
|
||||
TargetUnit string `json:"targetUnit"`
|
||||
RuleUnit string `json:"ruleUnit"`
|
||||
RecoveryTarget *float64 `json:"recoveryTarget"`
|
||||
MatchType MatchType `json:"matchType"`
|
||||
CompareOp CompareOp `json:"op"`
|
||||
SelectedQuery string `json:"selectedQuery"`
|
||||
Channels []string `json:"channels"`
|
||||
}
|
||||
|
||||
type BasicRuleThresholds []BasicRuleThreshold
|
||||
|
||||
func (r BasicRuleThresholds) GetRuleReceivers() []RuleReceivers {
|
||||
thresholds := []BasicRuleThreshold(r)
|
||||
var receiverRoutes []RuleReceivers
|
||||
sortThresholds(thresholds)
|
||||
for _, threshold := range thresholds {
|
||||
receiverRoutes = append(receiverRoutes, RuleReceivers{
|
||||
Name: threshold.Name,
|
||||
Channels: threshold.Channels,
|
||||
})
|
||||
}
|
||||
return receiverRoutes
|
||||
}
|
||||
|
||||
func (r BasicRuleThresholds) Validate() error {
|
||||
var errs []error
|
||||
for _, basicThreshold := range r {
|
||||
@ -78,13 +96,27 @@ func (r BasicRuleThresholds) Validate() error {
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
|
||||
func (r BasicRuleThresholds) ShouldAlert(series v3.Series) (Vector, error) {
|
||||
func (r BasicRuleThresholds) ShouldAlert(series v3.Series, unit string) (Vector, error) {
|
||||
var resultVector Vector
|
||||
thresholds := []BasicRuleThreshold(r)
|
||||
sortThresholds(thresholds)
|
||||
for _, threshold := range thresholds {
|
||||
smpl, shouldAlert := threshold.shouldAlert(series, unit)
|
||||
if shouldAlert {
|
||||
smpl.Target = threshold.target(unit)
|
||||
smpl.TargetUnit = threshold.TargetUnit
|
||||
resultVector = append(resultVector, smpl)
|
||||
}
|
||||
}
|
||||
return resultVector, nil
|
||||
}
|
||||
|
||||
func sortThresholds(thresholds []BasicRuleThreshold) {
|
||||
sort.Slice(thresholds, func(i, j int) bool {
|
||||
compareOp := thresholds[i].GetCompareOp()
|
||||
targetI := thresholds[i].Target()
|
||||
targetJ := thresholds[j].Target()
|
||||
|
||||
compareOp := thresholds[i].getCompareOp()
|
||||
targetI := thresholds[i].target(thresholds[i].TargetUnit) //for sorting we dont need rule unit
|
||||
targetJ := thresholds[j].target(thresholds[j].TargetUnit)
|
||||
|
||||
switch compareOp {
|
||||
case ValueIsAbove, ValueAboveOrEq, ValueOutsideBounds:
|
||||
@ -98,49 +130,22 @@ func (r BasicRuleThresholds) ShouldAlert(series v3.Series) (Vector, error) {
|
||||
return targetI > targetJ
|
||||
}
|
||||
})
|
||||
for _, threshold := range thresholds {
|
||||
smpl, shouldAlert := threshold.ShouldAlert(series)
|
||||
if shouldAlert {
|
||||
resultVector = append(resultVector, smpl)
|
||||
}
|
||||
}
|
||||
return resultVector, nil
|
||||
}
|
||||
|
||||
func (b BasicRuleThreshold) GetName() string {
|
||||
return b.Name
|
||||
}
|
||||
|
||||
func (b BasicRuleThreshold) Target() float64 {
|
||||
func (b BasicRuleThreshold) target(ruleUnit string) float64 {
|
||||
unitConverter := converter.FromUnit(converter.Unit(b.TargetUnit))
|
||||
// convert the target value to the y-axis unit
|
||||
value := unitConverter.Convert(converter.Value{
|
||||
F: *b.TargetValue,
|
||||
U: converter.Unit(b.TargetUnit),
|
||||
}, converter.Unit(b.RuleUnit))
|
||||
}, converter.Unit(ruleUnit))
|
||||
return value.F
|
||||
}
|
||||
|
||||
func (b BasicRuleThreshold) GetRecoveryTarget() float64 {
|
||||
if b.RecoveryTarget == nil {
|
||||
return 0
|
||||
} else {
|
||||
return *b.RecoveryTarget
|
||||
}
|
||||
}
|
||||
|
||||
func (b BasicRuleThreshold) GetMatchType() MatchType {
|
||||
return b.MatchType
|
||||
}
|
||||
|
||||
func (b BasicRuleThreshold) GetCompareOp() CompareOp {
|
||||
func (b BasicRuleThreshold) getCompareOp() CompareOp {
|
||||
return b.CompareOp
|
||||
}
|
||||
|
||||
func (b BasicRuleThreshold) GetSelectedQuery() string {
|
||||
return b.SelectedQuery
|
||||
}
|
||||
|
||||
func (b BasicRuleThreshold) Validate() error {
|
||||
var errs []error
|
||||
if b.Name == "" {
|
||||
@ -182,7 +187,7 @@ func removeGroupinSetPoints(series v3.Series) []v3.Point {
|
||||
return result
|
||||
}
|
||||
|
||||
func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
|
||||
func (b BasicRuleThreshold) shouldAlert(series v3.Series, ruleUnit string) (Sample, bool) {
|
||||
var shouldAlert bool
|
||||
var alertSmpl Sample
|
||||
var lbls labels.Labels
|
||||
@ -191,6 +196,8 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
|
||||
lbls = append(lbls, labels.Label{Name: name, Value: value})
|
||||
}
|
||||
|
||||
target := b.target(ruleUnit)
|
||||
|
||||
lbls = append(lbls, labels.Label{Name: LabelThresholdName, Value: b.Name})
|
||||
|
||||
series.Points = removeGroupinSetPoints(series)
|
||||
@ -205,7 +212,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
|
||||
// If any sample matches the condition, the rule is firing.
|
||||
if b.CompareOp == ValueIsAbove {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value > b.Target() {
|
||||
if smpl.Value > target {
|
||||
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = true
|
||||
break
|
||||
@ -213,7 +220,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
|
||||
}
|
||||
} else if b.CompareOp == ValueIsBelow {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value < b.Target() {
|
||||
if smpl.Value < target {
|
||||
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = true
|
||||
break
|
||||
@ -221,7 +228,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
|
||||
}
|
||||
} else if b.CompareOp == ValueIsEq {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value == b.Target() {
|
||||
if smpl.Value == target {
|
||||
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = true
|
||||
break
|
||||
@ -229,7 +236,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
|
||||
}
|
||||
} else if b.CompareOp == ValueIsNotEq {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value != b.Target() {
|
||||
if smpl.Value != target {
|
||||
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = true
|
||||
break
|
||||
@ -237,7 +244,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
|
||||
}
|
||||
} else if b.CompareOp == ValueOutsideBounds {
|
||||
for _, smpl := range series.Points {
|
||||
if math.Abs(smpl.Value) >= b.Target() {
|
||||
if math.Abs(smpl.Value) >= target {
|
||||
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = true
|
||||
break
|
||||
@ -247,10 +254,10 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
|
||||
case AllTheTimes:
|
||||
// If all samples match the condition, the rule is firing.
|
||||
shouldAlert = true
|
||||
alertSmpl = Sample{Point: Point{V: b.Target()}, Metric: lbls}
|
||||
alertSmpl = Sample{Point: Point{V: target}, Metric: lbls}
|
||||
if b.CompareOp == ValueIsAbove {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value <= b.Target() {
|
||||
if smpl.Value <= target {
|
||||
shouldAlert = false
|
||||
break
|
||||
}
|
||||
@ -267,7 +274,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
|
||||
}
|
||||
} else if b.CompareOp == ValueIsBelow {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value >= b.Target() {
|
||||
if smpl.Value >= target {
|
||||
shouldAlert = false
|
||||
break
|
||||
}
|
||||
@ -283,14 +290,14 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
|
||||
}
|
||||
} else if b.CompareOp == ValueIsEq {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value != b.Target() {
|
||||
if smpl.Value != target {
|
||||
shouldAlert = false
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if b.CompareOp == ValueIsNotEq {
|
||||
for _, smpl := range series.Points {
|
||||
if smpl.Value == b.Target() {
|
||||
if smpl.Value == target {
|
||||
shouldAlert = false
|
||||
break
|
||||
}
|
||||
@ -306,7 +313,7 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
|
||||
}
|
||||
} else if b.CompareOp == ValueOutsideBounds {
|
||||
for _, smpl := range series.Points {
|
||||
if math.Abs(smpl.Value) < b.Target() {
|
||||
if math.Abs(smpl.Value) < target {
|
||||
alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls}
|
||||
shouldAlert = false
|
||||
break
|
||||
@ -326,23 +333,23 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
|
||||
avg := sum / count
|
||||
alertSmpl = Sample{Point: Point{V: avg}, Metric: lbls}
|
||||
if b.CompareOp == ValueIsAbove {
|
||||
if avg > b.Target() {
|
||||
if avg > target {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if b.CompareOp == ValueIsBelow {
|
||||
if avg < b.Target() {
|
||||
if avg < target {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if b.CompareOp == ValueIsEq {
|
||||
if avg == b.Target() {
|
||||
if avg == target {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if b.CompareOp == ValueIsNotEq {
|
||||
if avg != b.Target() {
|
||||
if avg != target {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if b.CompareOp == ValueOutsideBounds {
|
||||
if math.Abs(avg) >= b.Target() {
|
||||
if math.Abs(avg) >= target {
|
||||
shouldAlert = true
|
||||
}
|
||||
}
|
||||
@ -358,23 +365,23 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
|
||||
}
|
||||
alertSmpl = Sample{Point: Point{V: sum}, Metric: lbls}
|
||||
if b.CompareOp == ValueIsAbove {
|
||||
if sum > b.Target() {
|
||||
if sum > target {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if b.CompareOp == ValueIsBelow {
|
||||
if sum < b.Target() {
|
||||
if sum < target {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if b.CompareOp == ValueIsEq {
|
||||
if sum == b.Target() {
|
||||
if sum == target {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if b.CompareOp == ValueIsNotEq {
|
||||
if sum != b.Target() {
|
||||
if sum != target {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if b.CompareOp == ValueOutsideBounds {
|
||||
if math.Abs(sum) >= b.Target() {
|
||||
if math.Abs(sum) >= target {
|
||||
shouldAlert = true
|
||||
}
|
||||
}
|
||||
@ -383,19 +390,19 @@ func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) {
|
||||
shouldAlert = false
|
||||
alertSmpl = Sample{Point: Point{V: series.Points[len(series.Points)-1].Value}, Metric: lbls}
|
||||
if b.CompareOp == ValueIsAbove {
|
||||
if series.Points[len(series.Points)-1].Value > b.Target() {
|
||||
if series.Points[len(series.Points)-1].Value > target {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if b.CompareOp == ValueIsBelow {
|
||||
if series.Points[len(series.Points)-1].Value < b.Target() {
|
||||
if series.Points[len(series.Points)-1].Value < target {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if b.CompareOp == ValueIsEq {
|
||||
if series.Points[len(series.Points)-1].Value == b.Target() {
|
||||
if series.Points[len(series.Points)-1].Value == target {
|
||||
shouldAlert = true
|
||||
}
|
||||
} else if b.CompareOp == ValueIsNotEq {
|
||||
if series.Points[len(series.Points)-1].Value != b.Target() {
|
||||
if series.Points[len(series.Points)-1].Value != target {
|
||||
shouldAlert = true
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user