diff --git a/ee/query-service/rules/anomaly.go b/ee/query-service/rules/anomaly.go index 3fbf1e32b1f2..ff0aa40be8d8 100644 --- a/ee/query-service/rules/anomaly.go +++ b/ee/query-service/rules/anomaly.go @@ -35,7 +35,6 @@ import ( anomalyV2 "github.com/SigNoz/signoz/ee/anomaly" qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" - yaml "gopkg.in/yaml.v2" ) const ( @@ -253,10 +252,17 @@ func (r *AnomalyRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID, t r.logger.InfoContext(ctx, "anomaly scores", "scores", string(scoresJSON)) for _, series := range queryResult.AnomalyScores { - smpl, shouldAlert := r.ShouldAlert(*series) - if shouldAlert { - resultVector = append(resultVector, smpl) + if r.Condition() != nil && r.Condition().RequireMinPoints { + if len(series.Points) < r.Condition().RequiredNumPoints { + r.logger.InfoContext(ctx, "not enough data points to evaluate series, skipping", "ruleid", r.ID(), "numPoints", len(series.Points), "requiredPoints", r.Condition().RequiredNumPoints) + continue + } } + results, err := r.Threshold.ShouldAlert(*series) + if err != nil { + return nil, err + } + resultVector = append(resultVector, results...) } return resultVector, nil } @@ -296,10 +302,17 @@ func (r *AnomalyRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID, r.logger.InfoContext(ctx, "anomaly scores", "scores", string(scoresJSON)) for _, series := range queryResult.AnomalyScores { - smpl, shouldAlert := r.ShouldAlert(*series) - if shouldAlert { - resultVector = append(resultVector, smpl) + if r.Condition().RequireMinPoints { + if len(series.Points) < r.Condition().RequiredNumPoints { + r.logger.InfoContext(ctx, "not enough data points to evaluate series, skipping", "ruleid", r.ID(), "numPoints", len(series.Points), "requiredPoints", r.Condition().RequiredNumPoints) + continue + } } + results, err := r.Threshold.ShouldAlert(*series) + if err != nil { + return nil, err + } + resultVector = append(resultVector, results...) } return resultVector, nil } @@ -499,7 +512,7 @@ func (r *AnomalyRule) String() string { PreferredChannels: r.PreferredChannels(), } - byt, err := yaml.Marshal(ar) + byt, err := json.Marshal(ar) if err != nil { return fmt.Sprintf("error marshaling alerting rule: %s", err.Error()) } diff --git a/pkg/contextlinks/alert_link_visitor.go b/pkg/contextlinks/alert_link_visitor.go index b1166ca0519d..09714553e9e7 100644 --- a/pkg/contextlinks/alert_link_visitor.go +++ b/pkg/contextlinks/alert_link_visitor.go @@ -46,6 +46,11 @@ func PrepareFilterExpression(labels map[string]string, whereClause string, group return "" } + //delete predefined alert labels + for _, label := range PredefinedAlertLabels { + delete(labels, label) + } + groupBySet := make(map[string]struct{}) for _, item := range groupByItems { groupBySet[item.Name] = struct{}{} diff --git a/pkg/contextlinks/links.go b/pkg/contextlinks/links.go index 8412b4757c00..8497cb55de9b 100644 --- a/pkg/contextlinks/links.go +++ b/pkg/contextlinks/links.go @@ -150,6 +150,11 @@ func PrepareLinksToLogs(start, end time.Time, filterItems []v3.FilterItem) strin func PrepareFilters(labels map[string]string, whereClauseItems []v3.FilterItem, groupByItems []v3.AttributeKey, keys map[string]v3.AttributeKey) []v3.FilterItem { filterItems := make([]v3.FilterItem, 0) + //delete predefined alert labels + for _, label := range PredefinedAlertLabels { + delete(labels, label) + } + added := make(map[string]struct{}) for _, item := range whereClauseItems { diff --git a/pkg/contextlinks/types.go b/pkg/contextlinks/types.go index bfc1ab6abbe0..9edf92a5e541 100644 --- a/pkg/contextlinks/types.go +++ b/pkg/contextlinks/types.go @@ -1,6 +1,9 @@ package contextlinks -import v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3" +import ( + v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3" + "github.com/SigNoz/signoz/pkg/types/ruletypes" +) // TODO(srikanthccv): Fix the URL management type URLShareableTimeRange struct { @@ -38,3 +41,5 @@ type URLShareableOptions struct { Format string `json:"format"` SelectColumns []v3.AttributeKey `json:"selectColumns"` } + +var PredefinedAlertLabels = []string{ruletypes.LabelThresholdName} diff --git a/pkg/query-service/rules/base_rule.go b/pkg/query-service/rules/base_rule.go index 40952e4a5059..be14b9133f9b 100644 --- a/pkg/query-service/rules/base_rule.go +++ b/pkg/query-service/rules/base_rule.go @@ -32,6 +32,8 @@ type BaseRule struct { typ ruletypes.AlertType ruleCondition *ruletypes.RuleCondition + + Threshold ruletypes.RuleThreshold // evalWindow is the time window used for evaluating the rule // i.e each time we lookback from the current time, we look at data for the last // evalWindow duration @@ -123,6 +125,10 @@ func NewBaseRule(id string, orgID valuer.UUID, p *ruletypes.PostableRule, reader if p.RuleCondition == nil || !p.RuleCondition.IsValid() { return nil, fmt.Errorf("invalid rule condition") } + threshold, err := p.RuleCondition.Thresholds.GetRuleThreshold() + if err != nil { + return nil, err + } baseRule := &BaseRule{ id: id, @@ -139,6 +145,7 @@ func NewBaseRule(id string, orgID valuer.UUID, p *ruletypes.PostableRule, reader Active: map[uint64]*ruletypes.Alert{}, reader: reader, TemporalityMap: make(map[string]map[v3.Temporality]bool), + Threshold: threshold, } if baseRule.evalWindow == 0 { diff --git a/pkg/query-service/rules/manager.go b/pkg/query-service/rules/manager.go index 341bac74291d..f80686682687 100644 --- a/pkg/query-service/rules/manager.go +++ b/pkg/query-service/rules/manager.go @@ -266,30 +266,17 @@ func (m *Manager) initiate(ctx context.Context) error { for _, rec := range storedRules { taskName := fmt.Sprintf("%s-groupname", rec.ID.StringValue()) - parsedRule, err := ruletypes.ParsePostableRule([]byte(rec.Data)) + parsedRule := ruletypes.PostableRule{} + err := json.Unmarshal([]byte(rec.Data), &parsedRule) if err != nil { - if errors.Is(err, ruletypes.ErrFailedToParseJSON) { - zap.L().Info("failed to load rule in json format, trying yaml now:", zap.String("name", taskName)) - - // see if rule is stored in yaml format - parsedRule, err = ruletypes.ParsePostableRuleWithKind([]byte(rec.Data), ruletypes.RuleDataKindYaml) - - if err != nil { - zap.L().Error("failed to parse and initialize yaml rule", zap.String("name", taskName), zap.Error(err)) - // just one rule is being parsed so expect just one error - loadErrors = append(loadErrors, err) - continue - } - } else { - zap.L().Error("failed to parse and initialize rule", zap.String("name", taskName), zap.Error(err)) - // just one rule is being parsed so expect just one error - loadErrors = append(loadErrors, err) - continue - } + zap.L().Info("failed to load rule in json format", zap.String("name", taskName)) + loadErrors = append(loadErrors, err) + continue } + if !parsedRule.Disabled { - err := m.addTask(ctx, org.ID, parsedRule, taskName) + err := m.addTask(ctx, org.ID, &parsedRule, taskName) if err != nil { zap.L().Error("failed to load the rule definition", zap.String("name", taskName), zap.Error(err)) } @@ -335,8 +322,8 @@ func (m *Manager) EditRule(ctx context.Context, ruleStr string, id valuer.UUID) if err != nil { return err } - - parsedRule, err := ruletypes.ParsePostableRule([]byte(ruleStr)) + parsedRule := ruletypes.PostableRule{} + err = json.Unmarshal([]byte(ruleStr), &parsedRule) if err != nil { return err } @@ -380,7 +367,7 @@ func (m *Manager) EditRule(ctx context.Context, ruleStr string, id valuer.UUID) return err } - err = m.syncRuleStateWithTask(ctx, orgID, prepareTaskName(existingRule.ID.StringValue()), parsedRule) + err = m.syncRuleStateWithTask(ctx, orgID, prepareTaskName(existingRule.ID.StringValue()), &parsedRule) if err != nil { return err } @@ -513,8 +500,8 @@ func (m *Manager) CreateRule(ctx context.Context, ruleStr string) (*ruletypes.Ge if err != nil { return nil, err } - - parsedRule, err := ruletypes.ParsePostableRule([]byte(ruleStr)) + parsedRule := ruletypes.PostableRule{} + err = json.Unmarshal([]byte(ruleStr), &parsedRule) if err != nil { return nil, err } @@ -567,7 +554,7 @@ func (m *Manager) CreateRule(ctx context.Context, ruleStr string) (*ruletypes.Ge } taskName := prepareTaskName(id.StringValue()) - if err := m.addTask(ctx, orgID, parsedRule, taskName); err != nil { + if err := m.addTask(ctx, orgID, &parsedRule, taskName); err != nil { return err } @@ -579,7 +566,7 @@ func (m *Manager) CreateRule(ctx context.Context, ruleStr string) (*ruletypes.Ge return &ruletypes.GettableRule{ Id: id.StringValue(), - PostableRule: *parsedRule, + PostableRule: parsedRule, }, nil } @@ -797,8 +784,9 @@ func (m *Manager) ListRuleStates(ctx context.Context) (*ruletypes.GettableRules, for _, s := range storedRules { - ruleResponse := &ruletypes.GettableRule{} - if err := json.Unmarshal([]byte(s.Data), ruleResponse); err != nil { // Parse []byte to go struct pointer + ruleResponse := ruletypes.GettableRule{} + err = json.Unmarshal([]byte(s.Data), &ruleResponse) + if err != nil { zap.L().Error("failed to unmarshal rule from db", zap.String("id", s.ID.StringValue()), zap.Error(err)) continue } @@ -816,7 +804,7 @@ func (m *Manager) ListRuleStates(ctx context.Context) (*ruletypes.GettableRules, ruleResponse.CreatedBy = &s.CreatedBy ruleResponse.UpdatedAt = &s.UpdatedAt ruleResponse.UpdatedBy = &s.UpdatedBy - resp = append(resp, ruleResponse) + resp = append(resp, &ruleResponse) } return &ruletypes.GettableRules{Rules: resp}, nil @@ -827,8 +815,10 @@ func (m *Manager) GetRule(ctx context.Context, id valuer.UUID) (*ruletypes.Getta if err != nil { return nil, err } - r := &ruletypes.GettableRule{} - if err := json.Unmarshal([]byte(s.Data), r); err != nil { + r := ruletypes.GettableRule{} + err = json.Unmarshal([]byte(s.Data), &r) + if err != nil { + zap.L().Error("failed to unmarshal rule from db", zap.String("id", s.ID.StringValue()), zap.Error(err)) return nil, err } r.Id = id.StringValue() @@ -844,7 +834,7 @@ func (m *Manager) GetRule(ctx context.Context, id valuer.UUID) (*ruletypes.Getta r.UpdatedAt = &s.UpdatedAt r.UpdatedBy = &s.UpdatedBy - return r, nil + return &r, nil } // syncRuleStateWithTask ensures that the state of a stored rule matches @@ -902,20 +892,14 @@ func (m *Manager) PatchRule(ctx context.Context, ruleStr string, id valuer.UUID) } // storedRule holds the current stored rule from DB - storedRule := ruletypes.PostableRule{} - if err := json.Unmarshal([]byte(storedJSON.Data), &storedRule); err != nil { + patchedRule := ruletypes.PostableRule{} + if err := json.Unmarshal([]byte(ruleStr), &patchedRule); err != nil { zap.L().Error("failed to unmarshal stored rule with given id", zap.String("id", id.StringValue()), zap.Error(err)) return nil, err } - // patchedRule is combo of stored rule and patch received in the request - patchedRule, err := ruletypes.ParseIntoRule(storedRule, []byte(ruleStr), "json") - if err != nil { - return nil, err - } - // deploy or un-deploy task according to patched (new) rule state - if err := m.syncRuleStateWithTask(ctx, orgID, taskName, patchedRule); err != nil { + if err := m.syncRuleStateWithTask(ctx, orgID, taskName, &patchedRule); err != nil { zap.L().Error("failed to sync stored rule state with the task", zap.String("taskName", taskName), zap.Error(err)) return nil, err } @@ -933,7 +917,7 @@ func (m *Manager) PatchRule(ctx context.Context, ruleStr string, id valuer.UUID) err = m.ruleStore.EditRule(ctx, storedJSON, func(ctx context.Context) error { return nil }) if err != nil { - if err := m.syncRuleStateWithTask(ctx, orgID, taskName, &storedRule); err != nil { + if err := m.syncRuleStateWithTask(ctx, orgID, taskName, &patchedRule); err != nil { zap.L().Error("failed to restore rule after patch failure", zap.String("taskName", taskName), zap.Error(err)) } return nil, err @@ -942,7 +926,7 @@ func (m *Manager) PatchRule(ctx context.Context, ruleStr string, id valuer.UUID) // prepare http response response := ruletypes.GettableRule{ Id: id.StringValue(), - PostableRule: *patchedRule, + PostableRule: patchedRule, } // fetch state of rule from memory @@ -959,15 +943,14 @@ func (m *Manager) PatchRule(ctx context.Context, ruleStr string, id valuer.UUID) // TestNotification prepares a dummy rule for given rule parameters and // sends a test notification. returns alert count and error (if any) func (m *Manager) TestNotification(ctx context.Context, orgID valuer.UUID, ruleStr string) (int, *model.ApiError) { - - parsedRule, err := ruletypes.ParsePostableRule([]byte(ruleStr)) - + parsedRule := ruletypes.PostableRule{} + err := json.Unmarshal([]byte(ruleStr), &parsedRule) if err != nil { return 0, model.BadRequest(err) } alertCount, apiErr := m.prepareTestRuleFunc(PrepareTestRuleOptions{ - Rule: parsedRule, + Rule: &parsedRule, RuleStore: m.ruleStore, MaintenanceStore: m.maintenanceStore, Logger: m.logger, @@ -1001,15 +984,15 @@ func (m *Manager) GetAlertDetailsForMetricNames(ctx context.Context, metricNames for _, storedRule := range rules { var rule ruletypes.GettableRule - if err := json.Unmarshal([]byte(storedRule.Data), &rule); err != nil { - zap.L().Error("Invalid rule data", zap.Error(err)) + err = json.Unmarshal([]byte(storedRule.Data), &rule) + if err != nil { + zap.L().Error("failed to unmarshal rule from db", zap.String("id", storedRule.ID.StringValue()), zap.Error(err)) continue } if rule.AlertType != ruletypes.AlertTypeMetric || rule.RuleCondition == nil || rule.RuleCondition.CompositeQuery == nil { continue } - rule.Id = storedRule.ID.StringValue() rule.CreatedAt = &storedRule.CreatedAt rule.CreatedBy = &storedRule.CreatedBy diff --git a/pkg/query-service/rules/prom_rule.go b/pkg/query-service/rules/prom_rule.go index 90c9d4619c8e..ea07f85e04b7 100644 --- a/pkg/query-service/rules/prom_rule.go +++ b/pkg/query-service/rules/prom_rule.go @@ -16,12 +16,10 @@ import ( qslabels "github.com/SigNoz/signoz/pkg/query-service/utils/labels" "github.com/SigNoz/signoz/pkg/query-service/utils/times" "github.com/SigNoz/signoz/pkg/query-service/utils/timestamp" + qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes" "github.com/SigNoz/signoz/pkg/valuer" "github.com/prometheus/prometheus/promql" - yaml "gopkg.in/yaml.v2" - - qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" ) type PromRule struct { @@ -151,84 +149,87 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error) var alerts = make(map[uint64]*ruletypes.Alert, len(res)) for _, series := range res { - l := make(map[string]string, len(series.Metric)) - for _, lbl := range series.Metric { - l[lbl.Name] = lbl.Value - } if len(series.Floats) == 0 { continue } - alertSmpl, shouldAlert := r.ShouldAlert(toCommonSeries(series)) - if !shouldAlert { - continue - } - r.logger.DebugContext(ctx, "alerting for series", "rule_name", r.Name(), "series", series) - - threshold := valueFormatter.Format(r.targetVal(), r.Unit()) - - tmplData := ruletypes.AlertTemplateData(l, valueFormatter.Format(alertSmpl.V, r.Unit()), threshold) - // Inject some convenience variables that are easier to remember for users - // who are not used to Go's templating system. - defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}" - - expand := func(text string) string { - - tmpl := ruletypes.NewTemplateExpander( - ctx, - defs+text, - "__alert_"+r.Name(), - tmplData, - times.Time(timestamp.FromTime(ts)), - nil, - ) - result, err := tmpl.Expand() - if err != nil { - result = fmt.Sprintf("", err) - r.logger.WarnContext(ctx, "Expanding alert template failed", "rule_name", r.Name(), "error", err, "data", tmplData) - } - return result - } - - lb := qslabels.NewBuilder(alertSmpl.Metric).Del(qslabels.MetricNameLabel) - resultLabels := qslabels.NewBuilder(alertSmpl.Metric).Del(qslabels.MetricNameLabel).Labels() - - for name, value := range r.labels.Map() { - lb.Set(name, expand(value)) - } - - lb.Set(qslabels.AlertNameLabel, r.Name()) - lb.Set(qslabels.AlertRuleIdLabel, r.ID()) - lb.Set(qslabels.RuleSourceLabel, r.GeneratorURL()) - - annotations := make(qslabels.Labels, 0, len(r.annotations.Map())) - for name, value := range r.annotations.Map() { - annotations = append(annotations, qslabels.Label{Name: name, Value: expand(value)}) - } - - lbs := lb.Labels() - h := lbs.Hash() - resultFPs[h] = struct{}{} - - if _, ok := alerts[h]; ok { - err = fmt.Errorf("vector contains metrics with the same labelset after applying alert labels") - // We have already acquired the lock above hence using SetHealth and - // SetLastError will deadlock. - r.health = ruletypes.HealthBad - r.lastError = err + results, err := r.Threshold.ShouldAlert(toCommonSeries(series)) + if err != nil { return nil, err } - alerts[h] = &ruletypes.Alert{ - Labels: lbs, - QueryResultLables: resultLabels, - Annotations: annotations, - ActiveAt: ts, - State: model.StatePending, - Value: alertSmpl.V, - GeneratorURL: r.GeneratorURL(), - Receivers: r.preferredChannels, + for _, result := range results { + l := make(map[string]string, len(series.Metric)) + for _, lbl := range series.Metric { + l[lbl.Name] = lbl.Value + } + r.logger.DebugContext(ctx, "alerting for series", "rule_name", r.Name(), "series", series) + + threshold := valueFormatter.Format(r.targetVal(), r.Unit()) + + tmplData := ruletypes.AlertTemplateData(l, valueFormatter.Format(result.V, r.Unit()), threshold) + // Inject some convenience variables that are easier to remember for users + // who are not used to Go's templating system. + defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}" + + expand := func(text string) string { + + tmpl := ruletypes.NewTemplateExpander( + ctx, + defs+text, + "__alert_"+r.Name(), + tmplData, + times.Time(timestamp.FromTime(ts)), + nil, + ) + result, err := tmpl.Expand() + if err != nil { + result = fmt.Sprintf("", err) + r.logger.WarnContext(ctx, "Expanding alert template failed", "rule_name", r.Name(), "error", err, "data", tmplData) + } + return result + } + + lb := qslabels.NewBuilder(result.Metric).Del(qslabels.MetricNameLabel) + resultLabels := qslabels.NewBuilder(result.Metric).Del(qslabels.MetricNameLabel).Labels() + + for name, value := range r.labels.Map() { + lb.Set(name, expand(value)) + } + + lb.Set(qslabels.AlertNameLabel, r.Name()) + lb.Set(qslabels.AlertRuleIdLabel, r.ID()) + lb.Set(qslabels.RuleSourceLabel, r.GeneratorURL()) + + annotations := make(qslabels.Labels, 0, len(r.annotations.Map())) + for name, value := range r.annotations.Map() { + annotations = append(annotations, qslabels.Label{Name: name, Value: expand(value)}) + } + + lbs := lb.Labels() + h := lbs.Hash() + resultFPs[h] = struct{}{} + + if _, ok := alerts[h]; ok { + err = fmt.Errorf("vector contains metrics with the same labelset after applying alert labels") + // We have already acquired the lock above hence using SetHealth and + // SetLastError will deadlock. + r.health = ruletypes.HealthBad + r.lastError = err + return nil, err + } + + alerts[h] = &ruletypes.Alert{ + Labels: lbs, + QueryResultLables: resultLabels, + Annotations: annotations, + ActiveAt: ts, + State: model.StatePending, + Value: result.V, + GeneratorURL: r.GeneratorURL(), + Receivers: r.preferredChannels, + } } } @@ -327,7 +328,7 @@ func (r *PromRule) String() string { PreferredChannels: r.preferredChannels, } - byt, err := yaml.Marshal(ar) + byt, err := json.Marshal(ar) if err != nil { return fmt.Sprintf("error marshaling alerting rule: %s", err.Error()) } diff --git a/pkg/query-service/rules/promrule_test.go b/pkg/query-service/rules/promrule_test.go index 1ae753634b9e..a4e0b94d06a9 100644 --- a/pkg/query-service/rules/promrule_test.go +++ b/pkg/query-service/rules/promrule_test.go @@ -12,6 +12,17 @@ import ( "github.com/stretchr/testify/assert" ) +func getVectorValues(vectors []ruletypes.Sample) []float64 { + if len(vectors) == 0 { + return []float64{} // Return empty slice instead of nil + } + var values []float64 + for _, v := range vectors { + values = append(values, v.V) + } + return values +} + func TestPromRuleShouldAlert(t *testing.T) { postableRule := ruletypes.PostableRule{ AlertName: "Test Rule", @@ -32,12 +43,13 @@ func TestPromRuleShouldAlert(t *testing.T) { } cases := []struct { - values pql.Series - expectAlert bool - compareOp string - matchType string - target float64 - expectedAlertSample v3.Point + values pql.Series + expectAlert bool + compareOp string + matchType string + target float64 + expectedAlertSample v3.Point + expectedVectorValues []float64 // Expected values in result vector }{ // Test cases for Equals Always { @@ -50,11 +62,12 @@ func TestPromRuleShouldAlert(t *testing.T) { {F: 0.0}, }, }, - expectAlert: true, - compareOp: "3", // Equals - matchType: "2", // Always - target: 0.0, - expectedAlertSample: v3.Point{Value: 0.0}, + expectAlert: true, + compareOp: "3", // Equals + matchType: "2", // Always + target: 0.0, + expectedAlertSample: v3.Point{Value: 0.0}, + expectedVectorValues: []float64{0.0}, }, { values: pql.Series{ @@ -66,10 +79,11 @@ func TestPromRuleShouldAlert(t *testing.T) { {F: 1.0}, }, }, - expectAlert: false, - compareOp: "3", // Equals - matchType: "2", // Always - target: 0.0, + expectAlert: false, + compareOp: "3", // Equals + matchType: "2", // Always + target: 0.0, + expectedVectorValues: []float64{}, }, { values: pql.Series{ @@ -81,10 +95,11 @@ func TestPromRuleShouldAlert(t *testing.T) { {F: 1.0}, }, }, - expectAlert: false, - compareOp: "3", // Equals - matchType: "2", // Always - target: 0.0, + expectAlert: false, + compareOp: "3", // Equals + matchType: "2", // Always + target: 0.0, + expectedVectorValues: []float64{}, }, { values: pql.Series{ @@ -112,11 +127,12 @@ func TestPromRuleShouldAlert(t *testing.T) { {F: 0.0}, }, }, - expectAlert: true, - compareOp: "3", // Equals - matchType: "1", // Once - target: 0.0, - expectedAlertSample: v3.Point{Value: 0.0}, + expectAlert: true, + compareOp: "3", // Equals + matchType: "1", // Once + target: 0.0, + expectedAlertSample: v3.Point{Value: 0.0}, + expectedVectorValues: []float64{0.0}, }, { values: pql.Series{ @@ -160,10 +176,11 @@ func TestPromRuleShouldAlert(t *testing.T) { {F: 1.0}, }, }, - expectAlert: false, - compareOp: "3", // Equals - matchType: "1", // Once - target: 0.0, + expectAlert: false, + compareOp: "3", // Equals + matchType: "1", // Once + target: 0.0, + expectedVectorValues: []float64{}, }, // Test cases for Greater Than Always { @@ -176,11 +193,12 @@ func TestPromRuleShouldAlert(t *testing.T) { {F: 2.0}, }, }, - expectAlert: true, - compareOp: "1", // Greater Than - matchType: "2", // Always - target: 1.5, - expectedAlertSample: v3.Point{Value: 2.0}, + expectAlert: true, + compareOp: "1", // Greater Than + matchType: "2", // Always + target: 1.5, + expectedAlertSample: v3.Point{Value: 2.0}, + expectedVectorValues: []float64{2.0}, }, { values: pql.Series{ @@ -240,11 +258,12 @@ func TestPromRuleShouldAlert(t *testing.T) { {F: 2.0}, }, }, - expectAlert: true, - compareOp: "1", // Greater Than - matchType: "1", // Once - target: 4.5, - expectedAlertSample: v3.Point{Value: 10.0}, + expectAlert: true, + compareOp: "1", // Greater Than + matchType: "1", // Once + target: 4.5, + expectedAlertSample: v3.Point{Value: 10.0}, + expectedVectorValues: []float64{10.0}, }, { values: pql.Series{ @@ -659,13 +678,49 @@ func TestPromRuleShouldAlert(t *testing.T) { postableRule.RuleCondition.CompareOp = ruletypes.CompareOp(c.compareOp) postableRule.RuleCondition.MatchType = ruletypes.MatchType(c.matchType) postableRule.RuleCondition.Target = &c.target + postableRule.RuleCondition.Thresholds = &ruletypes.RuleThresholdData{ + Kind: ruletypes.BasicThresholdKind, + Spec: ruletypes.BasicRuleThresholds{ + { + TargetValue: &c.target, + MatchType: ruletypes.MatchType(c.matchType), + CompareOp: ruletypes.CompareOp(c.compareOp), + }, + }, + } rule, err := NewPromRule("69", valuer.GenerateUUID(), &postableRule, logger, nil, nil) if err != nil { assert.NoError(t, err) } - _, shoulAlert := rule.ShouldAlert(toCommonSeries(c.values)) - assert.Equal(t, c.expectAlert, shoulAlert, "Test case %d", idx) + resultVectors, err := rule.Threshold.ShouldAlert(toCommonSeries(c.values)) + assert.NoError(t, err) + + // Compare full result vector with expected vector + actualValues := getVectorValues(resultVectors) + if c.expectedVectorValues != nil { + // If expected vector values are specified, compare them exactly + assert.Equal(t, c.expectedVectorValues, actualValues, "Result vector values don't match expected for case %d", idx) + } else { + // Fallback to the old logic for cases without expectedVectorValues + if c.expectAlert { + assert.NotEmpty(t, resultVectors, "Expected alert but got no result vectors for case %d", idx) + // Verify at least one of the result vectors matches the expected alert sample + if len(resultVectors) > 0 { + found := false + for _, sample := range resultVectors { + if sample.V == c.expectedAlertSample.Value { + found = true + break + } + } + assert.True(t, found, "Expected alert sample value %.2f not found in result vectors for case %d. Got values: %v", c.expectedAlertSample.Value, idx, actualValues) + } + } else { + assert.Empty(t, resultVectors, "Expected no alert but got result vectors for case %d", idx) + } + } + } } diff --git a/pkg/query-service/rules/threshold_rule.go b/pkg/query-service/rules/threshold_rule.go index 6828d3221105..b1ebb069056b 100644 --- a/pkg/query-service/rules/threshold_rule.go +++ b/pkg/query-service/rules/threshold_rule.go @@ -38,8 +38,6 @@ import ( querierV5 "github.com/SigNoz/signoz/pkg/querier" qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" - - yaml "gopkg.in/yaml.v2" ) type ThresholdRule struct { @@ -484,10 +482,17 @@ func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID, } for _, series := range queryResult.Series { - smpl, shouldAlert := r.ShouldAlert(*series) - if shouldAlert { - resultVector = append(resultVector, smpl) + if r.Condition() != nil && r.Condition().RequireMinPoints { + if len(series.Points) < r.ruleCondition.RequiredNumPoints { + r.logger.InfoContext(ctx, "not enough data points to evaluate series, skipping", "ruleid", r.ID(), "numPoints", len(series.Points), "requiredPoints", r.Condition().RequiredNumPoints) + continue + } } + resultSeries, err := r.Threshold.ShouldAlert(*series) + if err != nil { + return nil, err + } + resultVector = append(resultVector, resultSeries...) } return resultVector, nil @@ -554,10 +559,17 @@ func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUI } for _, series := range queryResult.Series { - smpl, shouldAlert := r.ShouldAlert(*series) - if shouldAlert { - resultVector = append(resultVector, smpl) + if r.Condition() != nil && r.Condition().RequireMinPoints { + if len(series.Points) < r.Condition().RequiredNumPoints { + r.logger.InfoContext(ctx, "not enough data points to evaluate series, skipping", "ruleid", r.ID(), "numPoints", len(series.Points), "requiredPoints", r.Condition().RequiredNumPoints) + continue + } } + resultSeries, err := r.Threshold.ShouldAlert(*series) + if err != nil { + return nil, err + } + resultVector = append(resultVector, resultSeries...) } return resultVector, nil @@ -597,6 +609,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er } value := valueFormatter.Format(smpl.V, r.Unit()) + //todo(aniket): handle different threshold threshold := valueFormatter.Format(r.targetVal(), r.Unit()) r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold) @@ -777,7 +790,7 @@ func (r *ThresholdRule) String() string { PreferredChannels: r.preferredChannels, } - byt, err := yaml.Marshal(ar) + byt, err := json.Marshal(ar) if err != nil { return fmt.Sprintf("error marshaling alerting rule: %s", err.Error()) } diff --git a/pkg/query-service/rules/threshold_rule_test.go b/pkg/query-service/rules/threshold_rule_test.go index 94b4f7c5c658..d6bc92c8ab44 100644 --- a/pkg/query-service/rules/threshold_rule_test.go +++ b/pkg/query-service/rules/threshold_rule_test.go @@ -801,9 +801,16 @@ func TestThresholdRuleShouldAlert(t *testing.T) { } for idx, c := range cases { - postableRule.RuleCondition.CompareOp = ruletypes.CompareOp(c.compareOp) - postableRule.RuleCondition.MatchType = ruletypes.MatchType(c.matchType) - postableRule.RuleCondition.Target = &c.target + postableRule.RuleCondition.Thresholds = &ruletypes.RuleThresholdData{ + Kind: ruletypes.BasicThresholdKind, + Spec: ruletypes.BasicRuleThresholds{ + { + TargetValue: &c.target, + MatchType: ruletypes.MatchType(c.matchType), + CompareOp: ruletypes.CompareOp(c.compareOp), + }, + }, + } rule, err := NewThresholdRule("69", valuer.GenerateUUID(), &postableRule, nil, nil, logger, WithEvalDelay(2*time.Minute)) if err != nil { @@ -815,10 +822,24 @@ func TestThresholdRuleShouldAlert(t *testing.T) { values.Points[i].Timestamp = time.Now().UnixMilli() } - smpl, shoulAlert := rule.ShouldAlert(c.values) - assert.Equal(t, c.expectAlert, shoulAlert, "Test case %d", idx) - if shoulAlert { - assert.Equal(t, c.expectedAlertSample.Value, smpl.V, "Test case %d", idx) + resultVectors, err := rule.Threshold.ShouldAlert(c.values) + assert.NoError(t, err, "Test case %d", idx) + + // Compare result vectors with expected behavior + if c.expectAlert { + assert.NotEmpty(t, resultVectors, "Expected alert but got no result vectors for case %d", idx) + if len(resultVectors) > 0 { + found := false + for _, sample := range resultVectors { + if sample.V == c.expectedAlertSample.Value { + found = true + break + } + } + assert.True(t, found, "Expected alert sample value %.2f not found in result vectors for case %d. Got values: %v", c.expectedAlertSample.Value, idx, getVectorValues(resultVectors)) + } + } else { + assert.Empty(t, resultVectors, "Expected no alert but got result vectors for case %d", idx) } } } @@ -894,7 +915,16 @@ func TestPrepareLinksToLogs(t *testing.T) { } logger := instrumentationtest.New().Logger() - + postableRule.RuleCondition.Thresholds = &ruletypes.RuleThresholdData{ + Kind: ruletypes.BasicThresholdKind, + Spec: ruletypes.BasicRuleThresholds{ + { + TargetValue: postableRule.RuleCondition.Target, + MatchType: ruletypes.MatchType(postableRule.RuleCondition.MatchType), + CompareOp: ruletypes.CompareOp(postableRule.RuleCondition.CompareOp), + }, + }, + } rule, err := NewThresholdRule("69", valuer.GenerateUUID(), &postableRule, nil, nil, logger, WithEvalDelay(2*time.Minute)) if err != nil { assert.NoError(t, err) @@ -944,7 +974,16 @@ func TestPrepareLinksToLogsV5(t *testing.T) { } logger := instrumentationtest.New().Logger() - + postableRule.RuleCondition.Thresholds = &ruletypes.RuleThresholdData{ + Kind: ruletypes.BasicThresholdKind, + Spec: ruletypes.BasicRuleThresholds{ + { + TargetValue: postableRule.RuleCondition.Target, + MatchType: ruletypes.MatchType(postableRule.RuleCondition.MatchType), + CompareOp: ruletypes.CompareOp(postableRule.RuleCondition.CompareOp), + }, + }, + } rule, err := NewThresholdRule("69", valuer.GenerateUUID(), &postableRule, nil, nil, logger, WithEvalDelay(2*time.Minute)) if err != nil { assert.NoError(t, err) @@ -994,7 +1033,16 @@ func TestPrepareLinksToTracesV5(t *testing.T) { } logger := instrumentationtest.New().Logger() - + postableRule.RuleCondition.Thresholds = &ruletypes.RuleThresholdData{ + Kind: ruletypes.BasicThresholdKind, + Spec: ruletypes.BasicRuleThresholds{ + { + TargetValue: postableRule.RuleCondition.Target, + MatchType: ruletypes.MatchType(postableRule.RuleCondition.MatchType), + CompareOp: ruletypes.CompareOp(postableRule.RuleCondition.CompareOp), + }, + }, + } rule, err := NewThresholdRule("69", valuer.GenerateUUID(), &postableRule, nil, nil, logger, WithEvalDelay(2*time.Minute)) if err != nil { assert.NoError(t, err) @@ -1037,7 +1085,16 @@ func TestPrepareLinksToTraces(t *testing.T) { } logger := instrumentationtest.New().Logger() - + postableRule.RuleCondition.Thresholds = &ruletypes.RuleThresholdData{ + Kind: ruletypes.BasicThresholdKind, + Spec: ruletypes.BasicRuleThresholds{ + { + TargetValue: postableRule.RuleCondition.Target, + MatchType: ruletypes.MatchType(postableRule.RuleCondition.MatchType), + CompareOp: ruletypes.CompareOp(postableRule.RuleCondition.CompareOp), + }, + }, + } rule, err := NewThresholdRule("69", valuer.GenerateUUID(), &postableRule, nil, nil, logger, WithEvalDelay(2*time.Minute)) if err != nil { assert.NoError(t, err) @@ -1111,9 +1168,16 @@ func TestThresholdRuleLabelNormalization(t *testing.T) { logger := instrumentationtest.New().Logger() for idx, c := range cases { - postableRule.RuleCondition.CompareOp = ruletypes.CompareOp(c.compareOp) - postableRule.RuleCondition.MatchType = ruletypes.MatchType(c.matchType) - postableRule.RuleCondition.Target = &c.target + postableRule.RuleCondition.Thresholds = &ruletypes.RuleThresholdData{ + Kind: ruletypes.BasicThresholdKind, + Spec: ruletypes.BasicRuleThresholds{ + { + TargetValue: &c.target, + MatchType: ruletypes.MatchType(c.matchType), + CompareOp: ruletypes.CompareOp(c.compareOp), + }, + }, + } rule, err := NewThresholdRule("69", valuer.GenerateUUID(), &postableRule, nil, nil, logger, WithEvalDelay(2*time.Minute)) if err != nil { @@ -1125,12 +1189,26 @@ func TestThresholdRuleLabelNormalization(t *testing.T) { values.Points[i].Timestamp = time.Now().UnixMilli() } - sample, shoulAlert := rule.ShouldAlert(c.values) + vector, err := rule.Threshold.ShouldAlert(c.values) + assert.NoError(t, err) + for name, value := range c.values.Labels { - assert.Equal(t, value, sample.Metric.Get(name)) + for _, sample := range vector { + assert.Equal(t, value, sample.Metric.Get(name)) + } } - assert.Equal(t, c.expectAlert, shoulAlert, "Test case %d", idx) + // Get result vectors from threshold evaluation + resultVectors, err := rule.Threshold.ShouldAlert(c.values) + assert.NoError(t, err, "Test case %d", idx) + + // Compare result vectors with expected behavior + if c.expectAlert { + assert.NotEmpty(t, resultVectors, "Expected alert but got no result vectors for case %d", idx) + // For this test, we don't have expectedAlertSample, so just verify vectors exist + } else { + assert.Empty(t, resultVectors, "Expected no alert but got result vectors for case %d", idx) + } } } @@ -1152,6 +1230,16 @@ func TestThresholdRuleEvalDelay(t *testing.T) { }, }, } + postableRule.RuleCondition.Thresholds = &ruletypes.RuleThresholdData{ + Kind: ruletypes.BasicThresholdKind, + Spec: ruletypes.BasicRuleThresholds{ + { + TargetValue: postableRule.RuleCondition.Target, + MatchType: ruletypes.MatchType(postableRule.RuleCondition.MatchType), + CompareOp: ruletypes.CompareOp(postableRule.RuleCondition.CompareOp), + }, + }, + } // 01:39:47 ts := time.Unix(1717205987, 0) @@ -1169,6 +1257,7 @@ func TestThresholdRuleEvalDelay(t *testing.T) { logger := instrumentationtest.New().Logger() for idx, c := range cases { + rule, err := NewThresholdRule("69", valuer.GenerateUUID(), &postableRule, nil, nil, logger) // no eval delay if err != nil { assert.NoError(t, err) @@ -1202,6 +1291,16 @@ func TestThresholdRuleClickHouseTmpl(t *testing.T) { }, }, } + postableRule.RuleCondition.Thresholds = &ruletypes.RuleThresholdData{ + Kind: ruletypes.BasicThresholdKind, + Spec: ruletypes.BasicRuleThresholds{ + { + TargetValue: postableRule.RuleCondition.Target, + MatchType: ruletypes.MatchType(postableRule.RuleCondition.MatchType), + CompareOp: ruletypes.CompareOp(postableRule.RuleCondition.CompareOp), + }, + }, + } // 01:39:47 ts := time.Unix(1717205987, 0) @@ -1380,6 +1479,20 @@ func TestThresholdRuleUnitCombinations(t *testing.T) { postableRule.RuleCondition.Target = &c.target postableRule.RuleCondition.CompositeQuery.Unit = c.yAxisUnit postableRule.RuleCondition.TargetUnit = c.targetUnit + postableRule.RuleCondition.Thresholds = &ruletypes.RuleThresholdData{ + Kind: ruletypes.BasicThresholdKind, + Spec: ruletypes.BasicRuleThresholds{ + { + Name: postableRule.AlertName, + TargetValue: &c.target, + TargetUnit: c.targetUnit, + RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit, + MatchType: ruletypes.MatchType(c.matchType), + CompareOp: ruletypes.CompareOp(c.compareOp), + SelectedQuery: postableRule.RuleCondition.SelectedQuery, + }, + }, + } postableRule.Annotations = map[string]string{ "description": "This alert is fired when the defined metric (current value: {{$value}}) crosses the threshold ({{$threshold}})", "summary": "The rule threshold is set to {{$threshold}}, and the observed metric value is {{$value}}", @@ -1475,9 +1588,19 @@ func TestThresholdRuleNoData(t *testing.T) { ExpectQuery(queryString). WillReturnRows(rows) var target float64 = 0 - postableRule.RuleCondition.CompareOp = ruletypes.ValueIsEq - postableRule.RuleCondition.MatchType = ruletypes.AtleastOnce - postableRule.RuleCondition.Target = &target + postableRule.RuleCondition.Thresholds = &ruletypes.RuleThresholdData{ + Kind: ruletypes.BasicThresholdKind, + Spec: ruletypes.BasicRuleThresholds{ + { + Name: postableRule.AlertName, + TargetValue: &target, + RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit, + MatchType: ruletypes.AtleastOnce, + CompareOp: ruletypes.ValueIsEq, + SelectedQuery: postableRule.RuleCondition.SelectedQuery, + }, + }, + } postableRule.Annotations = map[string]string{ "description": "This alert is fired when the defined metric (current value: {{$value}}) crosses the threshold ({{$threshold}})", "summary": "The rule threshold is set to {{$threshold}}, and the observed metric value is {{$value}}", @@ -1585,6 +1708,20 @@ func TestThresholdRuleTracesLink(t *testing.T) { postableRule.RuleCondition.Target = &c.target postableRule.RuleCondition.CompositeQuery.Unit = c.yAxisUnit postableRule.RuleCondition.TargetUnit = c.targetUnit + postableRule.RuleCondition.Thresholds = &ruletypes.RuleThresholdData{ + Kind: ruletypes.BasicThresholdKind, + Spec: ruletypes.BasicRuleThresholds{ + { + Name: postableRule.AlertName, + TargetValue: &c.target, + TargetUnit: c.targetUnit, + RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit, + MatchType: ruletypes.MatchType(c.matchType), + CompareOp: ruletypes.CompareOp(c.compareOp), + SelectedQuery: postableRule.RuleCondition.SelectedQuery, + }, + }, + } postableRule.Annotations = map[string]string{ "description": "This alert is fired when the defined metric (current value: {{$value}}) crosses the threshold ({{$threshold}})", "summary": "The rule threshold is set to {{$threshold}}, and the observed metric value is {{$value}}", @@ -1708,6 +1845,20 @@ func TestThresholdRuleLogsLink(t *testing.T) { postableRule.RuleCondition.Target = &c.target postableRule.RuleCondition.CompositeQuery.Unit = c.yAxisUnit postableRule.RuleCondition.TargetUnit = c.targetUnit + postableRule.RuleCondition.Thresholds = &ruletypes.RuleThresholdData{ + Kind: ruletypes.BasicThresholdKind, + Spec: ruletypes.BasicRuleThresholds{ + { + Name: postableRule.AlertName, + TargetValue: &c.target, + TargetUnit: c.targetUnit, + RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit, + MatchType: ruletypes.MatchType(c.matchType), + CompareOp: ruletypes.CompareOp(c.compareOp), + SelectedQuery: postableRule.RuleCondition.SelectedQuery, + }, + }, + } postableRule.Annotations = map[string]string{ "description": "This alert is fired when the defined metric (current value: {{$value}}) crosses the threshold ({{$threshold}})", "summary": "The rule threshold is set to {{$threshold}}, and the observed metric value is {{$value}}", @@ -1756,6 +1907,16 @@ func TestThresholdRuleShiftBy(t *testing.T) { EvalWindow: ruletypes.Duration(5 * time.Minute), Frequency: ruletypes.Duration(1 * time.Minute), RuleCondition: &ruletypes.RuleCondition{ + Thresholds: &ruletypes.RuleThresholdData{ + Kind: ruletypes.BasicThresholdKind, + Spec: ruletypes.BasicRuleThresholds{ + { + TargetValue: &target, + CompareOp: ruletypes.ValueAboveOrEq, + MatchType: ruletypes.AtleastOnce, + }, + }, + }, CompositeQuery: &v3.CompositeQuery{ QueryType: v3.QueryTypeBuilder, BuilderQueries: map[string]*v3.BuilderQuery{ @@ -1787,8 +1948,6 @@ func TestThresholdRuleShiftBy(t *testing.T) { }, }, }, - Target: &target, - CompareOp: ruletypes.ValueAboveOrEq, }, } @@ -1811,3 +1970,188 @@ func TestThresholdRuleShiftBy(t *testing.T) { assert.Equal(t, int64(10), params.CompositeQuery.BuilderQueries["A"].ShiftBy) } + +func TestMultipleThresholdRule(t *testing.T) { + postableRule := ruletypes.PostableRule{ + AlertName: "Mulitple threshold test", + AlertType: ruletypes.AlertTypeMetric, + RuleType: ruletypes.RuleTypeThreshold, + EvalWindow: ruletypes.Duration(5 * time.Minute), + Frequency: ruletypes.Duration(1 * time.Minute), + RuleCondition: &ruletypes.RuleCondition{ + CompositeQuery: &v3.CompositeQuery{ + QueryType: v3.QueryTypeBuilder, + BuilderQueries: map[string]*v3.BuilderQuery{ + "A": { + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{ + Key: "signoz_calls_total", + }, + AggregateOperator: v3.AggregateOperatorSumRate, + DataSource: v3.DataSourceMetrics, + Expression: "A", + }, + }, + }, + }, + } + telemetryStore := telemetrystoretest.New(telemetrystore.Config{}, &queryMatcherAny{}) + + cols := make([]cmock.ColumnType, 0) + cols = append(cols, cmock.ColumnType{Name: "value", Type: "Float64"}) + cols = append(cols, cmock.ColumnType{Name: "attr", Type: "String"}) + cols = append(cols, cmock.ColumnType{Name: "timestamp", Type: "String"}) + + cases := []struct { + targetUnit string + yAxisUnit string + values [][]interface{} + expectAlerts int + compareOp string + matchType string + target float64 + secondTarget float64 + summaryAny []string + }{ + { + targetUnit: "s", + yAxisUnit: "ns", + values: [][]interface{}{ + {float64(572588400), "attr", time.Now()}, // 0.57 seconds + {float64(572386400), "attr", time.Now().Add(1 * time.Second)}, // 0.57 seconds + {float64(300947400), "attr", time.Now().Add(2 * time.Second)}, // 0.3 seconds + {float64(299316000), "attr", time.Now().Add(3 * time.Second)}, // 0.3 seconds + {float64(66640400.00000001), "attr", time.Now().Add(4 * time.Second)}, // 0.06 seconds + }, + expectAlerts: 2, + compareOp: "1", // Above + matchType: "1", // Once + target: 1, // 1 second + secondTarget: .5, + summaryAny: []string{ + "observed metric value is 573 ms", + "observed metric value is 572 ms", + }, + }, + { + targetUnit: "ms", + yAxisUnit: "ns", + values: [][]interface{}{ + {float64(572588400), "attr", time.Now()}, // 572.58 ms + {float64(572386400), "attr", time.Now().Add(1 * time.Second)}, // 572.38 ms + {float64(300947400), "attr", time.Now().Add(2 * time.Second)}, // 300.94 ms + {float64(299316000), "attr", time.Now().Add(3 * time.Second)}, // 299.31 ms + {float64(66640400.00000001), "attr", time.Now().Add(4 * time.Second)}, // 66.64 ms + }, + expectAlerts: 6, // Expects 6 values exceed 200ms (572.58, 572.38, 300.94, 299.31) + 2 values exceed 500ms (572.58, 572.38) + compareOp: "1", // Above + matchType: "1", // Once + target: 200, // 200 ms + secondTarget: 500, + summaryAny: []string{ + "observed metric value is 299 ms", + "the observed metric value is 573 ms", + "the observed metric value is 572 ms", + "the observed metric value is 301 ms", + }, + }, + { + targetUnit: "decgbytes", + yAxisUnit: "bytes", + values: [][]interface{}{ + {float64(2863284053), "attr", time.Now()}, // 2.86 GB + {float64(2863388842), "attr", time.Now().Add(1 * time.Second)}, // 2.86 GB + {float64(300947400), "attr", time.Now().Add(2 * time.Second)}, // 0.3 GB + {float64(299316000), "attr", time.Now().Add(3 * time.Second)}, // 0.3 GB + {float64(66640400.00000001), "attr", time.Now().Add(4 * time.Second)}, // 66.64 MB + }, + expectAlerts: 2, + compareOp: "1", // Above + matchType: "1", // Once + target: 200, // 200 GB + secondTarget: 2, // 2GB + summaryAny: []string{ + "observed metric value is 2.7 GiB", + "the observed metric value is 0.3 GB", + }, + }, + } + + logger := instrumentationtest.New().Logger() + + for idx, c := range cases { + rows := cmock.NewRows(cols, c.values) + // We are testing the eval logic after the query is run + // so we don't care about the query string here + queryString := "SELECT any" + telemetryStore.Mock(). + ExpectQuery(queryString). + WillReturnRows(rows) + postableRule.RuleCondition.CompareOp = ruletypes.CompareOp(c.compareOp) + postableRule.RuleCondition.MatchType = ruletypes.MatchType(c.matchType) + postableRule.RuleCondition.Target = &c.target + postableRule.RuleCondition.CompositeQuery.Unit = c.yAxisUnit + postableRule.RuleCondition.TargetUnit = c.targetUnit + postableRule.RuleCondition.Thresholds = &ruletypes.RuleThresholdData{ + Kind: ruletypes.BasicThresholdKind, + Spec: ruletypes.BasicRuleThresholds{ + { + Name: "first_threshold", + TargetValue: &c.target, + TargetUnit: c.targetUnit, + RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit, + MatchType: ruletypes.MatchType(c.matchType), + CompareOp: ruletypes.CompareOp(c.compareOp), + SelectedQuery: postableRule.RuleCondition.SelectedQuery, + }, + { + Name: "second_threshold", + TargetValue: &c.secondTarget, + TargetUnit: c.targetUnit, + RuleUnit: postableRule.RuleCondition.CompositeQuery.Unit, + MatchType: ruletypes.MatchType(c.matchType), + CompareOp: ruletypes.CompareOp(c.compareOp), + SelectedQuery: postableRule.RuleCondition.SelectedQuery, + }, + }, + } + postableRule.Annotations = map[string]string{ + "description": "This alert is fired when the defined metric (current value: {{$value}}) crosses the threshold ({{$threshold}})", + "summary": "The rule threshold is set to {{$threshold}}, and the observed metric value is {{$value}}", + } + + options := clickhouseReader.NewOptions("", "", "archiveNamespace") + readerCache, err := cachetest.New(cache.Config{Provider: "memory", Memory: cache.Memory{TTL: DefaultFrequency}}) + require.NoError(t, err) + reader := clickhouseReader.NewReaderFromClickhouseConnection(options, nil, telemetryStore, prometheustest.New(instrumentationtest.New().Logger(), prometheus.Config{}), "", time.Duration(time.Second), readerCache) + rule, err := NewThresholdRule("69", valuer.GenerateUUID(), &postableRule, reader, nil, logger) + rule.TemporalityMap = map[string]map[v3.Temporality]bool{ + "signoz_calls_total": { + v3.Delta: true, + }, + } + if err != nil { + assert.NoError(t, err) + } + + retVal, err := rule.Eval(context.Background(), time.Now()) + if err != nil { + assert.NoError(t, err) + } + + assert.Equal(t, c.expectAlerts, retVal.(int), "case %d", idx) + if c.expectAlerts != 0 { + foundCount := 0 + for _, item := range rule.Active { + for _, summary := range c.summaryAny { + if strings.Contains(item.Annotations.Get("summary"), summary) { + foundCount++ + break + } + } + } + assert.Equal(t, c.expectAlerts, foundCount, "case %d", idx) + } + } +} diff --git a/pkg/types/ruletypes/alerting.go b/pkg/types/ruletypes/alerting.go index 32f276163651..52f56c479905 100644 --- a/pkg/types/ruletypes/alerting.go +++ b/pkg/types/ruletypes/alerting.go @@ -11,7 +11,6 @@ import ( "github.com/SigNoz/signoz/pkg/query-service/model" v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3" "github.com/SigNoz/signoz/pkg/query-service/utils/labels" - qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5" ) @@ -106,18 +105,19 @@ const ( ) type RuleCondition struct { - CompositeQuery *v3.CompositeQuery `json:"compositeQuery,omitempty" yaml:"compositeQuery,omitempty"` - CompareOp CompareOp `yaml:"op,omitempty" json:"op,omitempty"` - Target *float64 `yaml:"target,omitempty" json:"target,omitempty"` - AlertOnAbsent bool `yaml:"alertOnAbsent,omitempty" json:"alertOnAbsent,omitempty"` - AbsentFor uint64 `yaml:"absentFor,omitempty" json:"absentFor,omitempty"` + CompositeQuery *v3.CompositeQuery `json:"compositeQuery,omitempty"` + CompareOp CompareOp `json:"op,omitempty"` + Target *float64 `json:"target,omitempty"` + AlertOnAbsent bool `json:"alertOnAbsent,omitempty"` + AbsentFor uint64 `json:"absentFor,omitempty"` MatchType MatchType `json:"matchType,omitempty"` TargetUnit string `json:"targetUnit,omitempty"` Algorithm string `json:"algorithm,omitempty"` Seasonality string `json:"seasonality,omitempty"` SelectedQuery string `json:"selectedQueryName,omitempty"` - RequireMinPoints bool `yaml:"requireMinPoints,omitempty" json:"requireMinPoints,omitempty"` - RequiredNumPoints int `yaml:"requiredNumPoints,omitempty" json:"requiredNumPoints,omitempty"` + RequireMinPoints bool `json:"requireMinPoints,omitempty"` + RequiredNumPoints int `json:"requiredNumPoints,omitempty"` + Thresholds *RuleThresholdData `json:"thresholds,omitempty"` } func (rc *RuleCondition) GetSelectedQueryName() string { @@ -188,10 +188,7 @@ func (rc *RuleCondition) IsValid() bool { } if rc.QueryType() == v3.QueryTypeBuilder { - if rc.Target == nil { - return false - } - if rc.CompareOp == "" { + if rc.Thresholds == nil { return false } } diff --git a/pkg/types/ruletypes/api_params.go b/pkg/types/ruletypes/api_params.go index b6be66d93149..f4ad6b55cd2b 100644 --- a/pkg/types/ruletypes/api_params.go +++ b/pkg/types/ruletypes/api_params.go @@ -3,18 +3,15 @@ package ruletypes import ( "context" "encoding/json" - "fmt" "time" "unicode/utf8" + signozError "github.com/SigNoz/signoz/pkg/errors" "github.com/SigNoz/signoz/pkg/query-service/model" v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3" - "github.com/pkg/errors" - "go.uber.org/multierr" "github.com/SigNoz/signoz/pkg/query-service/utils/times" "github.com/SigNoz/signoz/pkg/query-service/utils/timestamp" - yaml "gopkg.in/yaml.v2" ) type AlertType string @@ -30,30 +27,20 @@ type RuleDataKind string const ( RuleDataKindJson RuleDataKind = "json" - RuleDataKindYaml RuleDataKind = "yaml" ) -var ( - ErrFailedToParseJSON = errors.New("failed to parse json") - ErrFailedToParseYAML = errors.New("failed to parse yaml") - ErrInvalidDataType = errors.New("invalid data type") -) - -// this file contains api request and responses to be -// served over http - // PostableRule is used to create alerting rule from HTTP api type PostableRule struct { - AlertName string `yaml:"alert,omitempty" json:"alert,omitempty"` - AlertType AlertType `yaml:"alertType,omitempty" json:"alertType,omitempty"` - Description string `yaml:"description,omitempty" json:"description,omitempty"` - RuleType RuleType `yaml:"ruleType,omitempty" json:"ruleType,omitempty"` - EvalWindow Duration `yaml:"evalWindow,omitempty" json:"evalWindow,omitempty"` - Frequency Duration `yaml:"frequency,omitempty" json:"frequency,omitempty"` + AlertName string `json:"alert,omitempty"` + AlertType AlertType `json:"alertType,omitempty"` + Description string `json:"description,omitempty"` + RuleType RuleType `json:"ruleType,omitempty"` + EvalWindow Duration `json:"evalWindow,omitempty"` + Frequency Duration `json:"frequency,omitempty"` - RuleCondition *RuleCondition `yaml:"condition,omitempty" json:"condition,omitempty"` - Labels map[string]string `yaml:"labels,omitempty" json:"labels,omitempty"` - Annotations map[string]string `yaml:"annotations,omitempty" json:"annotations,omitempty"` + RuleCondition *RuleCondition `json:"condition,omitempty"` + Labels map[string]string `json:"labels,omitempty"` + Annotations map[string]string `json:"annotations,omitempty"` Disabled bool `json:"disabled"` @@ -63,84 +50,65 @@ type PostableRule struct { PreferredChannels []string `json:"preferredChannels,omitempty"` Version string `json:"version,omitempty"` - - // legacy - Expr string `yaml:"expr,omitempty" json:"expr,omitempty"` - OldYaml string `json:"yaml,omitempty"` } -func ParsePostableRule(content []byte) (*PostableRule, error) { - return ParsePostableRuleWithKind(content, "json") -} +func (r *PostableRule) processRuleDefaults() error { -func ParsePostableRuleWithKind(content []byte, kind RuleDataKind) (*PostableRule, error) { - return ParseIntoRule(PostableRule{}, content, kind) -} - -// parseIntoRule loads the content (data) into PostableRule and also -// validates the end result -func ParseIntoRule(initRule PostableRule, content []byte, kind RuleDataKind) (*PostableRule, error) { - rule := &initRule - - var err error - if kind == RuleDataKindJson { - if err = json.Unmarshal(content, rule); err != nil { - return nil, ErrFailedToParseJSON - } - } else if kind == RuleDataKindYaml { - if err = yaml.Unmarshal(content, rule); err != nil { - return nil, ErrFailedToParseYAML - } - } else { - return nil, ErrInvalidDataType + if r.EvalWindow == 0 { + r.EvalWindow = Duration(5 * time.Minute) } - if rule.RuleCondition == nil && rule.Expr != "" { - // account for legacy rules - rule.RuleType = RuleTypeProm - rule.EvalWindow = Duration(5 * time.Minute) - rule.Frequency = Duration(1 * time.Minute) - rule.RuleCondition = &RuleCondition{ - CompositeQuery: &v3.CompositeQuery{ - QueryType: v3.QueryTypePromQL, - PromQueries: map[string]*v3.PromQuery{ - "A": { - Query: rule.Expr, - }, - }, - }, - } + if r.Frequency == 0 { + r.Frequency = Duration(1 * time.Minute) } - if rule.EvalWindow == 0 { - rule.EvalWindow = Duration(5 * time.Minute) - } - - if rule.Frequency == 0 { - rule.Frequency = Duration(1 * time.Minute) - } - - if rule.RuleCondition != nil { - if rule.RuleCondition.CompositeQuery.QueryType == v3.QueryTypeBuilder { - if rule.RuleType == "" { - rule.RuleType = RuleTypeThreshold + if r.RuleCondition != nil { + if r.RuleCondition.CompositeQuery.QueryType == v3.QueryTypeBuilder { + if r.RuleType == "" { + r.RuleType = RuleTypeThreshold } - } else if rule.RuleCondition.CompositeQuery.QueryType == v3.QueryTypePromQL { - rule.RuleType = RuleTypeProm + } else if r.RuleCondition.CompositeQuery.QueryType == v3.QueryTypePromQL { + r.RuleType = RuleTypeProm } - for qLabel, q := range rule.RuleCondition.CompositeQuery.BuilderQueries { + for qLabel, q := range r.RuleCondition.CompositeQuery.BuilderQueries { if q.AggregateAttribute.Key != "" && q.Expression == "" { q.Expression = qLabel } } + //added alerts v2 fields + if r.RuleCondition.Thresholds == nil { + thresholdName := CriticalThresholdName + if r.Labels != nil { + if severity, ok := r.Labels["severity"]; ok { + thresholdName = severity + } + } + thresholdData := RuleThresholdData{ + Kind: BasicThresholdKind, + Spec: BasicRuleThresholds{{ + Name: thresholdName, + RuleUnit: r.RuleCondition.CompositeQuery.Unit, + TargetUnit: r.RuleCondition.TargetUnit, + TargetValue: r.RuleCondition.Target, + MatchType: r.RuleCondition.MatchType, + CompareOp: r.RuleCondition.CompareOp, + }}, + } + r.RuleCondition.Thresholds = &thresholdData + } } - if err := rule.Validate(); err != nil { - return nil, err - } + return r.Validate() +} - return rule, nil +func (r *PostableRule) UnmarshalJSON(bytes []byte) error { + type Alias PostableRule + aux := (*Alias)(r) + if err := json.Unmarshal(bytes, aux); err != nil { + return signozError.NewInvalidInputf(signozError.CodeInvalidInput, "failed to parse json: %v", err) + } + return r.processRuleDefaults() } func isValidLabelName(ln string) bool { @@ -204,47 +172,35 @@ func (r *PostableRule) Validate() error { if r.RuleCondition == nil { // will get panic if we try to access CompositeQuery, so return here - return errors.Errorf("rule condition is required") + return signozError.NewInvalidInputf(signozError.CodeInvalidInput, "rule condition is required") } else { if r.RuleCondition.CompositeQuery == nil { - errs = append(errs, errors.Errorf("composite metric query is required")) + errs = append(errs, signozError.NewInvalidInputf(signozError.CodeInvalidInput, "composite metric query is required")) } } if isAllQueriesDisabled(r.RuleCondition.CompositeQuery) { - errs = append(errs, errors.Errorf("all queries are disabled in rule condition")) - } - - if r.RuleType == RuleTypeThreshold { - if r.RuleCondition.Target == nil { - errs = append(errs, errors.Errorf("rule condition missing the threshold")) - } - if r.RuleCondition.CompareOp == "" { - errs = append(errs, errors.Errorf("rule condition missing the compare op")) - } - if r.RuleCondition.MatchType == "" { - errs = append(errs, errors.Errorf("rule condition missing the match option")) - } + errs = append(errs, signozError.NewInvalidInputf(signozError.CodeInvalidInput, "all queries are disabled in rule condition")) } for k, v := range r.Labels { if !isValidLabelName(k) { - errs = append(errs, errors.Errorf("invalid label name: %s", k)) + errs = append(errs, signozError.NewInvalidInputf(signozError.CodeInvalidInput, "invalid label name: %s", k)) } if !isValidLabelValue(v) { - errs = append(errs, errors.Errorf("invalid label value: %s", v)) + errs = append(errs, signozError.NewInvalidInputf(signozError.CodeInvalidInput, "invalid label value: %s", v)) } } for k := range r.Annotations { if !isValidLabelName(k) { - errs = append(errs, errors.Errorf("invalid annotation name: %s", k)) + errs = append(errs, signozError.NewInvalidInputf(signozError.CodeInvalidInput, "invalid annotation name: %s", k)) } } errs = append(errs, testTemplateParsing(r)...) - return multierr.Combine(errs...) + return signozError.Join(errs...) } func testTemplateParsing(rl *PostableRule) (errs []error) { @@ -272,7 +228,7 @@ func testTemplateParsing(rl *PostableRule) (errs []error) { for _, val := range rl.Labels { err := parseTest(val) if err != nil { - errs = append(errs, fmt.Errorf("msg=%s", err.Error())) + errs = append(errs, signozError.NewInvalidInputf(signozError.CodeInvalidInput, "template parsing error: %s", err.Error())) } } @@ -280,7 +236,7 @@ func testTemplateParsing(rl *PostableRule) (errs []error) { for _, val := range rl.Annotations { err := parseTest(val) if err != nil { - errs = append(errs, fmt.Errorf("msg=%s", err.Error())) + errs = append(errs, signozError.NewInvalidInputf(signozError.CodeInvalidInput, "template parsing error: %s", err.Error())) } } diff --git a/pkg/types/ruletypes/api_params_test.go b/pkg/types/ruletypes/api_params_test.go index 44968e3fd137..27ec5883714e 100644 --- a/pkg/types/ruletypes/api_params_test.go +++ b/pkg/types/ruletypes/api_params_test.go @@ -1,7 +1,10 @@ package ruletypes import ( + "encoding/json" + "github.com/stretchr/testify/assert" "testing" + "time" v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3" ) @@ -84,3 +87,307 @@ func TestIsAllQueriesDisabled(t *testing.T) { } } } + +func TestParseIntoRule(t *testing.T) { + tests := []struct { + name string + initRule PostableRule + content []byte + kind RuleDataKind + expectError bool + validate func(*testing.T, *PostableRule) + }{ + { + name: "valid JSON with complete rule", + initRule: PostableRule{}, + content: []byte(`{ + "alert": "TestAlert", + "alertType": "METRIC_BASED_ALERT", + "description": "Test description", + "ruleType": "threshold_rule", + "evalWindow": "5m", + "frequency": "1m", + "condition": { + "compositeQuery": { + "queryType": "builder", + "builderQueries": { + "A": { + "expression": "A", + "disabled": false, + "aggregateAttribute": { + "key": "test_metric" + } + } + } + }, + "target": 10.0, + "matchType": "1", + "op": "1", + "selectedQuery": "A" + } + }`), + kind: RuleDataKindJson, + expectError: false, + validate: func(t *testing.T, rule *PostableRule) { + if rule.AlertName != "TestAlert" { + t.Errorf("Expected alert name 'TestAlert', got '%s'", rule.AlertName) + } + if rule.RuleType != RuleTypeThreshold { + t.Errorf("Expected rule type '%s', got '%s'", RuleTypeThreshold, rule.RuleType) + } + if rule.RuleCondition.Thresholds.Kind.IsZero() { + t.Error("Expected thresholds to be populated") + } + if rule.RuleCondition.Target == nil { + t.Error("Expected target to be populated") + } + }, + }, + { + name: "rule with default values applied", + initRule: PostableRule{}, + content: []byte(`{ + "alert": "DefaultsRule", + "ruleType": "threshold_rule", + "condition": { + "compositeQuery": { + "queryType": "builder", + "builderQueries": { + "A": { + "disabled": false, + "aggregateAttribute": { + "key": "test_metric" + } + } + } + }, + "target": 5.0, + "matchType": "1", + "op": "1", + "selectedQuery": "A" + } + }`), + kind: RuleDataKindJson, + expectError: false, + validate: func(t *testing.T, rule *PostableRule) { + if rule.EvalWindow != Duration(5*time.Minute) { + t.Errorf("Expected default eval window '5m', got '%v'", rule.EvalWindow) + } + if rule.Frequency != Duration(1*time.Minute) { + t.Errorf("Expected default frequency '1m', got '%v'", rule.Frequency) + } + if rule.RuleCondition.CompositeQuery.BuilderQueries["A"].Expression != "A" { + t.Errorf("Expected expression 'A', got '%s'", rule.RuleCondition.CompositeQuery.BuilderQueries["A"].Expression) + } + }, + }, + { + name: "PromQL rule type detection", + initRule: PostableRule{}, + content: []byte(`{ + "alert": "PromQLRule", + "condition": { + "compositeQuery": { + "queryType": "promql", + "promQueries": { + "A": { + "query": "rate(http_requests_total[5m])", + "disabled": false + } + } + }, + "target": 10.0, + "matchType": "1", + "op": "1", + "selectedQuery": "A" + } + }`), + kind: RuleDataKindJson, + expectError: false, + validate: func(t *testing.T, rule *PostableRule) { + if rule.RuleType != RuleTypeProm { + t.Errorf("Expected rule type 'PROM_QL_RULE', got '%s'", rule.RuleType) + } + if rule.RuleCondition.Thresholds.Kind.IsZero() { + t.Error("Expected thresholds to be populated") + } + if rule.RuleCondition.Target == nil { + t.Error("Expected target to be populated") + } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := json.Unmarshal(tt.content, &tt.initRule) + if tt.expectError { + if err == nil { + t.Error("Expected error but got none") + } + return + } + + if err != nil { + t.Errorf("Unexpected error: %v", err) + return + } + + if tt.validate != nil { + tt.validate(t, &tt.initRule) + } + }) + } +} + +func TestParseIntoRuleThresholdGeneration(t *testing.T) { + content := []byte(`{ + "alert": "TestThresholds", + "condition": { + "compositeQuery": { + "queryType": "builder", + "builderQueries": { + "A": { + "expression": "A", + "disabled": false, + "aggregateAttribute": { + "key": "response_time" + } + } + } + }, + "target": 100.0, + "matchType": "1", + "op": "1", + "selectedQuery": "A", + "targetUnit": "ms", + "thresholds": { + "kind": "basic", + "spec": [ + { + "name": "CRITICAL", + "target": 100.0, + "targetUnit": "ms", + "ruleUnit": "s", + "matchType": "1", + "op": "1", + "selectedQuery": "A" + } + ] + } + } + }`) + rule := PostableRule{} + err := json.Unmarshal(content, &rule) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + // Check that thresholds were parsed correctly + if rule.RuleCondition.Thresholds.Kind != BasicThresholdKind { + t.Errorf("Expected threshold kind 'basic', got '%s'", rule.RuleCondition.Thresholds.Kind) + } + + // Get the threshold and test functionality + threshold, err := rule.RuleCondition.Thresholds.GetRuleThreshold() + if err != nil { + t.Fatalf("Failed to get threshold: %v", err) + } + + // Test that threshold can evaluate properly + vector, err := threshold.ShouldAlert(v3.Series{ + Points: []v3.Point{{Value: 0.15, Timestamp: 1000}}, // 150ms in seconds + Labels: map[string]string{"test": "label"}, + }) + if err != nil { + t.Fatalf("Unexpected error in ShouldAlert: %v", err) + } + + if len(vector) == 0 { + t.Error("Expected alert to be triggered for value above threshold") + } +} + +func TestParseIntoRuleMultipleThresholds(t *testing.T) { + content := []byte(`{ + "alert": "MultiThresholdAlert", + "ruleType": "threshold_rule", + "condition": { + "compositeQuery": { + "queryType": "builder", + "unit": "%", + "builderQueries": { + "A": { + "expression": "A", + "disabled": false, + "aggregateAttribute": { + "key": "cpu_usage" + } + } + } + }, + "target": 90.0, + "matchType": "1", + "op": "1", + "selectedQuery": "A", + "thresholds": { + "kind": "basic", + "spec": [ + { + "name": "WARNING", + "target": 70.0, + "targetUnit": "%", + "ruleUnit": "%", + "matchType": "1", + "op": "1", + "selectedQuery": "A" + }, + { + "name": "CRITICAL", + "target": 90.0, + "targetUnit": "%", + "ruleUnit": "%", + "matchType": "1", + "op": "1", + "selectedQuery": "A" + } + ] + } + } + }`) + rule := PostableRule{} + err := json.Unmarshal(content, &rule) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + if rule.RuleCondition.Thresholds.Kind != BasicThresholdKind { + t.Errorf("Expected threshold kind 'basic', got '%s'", rule.RuleCondition.Thresholds.Kind) + } + + threshold, err := rule.RuleCondition.Thresholds.GetRuleThreshold() + if err != nil { + t.Fatalf("Failed to get threshold: %v", err) + } + + // Test with a value that should trigger both WARNING and CRITICAL thresholds + vector, err := threshold.ShouldAlert(v3.Series{ + Points: []v3.Point{{Value: 95.0, Timestamp: 1000}}, // 95% CPU usage + Labels: map[string]string{"service": "test"}, + }) + if err != nil { + t.Fatalf("Unexpected error in ShouldAlert: %v", err) + } + + assert.Equal(t, 2, len(vector)) + + vector, err = threshold.ShouldAlert(v3.Series{ + Points: []v3.Point{{Value: 75.0, Timestamp: 1000}}, // 75% CPU usage + Labels: map[string]string{"service": "test"}, + }) + if err != nil { + t.Fatalf("Unexpected error in ShouldAlert: %v", err) + } + + assert.Equal(t, 1, len(vector)) +} diff --git a/pkg/types/ruletypes/constants.go b/pkg/types/ruletypes/constants.go new file mode 100644 index 000000000000..1851ef919a11 --- /dev/null +++ b/pkg/types/ruletypes/constants.go @@ -0,0 +1,4 @@ +package ruletypes + +const CriticalThresholdName = "CRITICAL" +const LabelThresholdName = "threshold.name" diff --git a/pkg/types/ruletypes/threshold.go b/pkg/types/ruletypes/threshold.go new file mode 100644 index 000000000000..4c47c790dc40 --- /dev/null +++ b/pkg/types/ruletypes/threshold.go @@ -0,0 +1,419 @@ +package ruletypes + +import ( + "encoding/json" + "github.com/SigNoz/signoz/pkg/errors" + "github.com/SigNoz/signoz/pkg/query-service/converter" + "github.com/SigNoz/signoz/pkg/query-service/model/v3" + "github.com/SigNoz/signoz/pkg/query-service/utils/labels" + "github.com/SigNoz/signoz/pkg/valuer" + "math" + "sort" +) + +type ThresholdKind struct { + valuer.String +} + +var ( + BasicThresholdKind = ThresholdKind{valuer.NewString("basic")} +) + +type RuleThresholdData struct { + Kind ThresholdKind `json:"kind"` + Spec any `json:"spec"` +} + +func (r *RuleThresholdData) UnmarshalJSON(data []byte) error { + var raw map[string]json.RawMessage + if err := json.Unmarshal(data, &raw); err != nil { + return errors.NewInvalidInputf(errors.CodeInvalidInput, "failed to unmarshal raw rule threshold json: %v", err) + } + if err := json.Unmarshal(raw["kind"], &r.Kind); err != nil { + return errors.NewInvalidInputf(errors.CodeInvalidInput, "failed to unmarshal rule threshold kind: %v", err) + } + switch r.Kind { + case BasicThresholdKind: + var basicThresholds BasicRuleThresholds + if err := json.Unmarshal(raw["spec"], &basicThresholds); err != nil { + return errors.NewInvalidInputf(errors.CodeInvalidInput, "failed to unmarshal rule threhsold spec: %v", err) + } + if err := basicThresholds.Validate(); err != nil { + return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid rule threshold spec: %v", err) + } + r.Spec = basicThresholds + + default: + return errors.NewInvalidInputf(errors.CodeUnsupported, "unknown threshold kind") + } + + return nil +} + +type RuleThreshold interface { + ShouldAlert(series v3.Series) (Vector, error) +} + +type BasicRuleThreshold struct { + Name string `json:"name"` + TargetValue *float64 `json:"target"` + TargetUnit string `json:"targetUnit"` + RuleUnit string `json:"ruleUnit"` + RecoveryTarget *float64 `json:"recoveryTarget"` + MatchType MatchType `json:"matchType"` + CompareOp CompareOp `json:"op"` + SelectedQuery string `json:"selectedQuery"` +} + +type BasicRuleThresholds []BasicRuleThreshold + +func (r BasicRuleThresholds) Validate() error { + var errs []error + for _, basicThreshold := range r { + if err := basicThreshold.Validate(); err != nil { + errs = append(errs, err) + } + } + return errors.Join(errs...) +} + +func (r BasicRuleThresholds) ShouldAlert(series v3.Series) (Vector, error) { + var resultVector Vector + thresholds := []BasicRuleThreshold(r) + sort.Slice(thresholds, func(i, j int) bool { + compareOp := thresholds[i].GetCompareOp() + targetI := thresholds[i].Target() + targetJ := thresholds[j].Target() + + switch compareOp { + case ValueIsAbove, ValueAboveOrEq, ValueOutsideBounds: + // For "above" operations, sort descending (higher values first) + return targetI > targetJ + case ValueIsBelow, ValueBelowOrEq: + // For "below" operations, sort ascending (lower values first) + return targetI < targetJ + default: + // For equal/not equal operations, use descending as default + return targetI > targetJ + } + }) + for _, threshold := range thresholds { + smpl, shouldAlert := threshold.ShouldAlert(series) + if shouldAlert { + resultVector = append(resultVector, smpl) + } + } + return resultVector, nil +} + +func (b BasicRuleThreshold) GetName() string { + return b.Name +} + +func (b BasicRuleThreshold) Target() float64 { + unitConverter := converter.FromUnit(converter.Unit(b.TargetUnit)) + // convert the target value to the y-axis unit + value := unitConverter.Convert(converter.Value{ + F: *b.TargetValue, + U: converter.Unit(b.TargetUnit), + }, converter.Unit(b.RuleUnit)) + return value.F +} + +func (b BasicRuleThreshold) GetRecoveryTarget() float64 { + if b.RecoveryTarget == nil { + return 0 + } else { + return *b.RecoveryTarget + } +} + +func (b BasicRuleThreshold) GetMatchType() MatchType { + return b.MatchType +} + +func (b BasicRuleThreshold) GetCompareOp() CompareOp { + return b.CompareOp +} + +func (b BasicRuleThreshold) GetSelectedQuery() string { + return b.SelectedQuery +} + +func (b BasicRuleThreshold) Validate() error { + var errs []error + if b.Name == "" { + errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "threshold name cannot be empty")) + } + + if b.TargetValue == nil { + errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "target value cannot be nil")) + } + + switch b.CompareOp { + case ValueIsAbove, ValueIsBelow, ValueIsEq, ValueIsNotEq, ValueAboveOrEq, ValueBelowOrEq, ValueOutsideBounds: + // valid compare operations + case CompareOpNone: + errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "compare operation cannot be none")) + default: + errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid compare operation: %s", string(b.CompareOp))) + } + + switch b.MatchType { + case AtleastOnce, AllTheTimes, OnAverage, InTotal, Last: + // valid match types + case MatchTypeNone: + errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "match type cannot be none")) + default: + errs = append(errs, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid match type: %s", string(b.MatchType))) + } + + return errors.Join(errs...) +} + +func removeGroupinSetPoints(series v3.Series) []v3.Point { + var result []v3.Point + for _, s := range series.Points { + if s.Timestamp >= 0 && !math.IsNaN(s.Value) && !math.IsInf(s.Value, 0) { + result = append(result, s) + } + } + return result +} + +func (b BasicRuleThreshold) ShouldAlert(series v3.Series) (Sample, bool) { + var shouldAlert bool + var alertSmpl Sample + var lbls labels.Labels + + for name, value := range series.Labels { + lbls = append(lbls, labels.Label{Name: name, Value: value}) + } + + lbls = append(lbls, labels.Label{Name: LabelThresholdName, Value: b.Name}) + + series.Points = removeGroupinSetPoints(series) + + // nothing to evaluate + if len(series.Points) == 0 { + return alertSmpl, false + } + + switch b.MatchType { + case AtleastOnce: + // If any sample matches the condition, the rule is firing. + if b.CompareOp == ValueIsAbove { + for _, smpl := range series.Points { + if smpl.Value > b.Target() { + alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls} + shouldAlert = true + break + } + } + } else if b.CompareOp == ValueIsBelow { + for _, smpl := range series.Points { + if smpl.Value < b.Target() { + alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls} + shouldAlert = true + break + } + } + } else if b.CompareOp == ValueIsEq { + for _, smpl := range series.Points { + if smpl.Value == b.Target() { + alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls} + shouldAlert = true + break + } + } + } else if b.CompareOp == ValueIsNotEq { + for _, smpl := range series.Points { + if smpl.Value != b.Target() { + alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls} + shouldAlert = true + break + } + } + } else if b.CompareOp == ValueOutsideBounds { + for _, smpl := range series.Points { + if math.Abs(smpl.Value) >= b.Target() { + alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls} + shouldAlert = true + break + } + } + } + case AllTheTimes: + // If all samples match the condition, the rule is firing. + shouldAlert = true + alertSmpl = Sample{Point: Point{V: b.Target()}, Metric: lbls} + if b.CompareOp == ValueIsAbove { + for _, smpl := range series.Points { + if smpl.Value <= b.Target() { + shouldAlert = false + break + } + } + // use min value from the series + if shouldAlert { + var minValue float64 = math.Inf(1) + for _, smpl := range series.Points { + if smpl.Value < minValue { + minValue = smpl.Value + } + } + alertSmpl = Sample{Point: Point{V: minValue}, Metric: lbls} + } + } else if b.CompareOp == ValueIsBelow { + for _, smpl := range series.Points { + if smpl.Value >= b.Target() { + shouldAlert = false + break + } + } + if shouldAlert { + var maxValue float64 = math.Inf(-1) + for _, smpl := range series.Points { + if smpl.Value > maxValue { + maxValue = smpl.Value + } + } + alertSmpl = Sample{Point: Point{V: maxValue}, Metric: lbls} + } + } else if b.CompareOp == ValueIsEq { + for _, smpl := range series.Points { + if smpl.Value != b.Target() { + shouldAlert = false + break + } + } + } else if b.CompareOp == ValueIsNotEq { + for _, smpl := range series.Points { + if smpl.Value == b.Target() { + shouldAlert = false + break + } + } + // use any non-inf or nan value from the series + if shouldAlert { + for _, smpl := range series.Points { + if !math.IsInf(smpl.Value, 0) && !math.IsNaN(smpl.Value) { + alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls} + break + } + } + } + } else if b.CompareOp == ValueOutsideBounds { + for _, smpl := range series.Points { + if math.Abs(smpl.Value) < b.Target() { + alertSmpl = Sample{Point: Point{V: smpl.Value}, Metric: lbls} + shouldAlert = false + break + } + } + } + case OnAverage: + // If the average of all samples matches the condition, the rule is firing. + var sum, count float64 + for _, smpl := range series.Points { + if math.IsNaN(smpl.Value) || math.IsInf(smpl.Value, 0) { + continue + } + sum += smpl.Value + count++ + } + avg := sum / count + alertSmpl = Sample{Point: Point{V: avg}, Metric: lbls} + if b.CompareOp == ValueIsAbove { + if avg > b.Target() { + shouldAlert = true + } + } else if b.CompareOp == ValueIsBelow { + if avg < b.Target() { + shouldAlert = true + } + } else if b.CompareOp == ValueIsEq { + if avg == b.Target() { + shouldAlert = true + } + } else if b.CompareOp == ValueIsNotEq { + if avg != b.Target() { + shouldAlert = true + } + } else if b.CompareOp == ValueOutsideBounds { + if math.Abs(avg) >= b.Target() { + shouldAlert = true + } + } + case InTotal: + // If the sum of all samples matches the condition, the rule is firing. + var sum float64 + + for _, smpl := range series.Points { + if math.IsNaN(smpl.Value) || math.IsInf(smpl.Value, 0) { + continue + } + sum += smpl.Value + } + alertSmpl = Sample{Point: Point{V: sum}, Metric: lbls} + if b.CompareOp == ValueIsAbove { + if sum > b.Target() { + shouldAlert = true + } + } else if b.CompareOp == ValueIsBelow { + if sum < b.Target() { + shouldAlert = true + } + } else if b.CompareOp == ValueIsEq { + if sum == b.Target() { + shouldAlert = true + } + } else if b.CompareOp == ValueIsNotEq { + if sum != b.Target() { + shouldAlert = true + } + } else if b.CompareOp == ValueOutsideBounds { + if math.Abs(sum) >= b.Target() { + shouldAlert = true + } + } + case Last: + // If the last sample matches the condition, the rule is firing. + shouldAlert = false + alertSmpl = Sample{Point: Point{V: series.Points[len(series.Points)-1].Value}, Metric: lbls} + if b.CompareOp == ValueIsAbove { + if series.Points[len(series.Points)-1].Value > b.Target() { + shouldAlert = true + } + } else if b.CompareOp == ValueIsBelow { + if series.Points[len(series.Points)-1].Value < b.Target() { + shouldAlert = true + } + } else if b.CompareOp == ValueIsEq { + if series.Points[len(series.Points)-1].Value == b.Target() { + shouldAlert = true + } + } else if b.CompareOp == ValueIsNotEq { + if series.Points[len(series.Points)-1].Value != b.Target() { + shouldAlert = true + } + } + } + return alertSmpl, shouldAlert +} + +func (r *RuleThresholdData) GetRuleThreshold() (RuleThreshold, error) { + if r == nil { + return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "rule threshold is nil") + } + switch r.Kind { + case BasicThresholdKind: + if thresholds, ok := r.Spec.(BasicRuleThresholds); ok { + basic := BasicRuleThresholds(thresholds) + return basic, nil + } + return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid rule threshold spec") + default: + return nil, errors.NewInvalidInputf(errors.CodeUnsupported, "unknown threshold kind") + } +}