signoz/pkg/query-service/rules/threshold_rule.go
Srikanth Chekuri bd02848623
chore: add sql migration for dashboards, alerts, and saved views (#8642)
## 📄 Summary

To reliably migrate the alerts and dashboards, we need access to the telemetrystore to fetch some metadata and while doing migration, I need to log some stuff to fix stuff later.

Key changes:
- Modified the migration to include telemetrystore and a logging provider (open to using a standard logger instead)
- To avoid the previous issues with imported dashboards failing during migration, I've ensured that imported JSON files are automatically transformed when migration is active
- Implemented detailed logic to handle dashboard migration cleanly and prevent unnecessary errors
- Separated the core migration logic from SQL migration code, as users from the dot metrics migration requested shareable code snippets for local migrations. This modular approach allows others to easily reuse the migration functionality.

Known: I didn't register the migration yet in this PR, and will not merge this yet, so please review with that in mid.
2025-08-06 23:05:39 +05:30

792 lines
24 KiB
Go

package rules
import (
"bytes"
"context"
"encoding/json"
"fmt"
"log/slog"
"math"
"reflect"
"text/template"
"time"
"github.com/SigNoz/signoz/pkg/contextlinks"
"github.com/SigNoz/signoz/pkg/query-service/common"
"github.com/SigNoz/signoz/pkg/query-service/model"
"github.com/SigNoz/signoz/pkg/query-service/postprocess"
"github.com/SigNoz/signoz/pkg/transition"
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/SigNoz/signoz/pkg/query-service/app/querier"
querierV2 "github.com/SigNoz/signoz/pkg/query-service/app/querier/v2"
"github.com/SigNoz/signoz/pkg/query-service/app/queryBuilder"
"github.com/SigNoz/signoz/pkg/query-service/constants"
"github.com/SigNoz/signoz/pkg/query-service/interfaces"
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
querytemplate "github.com/SigNoz/signoz/pkg/query-service/utils/queryTemplate"
"github.com/SigNoz/signoz/pkg/query-service/utils/times"
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
logsv3 "github.com/SigNoz/signoz/pkg/query-service/app/logs/v3"
tracesV4 "github.com/SigNoz/signoz/pkg/query-service/app/traces/v4"
"github.com/SigNoz/signoz/pkg/query-service/formatter"
querierV5 "github.com/SigNoz/signoz/pkg/querier"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
yaml "gopkg.in/yaml.v2"
)
type ThresholdRule struct {
*BaseRule
// Ever since we introduced the new metrics query builder, the version is "v4"
// for all the rules
// if the version is "v3", then we use the old querier
// if the version is "v4", then we use the new querierV2
version string
// querier is used for alerts created before the introduction of new metrics query builder
querier interfaces.Querier
// querierV2 is used for alerts created after the introduction of new metrics query builder
querierV2 interfaces.Querier
// querierV5 is used for alerts migrated after the introduction of new query builder
querierV5 querierV5.Querier
// used for attribute metadata enrichment for logs and traces
logsKeys map[string]v3.AttributeKey
spansKeys map[string]v3.AttributeKey
}
func NewThresholdRule(
id string,
orgID valuer.UUID,
p *ruletypes.PostableRule,
reader interfaces.Reader,
querierV5 querierV5.Querier,
logger *slog.Logger,
opts ...RuleOption,
) (*ThresholdRule, error) {
logger.Info("creating new ThresholdRule", "id", id)
opts = append(opts, WithLogger(logger))
baseRule, err := NewBaseRule(id, orgID, p, reader, opts...)
if err != nil {
return nil, err
}
t := ThresholdRule{
BaseRule: baseRule,
version: p.Version,
}
querierOption := querier.QuerierOptions{
Reader: reader,
Cache: nil,
KeyGenerator: queryBuilder.NewKeyGenerator(),
}
querierOptsV2 := querierV2.QuerierOptions{
Reader: reader,
Cache: nil,
KeyGenerator: queryBuilder.NewKeyGenerator(),
}
t.querier = querier.NewQuerier(querierOption)
t.querierV2 = querierV2.NewQuerier(querierOptsV2)
t.querierV5 = querierV5
t.reader = reader
return &t, nil
}
func (r *ThresholdRule) Type() ruletypes.RuleType {
return ruletypes.RuleTypeThreshold
}
func (r *ThresholdRule) prepareQueryRange(ctx context.Context, ts time.Time) (*v3.QueryRangeParamsV3, error) {
r.logger.InfoContext(
ctx, "prepare query range request v4", "ts", ts.UnixMilli(), "eval_window", r.evalWindow.Milliseconds(), "eval_delay", r.evalDelay.Milliseconds(),
)
startTs, endTs := r.Timestamps(ts)
start, end := startTs.UnixMilli(), endTs.UnixMilli()
if r.ruleCondition.QueryType() == v3.QueryTypeClickHouseSQL {
params := &v3.QueryRangeParamsV3{
Start: start,
End: end,
Step: int64(math.Max(float64(common.MinAllowedStepInterval(start, end)), 60)),
CompositeQuery: &v3.CompositeQuery{
QueryType: r.ruleCondition.CompositeQuery.QueryType,
PanelType: r.ruleCondition.CompositeQuery.PanelType,
BuilderQueries: make(map[string]*v3.BuilderQuery),
ClickHouseQueries: make(map[string]*v3.ClickHouseQuery),
PromQueries: make(map[string]*v3.PromQuery),
Unit: r.ruleCondition.CompositeQuery.Unit,
},
Variables: make(map[string]interface{}, 0),
NoCache: true,
}
querytemplate.AssignReservedVarsV3(params)
for name, chQuery := range r.ruleCondition.CompositeQuery.ClickHouseQueries {
if chQuery.Disabled {
continue
}
tmpl := template.New("clickhouse-query")
tmpl, err := tmpl.Parse(chQuery.Query)
if err != nil {
return nil, err
}
var query bytes.Buffer
err = tmpl.Execute(&query, params.Variables)
if err != nil {
return nil, err
}
params.CompositeQuery.ClickHouseQueries[name] = &v3.ClickHouseQuery{
Query: query.String(),
Disabled: chQuery.Disabled,
Legend: chQuery.Legend,
}
}
return params, nil
}
if r.ruleCondition.CompositeQuery != nil && r.ruleCondition.CompositeQuery.BuilderQueries != nil {
for _, q := range r.ruleCondition.CompositeQuery.BuilderQueries {
// If the step interval is less than the minimum allowed step interval, set it to the minimum allowed step interval
if minStep := common.MinAllowedStepInterval(start, end); q.StepInterval < minStep {
q.StepInterval = minStep
}
q.SetShiftByFromFunc()
if q.DataSource == v3.DataSourceMetrics && constants.UseMetricsPreAggregation() {
// if the time range is greater than 1 day, and less than 1 week set the step interval to be multiple of 5 minutes
// if the time range is greater than 1 week, set the step interval to be multiple of 30 mins
if end-start >= 24*time.Hour.Milliseconds() && end-start < 7*24*time.Hour.Milliseconds() {
q.StepInterval = int64(math.Round(float64(q.StepInterval)/300)) * 300
} else if end-start >= 7*24*time.Hour.Milliseconds() {
q.StepInterval = int64(math.Round(float64(q.StepInterval)/1800)) * 1800
}
}
}
}
if r.ruleCondition.CompositeQuery.PanelType != v3.PanelTypeGraph {
r.ruleCondition.CompositeQuery.PanelType = v3.PanelTypeGraph
}
// default mode
return &v3.QueryRangeParamsV3{
Start: start,
End: end,
Step: int64(math.Max(float64(common.MinAllowedStepInterval(start, end)), 60)),
CompositeQuery: r.ruleCondition.CompositeQuery,
Variables: make(map[string]interface{}, 0),
NoCache: true,
}, nil
}
func (r *ThresholdRule) prepareLinksToLogs(ctx context.Context, ts time.Time, lbls labels.Labels) string {
if r.version == "v5" {
return r.prepareLinksToLogsV5(ctx, ts, lbls)
}
selectedQuery := r.GetSelectedQuery()
qr, err := r.prepareQueryRange(ctx, ts)
if err != nil {
return ""
}
start := time.UnixMilli(qr.Start)
end := time.UnixMilli(qr.End)
// TODO(srikanthccv): handle formula queries
if selectedQuery < "A" || selectedQuery > "Z" {
return ""
}
q := r.ruleCondition.CompositeQuery.BuilderQueries[selectedQuery]
if q == nil {
return ""
}
if q.DataSource != v3.DataSourceLogs {
return ""
}
queryFilter := []v3.FilterItem{}
if q.Filters != nil {
queryFilter = q.Filters.Items
}
filterItems := contextlinks.PrepareFilters(lbls.Map(), queryFilter, q.GroupBy, r.logsKeys)
return contextlinks.PrepareLinksToLogs(start, end, filterItems)
}
func (r *ThresholdRule) prepareLinksToTraces(ctx context.Context, ts time.Time, lbls labels.Labels) string {
if r.version == "v5" {
return r.prepareLinksToTracesV5(ctx, ts, lbls)
}
selectedQuery := r.GetSelectedQuery()
qr, err := r.prepareQueryRange(ctx, ts)
if err != nil {
return ""
}
start := time.UnixMilli(qr.Start)
end := time.UnixMilli(qr.End)
// TODO(srikanthccv): handle formula queries
if selectedQuery < "A" || selectedQuery > "Z" {
return ""
}
q := r.ruleCondition.CompositeQuery.BuilderQueries[selectedQuery]
if q == nil {
return ""
}
if q.DataSource != v3.DataSourceTraces {
return ""
}
queryFilter := []v3.FilterItem{}
if q.Filters != nil {
queryFilter = q.Filters.Items
}
filterItems := contextlinks.PrepareFilters(lbls.Map(), queryFilter, q.GroupBy, r.spansKeys)
return contextlinks.PrepareLinksToTraces(start, end, filterItems)
}
func (r *ThresholdRule) prepareQueryRangeV5(ctx context.Context, ts time.Time) (*qbtypes.QueryRangeRequest, error) {
r.logger.InfoContext(
ctx, "prepare query range request v5", "ts", ts.UnixMilli(), "eval_window", r.evalWindow.Milliseconds(), "eval_delay", r.evalDelay.Milliseconds(),
)
startTs, endTs := r.Timestamps(ts)
start, end := startTs.UnixMilli(), endTs.UnixMilli()
req := &qbtypes.QueryRangeRequest{
Start: uint64(start),
End: uint64(end),
RequestType: qbtypes.RequestTypeTimeSeries,
CompositeQuery: qbtypes.CompositeQuery{
Queries: make([]qbtypes.QueryEnvelope, 0),
},
NoCache: true,
}
req.CompositeQuery.Queries = make([]qbtypes.QueryEnvelope, len(r.Condition().CompositeQuery.Queries))
copy(req.CompositeQuery.Queries, r.Condition().CompositeQuery.Queries)
return req, nil
}
func (r *ThresholdRule) prepareLinksToLogsV5(ctx context.Context, ts time.Time, lbls labels.Labels) string {
selectedQuery := r.GetSelectedQuery()
qr, err := r.prepareQueryRangeV5(ctx, ts)
if err != nil {
return ""
}
start := time.UnixMilli(int64(qr.Start))
end := time.UnixMilli(int64(qr.End))
// TODO(srikanthccv): handle formula queries
if selectedQuery < "A" || selectedQuery > "Z" {
return ""
}
var q qbtypes.QueryBuilderQuery[qbtypes.LogAggregation]
for _, query := range r.ruleCondition.CompositeQuery.Queries {
if query.Type == qbtypes.QueryTypeBuilder {
switch spec := query.Spec.(type) {
case qbtypes.QueryBuilderQuery[qbtypes.LogAggregation]:
q = spec
}
}
}
if q.Signal != telemetrytypes.SignalLogs {
return ""
}
filterExpr := ""
if q.Filter != nil && q.Filter.Expression != "" {
filterExpr = q.Filter.Expression
}
whereClause := contextlinks.PrepareFilterExpression(lbls.Map(), filterExpr, q.GroupBy)
return contextlinks.PrepareLinksToLogsV5(start, end, whereClause)
}
func (r *ThresholdRule) prepareLinksToTracesV5(ctx context.Context, ts time.Time, lbls labels.Labels) string {
selectedQuery := r.GetSelectedQuery()
qr, err := r.prepareQueryRangeV5(ctx, ts)
if err != nil {
return ""
}
start := time.UnixMilli(int64(qr.Start))
end := time.UnixMilli(int64(qr.End))
// TODO(srikanthccv): handle formula queries
if selectedQuery < "A" || selectedQuery > "Z" {
return ""
}
var q qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]
for _, query := range r.ruleCondition.CompositeQuery.Queries {
if query.Type == qbtypes.QueryTypeBuilder {
switch spec := query.Spec.(type) {
case qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]:
q = spec
}
}
}
if q.Signal != telemetrytypes.SignalTraces {
return ""
}
filterExpr := ""
if q.Filter != nil && q.Filter.Expression != "" {
filterExpr = q.Filter.Expression
}
whereClause := contextlinks.PrepareFilterExpression(lbls.Map(), filterExpr, q.GroupBy)
return contextlinks.PrepareLinksToTracesV5(start, end, whereClause)
}
func (r *ThresholdRule) GetSelectedQuery() string {
return r.ruleCondition.GetSelectedQueryName()
}
func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID, ts time.Time) (ruletypes.Vector, error) {
params, err := r.prepareQueryRange(ctx, ts)
if err != nil {
return nil, err
}
err = r.PopulateTemporality(ctx, orgID, params)
if err != nil {
return nil, fmt.Errorf("internal error while setting temporality")
}
if params.CompositeQuery.QueryType == v3.QueryTypeBuilder {
hasLogsQuery := false
hasTracesQuery := false
for _, query := range params.CompositeQuery.BuilderQueries {
if query.DataSource == v3.DataSourceLogs {
hasLogsQuery = true
}
if query.DataSource == v3.DataSourceTraces {
hasTracesQuery = true
}
}
if hasLogsQuery {
// check if any enrichment is required for logs if yes then enrich them
if logsv3.EnrichmentRequired(params) {
logsFields, err := r.reader.GetLogFieldsFromNames(ctx, logsv3.GetFieldNames(params.CompositeQuery))
if err != nil {
return nil, err
}
logsKeys := model.GetLogFieldsV3(ctx, params, logsFields)
r.logsKeys = logsKeys
logsv3.Enrich(params, logsKeys)
}
}
if hasTracesQuery {
spanKeys, err := r.reader.GetSpanAttributeKeysByNames(ctx, logsv3.GetFieldNames(params.CompositeQuery))
if err != nil {
return nil, err
}
r.spansKeys = spanKeys
tracesV4.Enrich(params, spanKeys)
}
}
var results []*v3.Result
var queryErrors map[string]error
if r.version == "v4" {
results, queryErrors, err = r.querierV2.QueryRange(ctx, orgID, params)
} else {
results, queryErrors, err = r.querier.QueryRange(ctx, orgID, params)
}
if err != nil {
r.logger.ErrorContext(ctx, "failed to get alert query range result", "rule_name", r.Name(), "error", err, "query_errors", queryErrors)
return nil, fmt.Errorf("internal error while querying")
}
if params.CompositeQuery.QueryType == v3.QueryTypeBuilder {
results, err = postprocess.PostProcessResult(results, params)
if err != nil {
r.logger.ErrorContext(ctx, "failed to post process result", "rule_name", r.Name(), "error", err)
return nil, fmt.Errorf("internal error while post processing")
}
}
selectedQuery := r.GetSelectedQuery()
var queryResult *v3.Result
for _, res := range results {
if res.QueryName == selectedQuery {
queryResult = res
break
}
}
if queryResult != nil && len(queryResult.Series) > 0 {
r.lastTimestampWithDatapoints = time.Now()
}
var resultVector ruletypes.Vector
// if the data is missing for `For` duration then we should send alert
if r.ruleCondition.AlertOnAbsent && r.lastTimestampWithDatapoints.Add(time.Duration(r.Condition().AbsentFor)*time.Minute).Before(time.Now()) {
r.logger.InfoContext(ctx, "no data found for rule condition", "rule_id", r.ID())
lbls := labels.NewBuilder(labels.Labels{})
if !r.lastTimestampWithDatapoints.IsZero() {
lbls.Set("lastSeen", r.lastTimestampWithDatapoints.Format(constants.AlertTimeFormat))
}
resultVector = append(resultVector, ruletypes.Sample{
Metric: lbls.Labels(),
IsMissing: true,
})
return resultVector, nil
}
for _, series := range queryResult.Series {
smpl, shouldAlert := r.ShouldAlert(*series)
if shouldAlert {
resultVector = append(resultVector, smpl)
}
}
return resultVector, nil
}
func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID, ts time.Time) (ruletypes.Vector, error) {
params, err := r.prepareQueryRangeV5(ctx, ts)
if err != nil {
return nil, err
}
var results []*v3.Result
v5Result, err := r.querierV5.QueryRange(ctx, orgID, params)
if err != nil {
r.logger.ErrorContext(ctx, "failed to get alert query result", "rule_name", r.Name(), "error", err)
return nil, fmt.Errorf("internal error while querying")
}
for _, item := range v5Result.Data.Results {
if tsData, ok := item.(*qbtypes.TimeSeriesData); ok {
results = append(results, transition.ConvertV5TimeSeriesDataToV4Result(tsData))
} else {
// NOTE: should not happen but just to ensure we don't miss it if it happens for some reason
r.logger.WarnContext(ctx, "expected qbtypes.TimeSeriesData but got", "item_type", reflect.TypeOf(item))
}
}
selectedQuery := r.GetSelectedQuery()
var queryResult *v3.Result
for _, res := range results {
if res.QueryName == selectedQuery {
queryResult = res
break
}
}
if queryResult != nil && len(queryResult.Series) > 0 {
r.lastTimestampWithDatapoints = time.Now()
}
var resultVector ruletypes.Vector
// if the data is missing for `For` duration then we should send alert
if r.ruleCondition.AlertOnAbsent && r.lastTimestampWithDatapoints.Add(time.Duration(r.Condition().AbsentFor)*time.Minute).Before(time.Now()) {
r.logger.InfoContext(ctx, "no data found for rule condition", "rule_id", r.ID())
lbls := labels.NewBuilder(labels.Labels{})
if !r.lastTimestampWithDatapoints.IsZero() {
lbls.Set("lastSeen", r.lastTimestampWithDatapoints.Format(constants.AlertTimeFormat))
}
resultVector = append(resultVector, ruletypes.Sample{
Metric: lbls.Labels(),
IsMissing: true,
})
return resultVector, nil
}
if queryResult == nil {
r.logger.WarnContext(ctx, "query result is nil", "rule_name", r.Name(), "query_name", selectedQuery)
return resultVector, nil
}
for _, series := range queryResult.Series {
smpl, shouldAlert := r.ShouldAlert(*series)
if shouldAlert {
resultVector = append(resultVector, smpl)
}
}
return resultVector, nil
}
func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, error) {
prevState := r.State()
valueFormatter := formatter.FromUnit(r.Unit())
var res ruletypes.Vector
var err error
if r.version == "v5" {
r.logger.InfoContext(ctx, "running v5 query")
res, err = r.buildAndRunQueryV5(ctx, r.orgID, ts)
} else {
r.logger.InfoContext(ctx, "running v4 query")
res, err = r.buildAndRunQuery(ctx, r.orgID, ts)
}
if err != nil {
return nil, err
}
r.mtx.Lock()
defer r.mtx.Unlock()
resultFPs := map[uint64]struct{}{}
var alerts = make(map[uint64]*ruletypes.Alert, len(res))
for _, smpl := range res {
l := make(map[string]string, len(smpl.Metric))
for _, lbl := range smpl.Metric {
l[lbl.Name] = lbl.Value
}
value := valueFormatter.Format(smpl.V, r.Unit())
threshold := valueFormatter.Format(r.targetVal(), r.Unit())
r.logger.DebugContext(ctx, "Alert template data for rule", "rule_name", r.Name(), "formatter", valueFormatter.Name(), "value", value, "threshold", threshold)
tmplData := ruletypes.AlertTemplateData(l, value, threshold)
// Inject some convenience variables that are easier to remember for users
// who are not used to Go's templating system.
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
// utility function to apply go template on labels and annotations
expand := func(text string) string {
tmpl := ruletypes.NewTemplateExpander(
ctx,
defs+text,
"__alert_"+r.Name(),
tmplData,
times.Time(timestamp.FromTime(ts)),
nil,
)
result, err := tmpl.Expand()
if err != nil {
result = fmt.Sprintf("<error expanding template: %s>", err)
r.logger.ErrorContext(ctx, "Expanding alert template failed", "error", err, "data", tmplData)
}
return result
}
lb := labels.NewBuilder(smpl.Metric).Del(labels.MetricNameLabel).Del(labels.TemporalityLabel)
resultLabels := labels.NewBuilder(smpl.Metric).Del(labels.MetricNameLabel).Del(labels.TemporalityLabel).Labels()
for name, value := range r.labels.Map() {
lb.Set(name, expand(value))
}
lb.Set(labels.AlertNameLabel, r.Name())
lb.Set(labels.AlertRuleIdLabel, r.ID())
lb.Set(labels.RuleSourceLabel, r.GeneratorURL())
annotations := make(labels.Labels, 0, len(r.annotations.Map()))
for name, value := range r.annotations.Map() {
annotations = append(annotations, labels.Label{Name: name, Value: expand(value)})
}
if smpl.IsMissing {
lb.Set(labels.AlertNameLabel, "[No data] "+r.Name())
}
// Links with timestamps should go in annotations since labels
// is used alert grouping, and we want to group alerts with the same
// label set, but different timestamps, together.
if r.typ == ruletypes.AlertTypeTraces {
link := r.prepareLinksToTraces(ctx, ts, smpl.Metric)
if link != "" && r.hostFromSource() != "" {
r.logger.InfoContext(ctx, "adding traces link to annotations", "link", fmt.Sprintf("%s/traces-explorer?%s", r.hostFromSource(), link))
annotations = append(annotations, labels.Label{Name: "related_traces", Value: fmt.Sprintf("%s/traces-explorer?%s", r.hostFromSource(), link)})
}
} else if r.typ == ruletypes.AlertTypeLogs {
link := r.prepareLinksToLogs(ctx, ts, smpl.Metric)
if link != "" && r.hostFromSource() != "" {
r.logger.InfoContext(ctx, "adding logs link to annotations", "link", fmt.Sprintf("%s/logs/logs-explorer?%s", r.hostFromSource(), link))
annotations = append(annotations, labels.Label{Name: "related_logs", Value: fmt.Sprintf("%s/logs/logs-explorer?%s", r.hostFromSource(), link)})
}
}
lbs := lb.Labels()
h := lbs.Hash()
resultFPs[h] = struct{}{}
if _, ok := alerts[h]; ok {
return nil, fmt.Errorf("duplicate alert found, vector contains metrics with the same labelset after applying alert labels")
}
alerts[h] = &ruletypes.Alert{
Labels: lbs,
QueryResultLables: resultLabels,
Annotations: annotations,
ActiveAt: ts,
State: model.StatePending,
Value: smpl.V,
GeneratorURL: r.GeneratorURL(),
Receivers: r.preferredChannels,
Missing: smpl.IsMissing,
}
}
r.logger.InfoContext(ctx, "number of alerts found", "rule_name", r.Name(), "alerts_count", len(alerts))
// alerts[h] is ready, add or update active list now
for h, a := range alerts {
// Check whether we already have alerting state for the identifying label set.
// Update the last value and annotations if so, create a new alert entry otherwise.
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
alert.Value = a.Value
alert.Annotations = a.Annotations
alert.Receivers = r.preferredChannels
continue
}
r.Active[h] = a
}
itemsToAdd := []model.RuleStateHistory{}
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
for fp, a := range r.Active {
labelsJSON, err := json.Marshal(a.QueryResultLables)
if err != nil {
r.logger.ErrorContext(ctx, "error marshaling labels", "error", err, "labels", a.Labels)
}
if _, ok := resultFPs[fp]; !ok {
// If the alert was previously firing, keep it around for a given
// retention time so it is reported as resolved to the AlertManager.
if a.State == model.StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > ruletypes.ResolvedRetention) {
delete(r.Active, fp)
}
if a.State != model.StateInactive {
a.State = model.StateInactive
a.ResolvedAt = ts
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: model.StateInactive,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
continue
}
if a.State == model.StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration {
a.State = model.StateFiring
a.FiredAt = ts
state := model.StateFiring
if a.Missing {
state = model.StateNoData
}
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: state,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
}
currentState := r.State()
overallStateChanged := currentState != prevState
for idx, item := range itemsToAdd {
item.OverallStateChanged = overallStateChanged
item.OverallState = currentState
itemsToAdd[idx] = item
}
r.RecordRuleStateHistory(ctx, prevState, currentState, itemsToAdd)
r.health = ruletypes.HealthGood
r.lastError = err
return len(r.Active), nil
}
func (r *ThresholdRule) String() string {
ar := ruletypes.PostableRule{
AlertName: r.name,
RuleCondition: r.ruleCondition,
EvalWindow: ruletypes.Duration(r.evalWindow),
Labels: r.labels.Map(),
Annotations: r.annotations.Map(),
PreferredChannels: r.preferredChannels,
}
byt, err := yaml.Marshal(ar)
if err != nil {
return fmt.Sprintf("error marshaling alerting rule: %s", err.Error())
}
return string(byt)
}
func removeGroupinSetPoints(series v3.Series) []v3.Point {
var result []v3.Point
for _, s := range series.Points {
if s.Timestamp >= 0 && !math.IsNaN(s.Value) && !math.IsInf(s.Value, 0) {
result = append(result, s)
}
}
return result
}