2022-07-14 11:59:06 +05:30
package rules
import (
2024-05-24 21:29:13 +05:30
"bytes"
2022-07-14 11:59:06 +05:30
"context"
2024-02-02 21:16:14 +05:30
"encoding/json"
2022-07-14 11:59:06 +05:30
"fmt"
"math"
2024-06-01 08:22:16 +05:30
"text/template"
2022-07-14 11:59:06 +05:30
"time"
2022-10-06 20:13:30 +05:30
"go.uber.org/zap"
2024-05-01 17:03:46 +05:30
"go.signoz.io/signoz/pkg/query-service/common"
2024-09-17 15:33:17 +05:30
"go.signoz.io/signoz/pkg/query-service/contextlinks"
2024-09-09 13:06:09 +05:30
"go.signoz.io/signoz/pkg/query-service/model"
2024-05-24 12:11:34 +05:30
"go.signoz.io/signoz/pkg/query-service/postprocess"
2023-05-09 19:16:55 +05:30
2024-05-24 12:11:34 +05:30
"go.signoz.io/signoz/pkg/query-service/app/querier"
querierV2 "go.signoz.io/signoz/pkg/query-service/app/querier/v2"
2023-05-09 19:16:55 +05:30
"go.signoz.io/signoz/pkg/query-service/app/queryBuilder"
2022-10-06 20:13:30 +05:30
"go.signoz.io/signoz/pkg/query-service/constants"
2023-08-18 07:32:05 +05:30
"go.signoz.io/signoz/pkg/query-service/interfaces"
2023-05-09 19:16:55 +05:30
v3 "go.signoz.io/signoz/pkg/query-service/model/v3"
2022-10-06 20:13:30 +05:30
"go.signoz.io/signoz/pkg/query-service/utils/labels"
2024-05-24 21:29:13 +05:30
querytemplate "go.signoz.io/signoz/pkg/query-service/utils/queryTemplate"
2022-10-06 20:13:30 +05:30
"go.signoz.io/signoz/pkg/query-service/utils/times"
"go.signoz.io/signoz/pkg/query-service/utils/timestamp"
2022-07-14 11:59:06 +05:30
2023-05-09 19:16:55 +05:30
logsv3 "go.signoz.io/signoz/pkg/query-service/app/logs/v3"
2024-09-17 15:33:17 +05:30
tracesV3 "go.signoz.io/signoz/pkg/query-service/app/traces/v3"
2023-08-16 14:22:40 +05:30
"go.signoz.io/signoz/pkg/query-service/formatter"
2023-05-09 19:16:55 +05:30
2022-07-14 11:59:06 +05:30
yaml "gopkg.in/yaml.v2"
)
type ThresholdRule struct {
2024-09-11 09:56:59 +05:30
* BaseRule
2024-08-09 15:31:39 +05:30
// Ever since we introduced the new metrics query builder, the version is "v4"
// for all the rules
// if the version is "v3", then we use the old querier
// if the version is "v4", then we use the new querierV2
2024-05-24 12:11:34 +05:30
version string
2023-05-09 19:16:55 +05:30
2024-08-09 15:31:39 +05:30
// querier is used for alerts created before the introduction of new metrics query builder
querier interfaces . Querier
// querierV2 is used for alerts created after the introduction of new metrics query builder
2024-05-24 12:11:34 +05:30
querierV2 interfaces . Querier
2024-09-17 15:33:17 +05:30
// used for attribute metadata enrichment for logs and traces
logsKeys map [ string ] v3 . AttributeKey
spansKeys map [ string ] v3 . AttributeKey
2022-07-14 11:59:06 +05:30
}
func NewThresholdRule (
id string ,
2022-08-04 15:31:21 +05:30
p * PostableRule ,
2023-08-18 07:32:05 +05:30
featureFlags interfaces . FeatureLookup ,
2024-05-24 12:11:34 +05:30
reader interfaces . Reader ,
2024-09-12 10:58:07 +05:30
useLogsNewSchema bool ,
2024-09-11 09:56:59 +05:30
opts ... RuleOption ,
2022-07-14 11:59:06 +05:30
) ( * ThresholdRule , error ) {
2024-08-08 17:34:25 +05:30
zap . L ( ) . Info ( "creating new ThresholdRule" , zap . String ( "id" , id ) , zap . Any ( "opts" , opts ) )
2024-09-11 09:56:59 +05:30
baseRule , err := NewBaseRule ( id , p , reader , opts ... )
if err != nil {
return nil , err
2022-07-14 11:59:06 +05:30
}
2022-08-04 15:31:21 +05:30
t := ThresholdRule {
2024-09-24 10:22:52 +05:30
BaseRule : baseRule ,
version : p . Version ,
2022-08-03 15:08:14 +05:30
}
2024-05-24 12:11:34 +05:30
querierOption := querier . QuerierOptions {
2024-09-12 10:58:07 +05:30
Reader : reader ,
Cache : nil ,
KeyGenerator : queryBuilder . NewKeyGenerator ( ) ,
FeatureLookup : featureFlags ,
UseLogsNewSchema : useLogsNewSchema ,
2023-05-09 19:16:55 +05:30
}
2024-05-24 12:11:34 +05:30
querierOptsV2 := querierV2 . QuerierOptions {
2024-09-12 10:58:07 +05:30
Reader : reader ,
Cache : nil ,
KeyGenerator : queryBuilder . NewKeyGenerator ( ) ,
FeatureLookup : featureFlags ,
UseLogsNewSchema : useLogsNewSchema ,
2024-03-01 14:51:50 +05:30
}
2024-05-24 12:11:34 +05:30
t . querier = querier . NewQuerier ( querierOption )
t . querierV2 = querierV2 . NewQuerier ( querierOptsV2 )
2024-08-09 12:11:05 +05:30
t . reader = reader
2022-08-04 15:31:21 +05:30
return & t , nil
2022-07-14 11:59:06 +05:30
}
// Type reports the kind of this rule, which is always RuleTypeThreshold.
func (r *ThresholdRule) Type() RuleType {
	return RuleTypeThreshold
}
2024-09-11 09:56:59 +05:30
func ( r * ThresholdRule ) prepareQueryRange ( ts time . Time ) ( * v3 . QueryRangeParamsV3 , error ) {
2022-07-14 11:59:06 +05:30
2024-08-08 17:34:25 +05:30
zap . L ( ) . Info ( "prepareQueryRange" , zap . Int64 ( "ts" , ts . UnixMilli ( ) ) , zap . Int64 ( "evalWindow" , r . evalWindow . Milliseconds ( ) ) , zap . Int64 ( "evalDelay" , r . evalDelay . Milliseconds ( ) ) )
2024-09-17 15:33:17 +05:30
startTs , endTs := r . Timestamps ( ts )
start , end := startTs . UnixMilli ( ) , endTs . UnixMilli ( )
2023-07-05 10:34:07 +05:30
2023-05-09 19:16:55 +05:30
if r . ruleCondition . QueryType ( ) == v3 . QueryTypeClickHouseSQL {
2024-05-24 21:29:13 +05:30
params := & v3 . QueryRangeParamsV3 {
2024-06-05 19:35:48 +05:30
Start : start ,
End : end ,
Step : int64 ( math . Max ( float64 ( common . MinAllowedStepInterval ( start , end ) ) , 60 ) ) ,
CompositeQuery : & v3 . CompositeQuery {
QueryType : r . ruleCondition . CompositeQuery . QueryType ,
PanelType : r . ruleCondition . CompositeQuery . PanelType ,
BuilderQueries : make ( map [ string ] * v3 . BuilderQuery ) ,
ClickHouseQueries : make ( map [ string ] * v3 . ClickHouseQuery ) ,
PromQueries : make ( map [ string ] * v3 . PromQuery ) ,
Unit : r . ruleCondition . CompositeQuery . Unit ,
} ,
Variables : make ( map [ string ] interface { } , 0 ) ,
NoCache : true ,
2022-11-23 18:49:03 +05:30
}
2024-05-24 21:29:13 +05:30
querytemplate . AssignReservedVarsV3 ( params )
for name , chQuery := range r . ruleCondition . CompositeQuery . ClickHouseQueries {
if chQuery . Disabled {
continue
}
tmpl := template . New ( "clickhouse-query" )
tmpl , err := tmpl . Parse ( chQuery . Query )
if err != nil {
2024-09-11 09:56:59 +05:30
return nil , err
2024-05-24 21:29:13 +05:30
}
var query bytes . Buffer
err = tmpl . Execute ( & query , params . Variables )
if err != nil {
2024-09-11 09:56:59 +05:30
return nil , err
2024-05-24 21:29:13 +05:30
}
2024-06-05 19:35:48 +05:30
params . CompositeQuery . ClickHouseQueries [ name ] = & v3 . ClickHouseQuery {
Query : query . String ( ) ,
Disabled : chQuery . Disabled ,
Legend : chQuery . Legend ,
}
2024-05-24 21:29:13 +05:30
}
2024-09-11 09:56:59 +05:30
return params , nil
2022-11-23 18:49:03 +05:30
}
2022-07-14 11:59:06 +05:30
2023-07-05 10:34:07 +05:30
if r . ruleCondition . CompositeQuery != nil && r . ruleCondition . CompositeQuery . BuilderQueries != nil {
for _ , q := range r . ruleCondition . CompositeQuery . BuilderQueries {
2024-06-12 12:21:27 +05:30
// If the step interval is less than the minimum allowed step interval, set it to the minimum allowed step interval
if minStep := common . MinAllowedStepInterval ( start , end ) ; q . StepInterval < minStep {
q . StepInterval = minStep
}
2023-07-05 10:34:07 +05:30
}
}
2024-06-19 14:19:30 +05:30
if r . ruleCondition . CompositeQuery . PanelType != v3 . PanelTypeGraph {
r . ruleCondition . CompositeQuery . PanelType = v3 . PanelTypeGraph
}
2022-11-23 18:49:03 +05:30
// default mode
2023-05-09 19:16:55 +05:30
return & v3 . QueryRangeParamsV3 {
2023-07-05 10:34:07 +05:30
Start : start ,
End : end ,
2024-05-24 12:11:34 +05:30
Step : int64 ( math . Max ( float64 ( common . MinAllowedStepInterval ( start , end ) ) , 60 ) ) ,
2023-05-09 19:16:55 +05:30
CompositeQuery : r . ruleCondition . CompositeQuery ,
2024-05-24 12:11:34 +05:30
Variables : make ( map [ string ] interface { } , 0 ) ,
NoCache : true ,
2024-09-11 09:56:59 +05:30
} , nil
2022-07-14 11:59:06 +05:30
}
2024-02-02 21:16:14 +05:30
func ( r * ThresholdRule ) prepareLinksToLogs ( ts time . Time , lbls labels . Labels ) string {
selectedQuery := r . GetSelectedQuery ( )
2024-09-17 15:33:17 +05:30
qr , err := r . prepareQueryRange ( ts )
if err != nil {
return ""
}
start := time . UnixMilli ( qr . Start )
end := time . UnixMilli ( qr . End )
2024-02-02 21:16:14 +05:30
// TODO(srikanthccv): handle formula queries
if selectedQuery < "A" || selectedQuery > "Z" {
return ""
}
2024-09-17 15:33:17 +05:30
q := r . ruleCondition . CompositeQuery . BuilderQueries [ selectedQuery ]
if q == nil {
2024-09-11 09:56:59 +05:30
return ""
}
2024-09-17 15:33:17 +05:30
if q . DataSource != v3 . DataSourceLogs {
return ""
2024-02-02 21:16:14 +05:30
}
2024-09-17 15:33:17 +05:30
queryFilter := [ ] v3 . FilterItem { }
if q . Filters != nil {
queryFilter = q . Filters . Items
}
2024-02-02 21:16:14 +05:30
2024-09-17 15:33:17 +05:30
filterItems := contextlinks . PrepareFilters ( lbls . Map ( ) , queryFilter , q . GroupBy , r . logsKeys )
2024-02-02 21:16:14 +05:30
2024-09-17 15:33:17 +05:30
return contextlinks . PrepareLinksToLogs ( start , end , filterItems )
2024-02-02 21:16:14 +05:30
}
func ( r * ThresholdRule ) prepareLinksToTraces ( ts time . Time , lbls labels . Labels ) string {
selectedQuery := r . GetSelectedQuery ( )
2024-09-17 15:33:17 +05:30
qr , err := r . prepareQueryRange ( ts )
if err != nil {
return ""
}
start := time . UnixMilli ( qr . Start )
end := time . UnixMilli ( qr . End )
2024-02-02 21:16:14 +05:30
// TODO(srikanthccv): handle formula queries
if selectedQuery < "A" || selectedQuery > "Z" {
return ""
}
2024-09-17 15:33:17 +05:30
q := r . ruleCondition . CompositeQuery . BuilderQueries [ selectedQuery ]
if q == nil {
2024-09-11 09:56:59 +05:30
return ""
}
2024-09-17 15:33:17 +05:30
if q . DataSource != v3 . DataSourceTraces {
return ""
2024-02-02 21:16:14 +05:30
}
2024-09-17 15:33:17 +05:30
queryFilter := [ ] v3 . FilterItem { }
if q . Filters != nil {
queryFilter = q . Filters . Items
}
2024-02-02 21:16:14 +05:30
2024-09-17 15:33:17 +05:30
filterItems := contextlinks . PrepareFilters ( lbls . Map ( ) , queryFilter , q . GroupBy , r . spansKeys )
2024-02-02 21:16:14 +05:30
2024-09-17 15:33:17 +05:30
return contextlinks . PrepareLinksToTraces ( start , end , filterItems )
2024-02-02 21:16:14 +05:30
}
2023-11-10 17:43:19 +05:30
func ( r * ThresholdRule ) GetSelectedQuery ( ) string {
2024-09-17 15:33:17 +05:30
return r . ruleCondition . GetSelectedQueryName ( )
2023-11-10 17:43:19 +05:30
}
2024-09-11 09:56:59 +05:30
func ( r * ThresholdRule ) buildAndRunQuery ( ctx context . Context , ts time . Time ) ( Vector , error ) {
2022-07-14 11:59:06 +05:30
2024-09-11 09:56:59 +05:30
params , err := r . prepareQueryRange ( ts )
if err != nil {
return nil , err
}
2024-09-24 10:22:52 +05:30
err = r . PopulateTemporality ( ctx , params )
2024-05-24 12:11:34 +05:30
if err != nil {
return nil , fmt . Errorf ( "internal error while setting temporality" )
}
2022-11-23 18:49:03 +05:30
2024-05-24 12:11:34 +05:30
if params . CompositeQuery . QueryType == v3 . QueryTypeBuilder {
2024-09-17 15:33:17 +05:30
hasLogsQuery := false
hasTracesQuery := false
for _ , query := range params . CompositeQuery . BuilderQueries {
if query . DataSource == v3 . DataSourceLogs {
hasLogsQuery = true
}
if query . DataSource == v3 . DataSourceTraces {
hasTracesQuery = true
}
}
if hasLogsQuery {
// check if any enrichment is required for logs if yes then enrich them
if logsv3 . EnrichmentRequired ( params ) {
logsFields , err := r . reader . GetLogFields ( ctx )
if err != nil {
return nil , err
}
logsKeys := model . GetLogFieldsV3 ( ctx , params , logsFields )
r . logsKeys = logsKeys
logsv3 . Enrich ( params , logsKeys )
}
}
if hasTracesQuery {
spanKeys , err := r . reader . GetSpanAttributeKeys ( ctx )
if err != nil {
return nil , err
}
r . spansKeys = spanKeys
tracesV3 . Enrich ( params , spanKeys )
2022-11-23 18:49:03 +05:30
}
2024-05-24 12:11:34 +05:30
}
2022-11-23 18:49:03 +05:30
2024-05-24 12:11:34 +05:30
var results [ ] * v3 . Result
2024-09-11 09:56:59 +05:30
var queryErrors map [ string ] error
2022-11-23 18:49:03 +05:30
2024-05-24 12:11:34 +05:30
if r . version == "v4" {
2024-09-13 16:43:56 +05:30
results , queryErrors , err = r . querierV2 . QueryRange ( ctx , params )
2024-05-24 12:11:34 +05:30
} else {
2024-09-13 16:43:56 +05:30
results , queryErrors , err = r . querier . QueryRange ( ctx , params )
2024-05-24 12:11:34 +05:30
}
if err != nil {
2024-09-11 09:56:59 +05:30
zap . L ( ) . Error ( "failed to get alert query result" , zap . String ( "rule" , r . Name ( ) ) , zap . Error ( err ) , zap . Any ( "errors" , queryErrors ) )
2024-05-24 12:11:34 +05:30
return nil , fmt . Errorf ( "internal error while querying" )
}
2022-11-23 18:49:03 +05:30
2024-05-24 12:11:34 +05:30
if params . CompositeQuery . QueryType == v3 . QueryTypeBuilder {
results , err = postprocess . PostProcessResult ( results , params )
2022-11-23 18:49:03 +05:30
if err != nil {
2024-05-24 12:11:34 +05:30
zap . L ( ) . Error ( "failed to post process result" , zap . String ( "rule" , r . Name ( ) ) , zap . Error ( err ) )
return nil , fmt . Errorf ( "internal error while post processing" )
2022-11-23 18:49:03 +05:30
}
2022-07-14 11:59:06 +05:30
}
2024-05-24 12:11:34 +05:30
selectedQuery := r . GetSelectedQuery ( )
var queryResult * v3 . Result
for _ , res := range results {
if res . QueryName == selectedQuery {
queryResult = res
break
}
2022-07-14 11:59:06 +05:30
}
2024-05-24 12:11:34 +05:30
if queryResult != nil && len ( queryResult . Series ) > 0 {
r . lastTimestampWithDatapoints = time . Now ( )
}
2022-07-14 11:59:06 +05:30
2024-05-24 12:11:34 +05:30
var resultVector Vector
2022-07-14 11:59:06 +05:30
2024-05-24 12:11:34 +05:30
// if the data is missing for `For` duration then we should send alert
if r . ruleCondition . AlertOnAbsent && r . lastTimestampWithDatapoints . Add ( time . Duration ( r . Condition ( ) . AbsentFor ) * time . Minute ) . Before ( time . Now ( ) ) {
zap . L ( ) . Info ( "no data found for rule condition" , zap . String ( "ruleid" , r . ID ( ) ) )
lbls := labels . NewBuilder ( labels . Labels { } )
if ! r . lastTimestampWithDatapoints . IsZero ( ) {
lbls . Set ( "lastSeen" , r . lastTimestampWithDatapoints . Format ( constants . AlertTimeFormat ) )
}
resultVector = append ( resultVector , Sample {
Metric : lbls . Labels ( ) ,
IsMissing : true ,
} )
return resultVector , nil
2022-07-14 11:59:06 +05:30
}
2024-05-24 12:11:34 +05:30
for _ , series := range queryResult . Series {
2024-09-24 10:22:52 +05:30
smpl , shouldAlert := r . ShouldAlert ( * series )
2024-05-24 12:11:34 +05:30
if shouldAlert {
resultVector = append ( resultVector , smpl )
}
}
return resultVector , nil
2022-07-14 11:59:06 +05:30
}
2024-09-11 09:56:59 +05:30
func ( r * ThresholdRule ) Eval ( ctx context . Context , ts time . Time ) ( interface { } , error ) {
2022-07-14 11:59:06 +05:30
2024-08-09 12:11:05 +05:30
prevState := r . State ( )
2023-08-16 14:22:40 +05:30
valueFormatter := formatter . FromUnit ( r . Unit ( ) )
2024-09-11 09:56:59 +05:30
res , err := r . buildAndRunQuery ( ctx , ts )
2022-07-14 11:59:06 +05:30
if err != nil {
return nil , err
}
r . mtx . Lock ( )
defer r . mtx . Unlock ( )
resultFPs := map [ uint64 ] struct { } { }
var alerts = make ( map [ uint64 ] * Alert , len ( res ) )
for _ , smpl := range res {
l := make ( map [ string ] string , len ( smpl . Metric ) )
for _ , lbl := range smpl . Metric {
l [ lbl . Name ] = lbl . Value
}
2023-08-16 14:22:40 +05:30
value := valueFormatter . Format ( smpl . V , r . Unit ( ) )
2024-07-01 18:34:02 +05:30
threshold := valueFormatter . Format ( r . targetVal ( ) , r . Unit ( ) )
2024-03-27 00:07:29 +05:30
zap . L ( ) . Debug ( "Alert template data for rule" , zap . String ( "name" , r . Name ( ) ) , zap . String ( "formatter" , valueFormatter . Name ( ) ) , zap . String ( "value" , value ) , zap . String ( "threshold" , threshold ) )
2023-08-16 14:22:40 +05:30
tmplData := AlertTemplateData ( l , value , threshold )
2022-07-14 11:59:06 +05:30
// Inject some convenience variables that are easier to remember for users
// who are not used to Go's templating system.
2023-02-10 23:53:45 +05:30
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
2022-07-14 11:59:06 +05:30
2024-02-11 22:31:46 +05:30
// utility function to apply go template on labels and annotations
2022-07-14 11:59:06 +05:30
expand := func ( text string ) string {
tmpl := NewTemplateExpander (
ctx ,
defs + text ,
"__alert_" + r . Name ( ) ,
tmplData ,
times . Time ( timestamp . FromTime ( ts ) ) ,
nil ,
)
result , err := tmpl . Expand ( )
if err != nil {
result = fmt . Sprintf ( "<error expanding template: %s>" , err )
2024-03-27 00:07:29 +05:30
zap . L ( ) . Error ( "Expanding alert template failed" , zap . Error ( err ) , zap . Any ( "data" , tmplData ) )
2022-07-14 11:59:06 +05:30
}
return result
}
2024-05-27 13:19:28 +05:30
lb := labels . NewBuilder ( smpl . Metric ) . Del ( labels . MetricNameLabel ) . Del ( labels . TemporalityLabel )
2024-08-23 21:13:00 +05:30
resultLabels := labels . NewBuilder ( smpl . MetricOrig ) . Del ( labels . MetricNameLabel ) . Del ( labels . TemporalityLabel ) . Labels ( )
2022-07-14 11:59:06 +05:30
2024-09-11 09:56:59 +05:30
for name , value := range r . labels . Map ( ) {
lb . Set ( name , expand ( value ) )
2022-07-14 11:59:06 +05:30
}
lb . Set ( labels . AlertNameLabel , r . Name ( ) )
lb . Set ( labels . AlertRuleIdLabel , r . ID ( ) )
lb . Set ( labels . RuleSourceLabel , r . GeneratorURL ( ) )
2024-09-11 09:56:59 +05:30
annotations := make ( labels . Labels , 0 , len ( r . annotations . Map ( ) ) )
for name , value := range r . annotations . Map ( ) {
2024-09-24 10:22:52 +05:30
annotations = append ( annotations , labels . Label { Name : common . NormalizeLabelName ( name ) , Value : expand ( value ) } )
2024-02-20 10:42:30 +05:30
}
2024-08-09 15:31:39 +05:30
if smpl . IsMissing {
lb . Set ( labels . AlertNameLabel , "[No data] " + r . Name ( ) )
}
2024-02-20 10:42:30 +05:30
// Links with timestamps should go in annotations since labels
// is used alert grouping, and we want to group alerts with the same
// label set, but different timestamps, together.
2024-08-30 10:34:11 +05:30
if r . typ == AlertTypeTraces {
2024-02-20 10:42:30 +05:30
link := r . prepareLinksToTraces ( ts , smpl . MetricOrig )
2024-02-02 21:16:14 +05:30
if link != "" && r . hostFromSource ( ) != "" {
2024-09-17 15:33:17 +05:30
zap . L ( ) . Info ( "adding traces link to annotations" , zap . String ( "link" , fmt . Sprintf ( "%s/traces-explorer?%s" , r . hostFromSource ( ) , link ) ) )
2024-02-20 10:42:30 +05:30
annotations = append ( annotations , labels . Label { Name : "related_traces" , Value : fmt . Sprintf ( "%s/traces-explorer?%s" , r . hostFromSource ( ) , link ) } )
2024-02-02 21:16:14 +05:30
}
2024-08-30 10:34:11 +05:30
} else if r . typ == AlertTypeLogs {
2024-02-20 10:42:30 +05:30
link := r . prepareLinksToLogs ( ts , smpl . MetricOrig )
2024-02-02 21:16:14 +05:30
if link != "" && r . hostFromSource ( ) != "" {
2024-09-17 15:33:17 +05:30
zap . L ( ) . Info ( "adding logs link to annotations" , zap . String ( "link" , fmt . Sprintf ( "%s/logs/logs-explorer?%s" , r . hostFromSource ( ) , link ) ) )
2024-02-20 10:42:30 +05:30
annotations = append ( annotations , labels . Label { Name : "related_logs" , Value : fmt . Sprintf ( "%s/logs/logs-explorer?%s" , r . hostFromSource ( ) , link ) } )
2024-02-02 21:16:14 +05:30
}
}
2022-07-14 11:59:06 +05:30
lbs := lb . Labels ( )
h := lbs . Hash ( )
resultFPs [ h ] = struct { } { }
if _ , ok := alerts [ h ] ; ok {
2024-03-27 00:07:29 +05:30
zap . L ( ) . Error ( "the alert query returns duplicate records" , zap . String ( "ruleid" , r . ID ( ) ) , zap . Any ( "alert" , alerts [ h ] ) )
2022-07-14 11:59:06 +05:30
err = fmt . Errorf ( "duplicate alert found, vector contains metrics with the same labelset after applying alert labels" )
return nil , err
}
alerts [ h ] = & Alert {
2024-08-23 21:13:00 +05:30
Labels : lbs ,
QueryResultLables : resultLabels ,
Annotations : annotations ,
ActiveAt : ts ,
2024-09-09 13:06:09 +05:30
State : model . StatePending ,
2024-08-23 21:13:00 +05:30
Value : smpl . V ,
GeneratorURL : r . GeneratorURL ( ) ,
Receivers : r . preferredChannels ,
Missing : smpl . IsMissing ,
2022-07-14 11:59:06 +05:30
}
}
2024-09-11 09:56:59 +05:30
zap . L ( ) . Info ( "number of alerts found" , zap . String ( "name" , r . Name ( ) ) , zap . Int ( "count" , len ( alerts ) ) )
2022-07-14 11:59:06 +05:30
// alerts[h] is ready, add or update active list now
for h , a := range alerts {
// Check whether we already have alerting state for the identifying label set.
// Update the last value and annotations if so, create a new alert entry otherwise.
2024-09-24 10:22:52 +05:30
if alert , ok := r . Active [ h ] ; ok && alert . State != model . StateInactive {
2022-07-14 11:59:06 +05:30
alert . Value = a . Value
alert . Annotations = a . Annotations
2022-08-04 15:31:21 +05:30
alert . Receivers = r . preferredChannels
2022-07-14 11:59:06 +05:30
continue
}
2024-09-24 10:22:52 +05:30
r . Active [ h ] = a
2022-07-14 11:59:06 +05:30
}
2024-09-13 18:10:49 +05:30
itemsToAdd := [ ] model . RuleStateHistory { }
2024-08-09 12:11:05 +05:30
2022-07-14 11:59:06 +05:30
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
2024-09-24 10:22:52 +05:30
for fp , a := range r . Active {
2024-08-23 21:13:00 +05:30
labelsJSON , err := json . Marshal ( a . QueryResultLables )
2024-08-09 12:11:05 +05:30
if err != nil {
zap . L ( ) . Error ( "error marshaling labels" , zap . Error ( err ) , zap . Any ( "labels" , a . Labels ) )
}
2022-07-14 11:59:06 +05:30
if _ , ok := resultFPs [ fp ] ; ! ok {
// If the alert was previously firing, keep it around for a given
// retention time so it is reported as resolved to the AlertManager.
2024-09-24 10:22:52 +05:30
if a . State == model . StatePending || ( ! a . ResolvedAt . IsZero ( ) && ts . Sub ( a . ResolvedAt ) > ResolvedRetention ) {
delete ( r . Active , fp )
2022-07-14 11:59:06 +05:30
}
2024-09-09 13:06:09 +05:30
if a . State != model . StateInactive {
a . State = model . StateInactive
2022-07-14 11:59:06 +05:30
a . ResolvedAt = ts
2024-09-13 18:10:49 +05:30
itemsToAdd = append ( itemsToAdd , model . RuleStateHistory {
2024-08-09 12:11:05 +05:30
RuleID : r . ID ( ) ,
RuleName : r . Name ( ) ,
2024-09-09 13:06:09 +05:30
State : model . StateInactive ,
2024-08-09 12:11:05 +05:30
StateChanged : true ,
UnixMilli : ts . UnixMilli ( ) ,
2024-09-13 18:10:49 +05:30
Labels : model . LabelsString ( labelsJSON ) ,
2024-08-23 21:13:00 +05:30
Fingerprint : a . QueryResultLables . Hash ( ) ,
2024-09-09 13:06:09 +05:30
Value : a . Value ,
2024-08-09 12:11:05 +05:30
} )
2022-07-14 11:59:06 +05:30
}
continue
}
2024-09-09 13:06:09 +05:30
if a . State == model . StatePending && ts . Sub ( a . ActiveAt ) >= r . holdDuration {
a . State = model . StateFiring
2022-07-14 11:59:06 +05:30
a . FiredAt = ts
2024-09-09 13:06:09 +05:30
state := model . StateFiring
2024-08-09 12:11:05 +05:30
if a . Missing {
2024-09-09 13:06:09 +05:30
state = model . StateNoData
2024-08-09 12:11:05 +05:30
}
2024-09-13 18:10:49 +05:30
itemsToAdd = append ( itemsToAdd , model . RuleStateHistory {
2024-08-09 12:11:05 +05:30
RuleID : r . ID ( ) ,
RuleName : r . Name ( ) ,
State : state ,
StateChanged : true ,
UnixMilli : ts . UnixMilli ( ) ,
2024-09-13 18:10:49 +05:30
Labels : model . LabelsString ( labelsJSON ) ,
2024-08-23 21:13:00 +05:30
Fingerprint : a . QueryResultLables . Hash ( ) ,
2024-08-09 12:11:05 +05:30
Value : a . Value ,
} )
}
}
currentState := r . State ( )
2024-09-09 13:06:09 +05:30
overallStateChanged := currentState != prevState
for idx , item := range itemsToAdd {
item . OverallStateChanged = overallStateChanged
item . OverallState = currentState
itemsToAdd [ idx ] = item
2024-08-09 12:11:05 +05:30
}
2022-07-14 11:59:06 +05:30
2024-09-11 09:56:59 +05:30
r . RecordRuleStateHistory ( ctx , prevState , currentState , itemsToAdd )
2024-09-09 13:06:09 +05:30
2022-07-14 11:59:06 +05:30
r . health = HealthGood
r . lastError = err
2024-09-24 10:22:52 +05:30
return len ( r . Active ) , nil
2022-07-14 11:59:06 +05:30
}
func ( r * ThresholdRule ) String ( ) string {
ar := PostableRule {
2024-03-27 20:25:18 +05:30
AlertName : r . name ,
2022-08-04 15:31:21 +05:30
RuleCondition : r . ruleCondition ,
EvalWindow : Duration ( r . evalWindow ) ,
Labels : r . labels . Map ( ) ,
Annotations : r . annotations . Map ( ) ,
PreferredChannels : r . preferredChannels ,
2022-07-14 11:59:06 +05:30
}
byt , err := yaml . Marshal ( ar )
if err != nil {
return fmt . Sprintf ( "error marshaling alerting rule: %s" , err . Error ( ) )
}
return string ( byt )
}
2024-05-24 12:11:34 +05:30
func removeGroupinSetPoints ( series v3 . Series ) [ ] v3 . Point {
var result [ ] v3 . Point
for _ , s := range series . Points {
2024-06-05 19:33:45 +05:30
if s . Timestamp >= 0 && ! math . IsNaN ( s . Value ) && ! math . IsInf ( s . Value , 0 ) {
2024-05-24 12:11:34 +05:30
result = append ( result , s )
}
}
return result
}