2022-07-14 11:59:06 +05:30
package rules
import (
2024-05-24 21:29:13 +05:30
"bytes"
2022-07-14 11:59:06 +05:30
"context"
2024-02-02 21:16:14 +05:30
"encoding/json"
2022-07-14 11:59:06 +05:30
"fmt"
"math"
2024-02-02 21:16:14 +05:30
"net/url"
2023-12-28 20:22:42 +05:30
"regexp"
2022-07-14 11:59:06 +05:30
"sort"
2024-06-01 08:22:16 +05:30
"text/template"
2022-07-14 11:59:06 +05:30
"time"
2023-12-28 20:22:42 +05:30
"unicode"
2022-07-14 11:59:06 +05:30
2022-10-06 20:13:30 +05:30
"go.uber.org/zap"
2024-05-01 17:03:46 +05:30
"go.signoz.io/signoz/pkg/query-service/common"
2024-09-09 13:06:09 +05:30
"go.signoz.io/signoz/pkg/query-service/model"
2024-05-24 12:11:34 +05:30
"go.signoz.io/signoz/pkg/query-service/postprocess"
2023-05-09 19:16:55 +05:30
2024-05-24 12:11:34 +05:30
"go.signoz.io/signoz/pkg/query-service/app/querier"
querierV2 "go.signoz.io/signoz/pkg/query-service/app/querier/v2"
2023-05-09 19:16:55 +05:30
"go.signoz.io/signoz/pkg/query-service/app/queryBuilder"
2022-10-06 20:13:30 +05:30
"go.signoz.io/signoz/pkg/query-service/constants"
2023-08-18 07:32:05 +05:30
"go.signoz.io/signoz/pkg/query-service/interfaces"
2023-05-09 19:16:55 +05:30
v3 "go.signoz.io/signoz/pkg/query-service/model/v3"
2022-10-06 20:13:30 +05:30
"go.signoz.io/signoz/pkg/query-service/utils/labels"
2024-05-24 21:29:13 +05:30
querytemplate "go.signoz.io/signoz/pkg/query-service/utils/queryTemplate"
2022-10-06 20:13:30 +05:30
"go.signoz.io/signoz/pkg/query-service/utils/times"
"go.signoz.io/signoz/pkg/query-service/utils/timestamp"
2022-07-14 11:59:06 +05:30
2023-05-09 19:16:55 +05:30
logsv3 "go.signoz.io/signoz/pkg/query-service/app/logs/v3"
2023-08-16 14:22:40 +05:30
"go.signoz.io/signoz/pkg/query-service/formatter"
2023-05-09 19:16:55 +05:30
2022-07-14 11:59:06 +05:30
yaml "gopkg.in/yaml.v2"
)
// ThresholdRule is an alerting rule that evaluates a query over an evaluation
// window and raises alerts for the series that satisfy the rule condition.
// It embeds *BaseRule and adds the queriers used to execute the rule query.
type ThresholdRule struct {
2024-09-11 09:56:59 +05:30
* BaseRule
2024-08-09 15:31:39 +05:30
// Ever since we introduced the new metrics query builder, the version is "v4"
// for all the rules
// if the version is "v4", then we use the new querierV2
// for any other version (e.g. "v3"), we use the old querier
2024-05-24 12:11:34 +05:30
version string
2024-03-01 14:51:50 +05:30
// temporalityMap is a map of metric name to the set of temporalities known for it.
// It caches the result of temporality lookups to avoid fetching the temporality
// of the same metric multiple times.
// Querying the v4 table on the low-cardinality temporality column
// should be fast, but we can still avoid the query if we have the data in memory.
temporalityMap map [ string ] map [ v3 . Temporality ] bool
2023-05-09 19:16:55 +05:30
2024-08-09 15:31:39 +05:30
// querier is used for alerts created before the introduction of new metrics query builder
querier interfaces . Querier
// querierV2 is used for alerts created after the introduction of new metrics query builder
2024-05-24 12:11:34 +05:30
querierV2 interfaces . Querier
2022-07-14 11:59:06 +05:30
}
func NewThresholdRule (
id string ,
2022-08-04 15:31:21 +05:30
p * PostableRule ,
2023-08-18 07:32:05 +05:30
featureFlags interfaces . FeatureLookup ,
2024-05-24 12:11:34 +05:30
reader interfaces . Reader ,
2024-09-11 09:56:59 +05:30
opts ... RuleOption ,
2022-07-14 11:59:06 +05:30
) ( * ThresholdRule , error ) {
2024-08-08 17:34:25 +05:30
zap . L ( ) . Info ( "creating new ThresholdRule" , zap . String ( "id" , id ) , zap . Any ( "opts" , opts ) )
2024-09-11 09:56:59 +05:30
baseRule , err := NewBaseRule ( id , p , reader , opts ... )
if err != nil {
return nil , err
2022-07-14 11:59:06 +05:30
}
2022-08-04 15:31:21 +05:30
t := ThresholdRule {
2024-09-11 09:56:59 +05:30
BaseRule : baseRule ,
version : p . Version ,
temporalityMap : make ( map [ string ] map [ v3 . Temporality ] bool ) ,
2022-08-03 15:08:14 +05:30
}
2024-05-24 12:11:34 +05:30
querierOption := querier . QuerierOptions {
Reader : reader ,
Cache : nil ,
KeyGenerator : queryBuilder . NewKeyGenerator ( ) ,
FeatureLookup : featureFlags ,
2023-05-09 19:16:55 +05:30
}
2024-05-24 12:11:34 +05:30
querierOptsV2 := querierV2 . QuerierOptions {
Reader : reader ,
Cache : nil ,
KeyGenerator : queryBuilder . NewKeyGenerator ( ) ,
FeatureLookup : featureFlags ,
2024-03-01 14:51:50 +05:30
}
2024-05-24 12:11:34 +05:30
t . querier = querier . NewQuerier ( querierOption )
t . querierV2 = querierV2 . NewQuerier ( querierOptsV2 )
2024-08-09 12:11:05 +05:30
t . reader = reader
2022-08-04 15:31:21 +05:30
return & t , nil
2022-07-14 11:59:06 +05:30
}
// Type reports the kind of this rule; it always returns RuleTypeThreshold.
func ( r * ThresholdRule ) Type ( ) RuleType {
return RuleTypeThreshold
}
2024-03-01 14:51:50 +05:30
// populateTemporality same as addTemporality but for v4 and better
2024-09-11 09:56:59 +05:30
func ( r * ThresholdRule ) populateTemporality ( ctx context . Context , qp * v3 . QueryRangeParamsV3 ) error {
2024-03-01 14:51:50 +05:30
missingTemporality := make ( [ ] string , 0 )
metricNameToTemporality := make ( map [ string ] map [ v3 . Temporality ] bool )
if qp . CompositeQuery != nil && len ( qp . CompositeQuery . BuilderQueries ) > 0 {
for _ , query := range qp . CompositeQuery . BuilderQueries {
// if there is no temporality specified in the query but we have it in the map
// then use the value from the map
if query . Temporality == "" && r . temporalityMap [ query . AggregateAttribute . Key ] != nil {
// We prefer delta if it is available
if r . temporalityMap [ query . AggregateAttribute . Key ] [ v3 . Delta ] {
query . Temporality = v3 . Delta
} else if r . temporalityMap [ query . AggregateAttribute . Key ] [ v3 . Cumulative ] {
query . Temporality = v3 . Cumulative
} else {
query . Temporality = v3 . Unspecified
}
}
// we don't have temporality for this metric
if query . DataSource == v3 . DataSourceMetrics && query . Temporality == "" {
missingTemporality = append ( missingTemporality , query . AggregateAttribute . Key )
}
if _ , ok := metricNameToTemporality [ query . AggregateAttribute . Key ] ; ! ok {
metricNameToTemporality [ query . AggregateAttribute . Key ] = make ( map [ v3 . Temporality ] bool )
}
}
}
2024-08-09 12:34:40 +05:30
var nameToTemporality map [ string ] map [ v3 . Temporality ] bool
var err error
if len ( missingTemporality ) > 0 {
2024-09-11 09:56:59 +05:30
nameToTemporality , err = r . reader . FetchTemporality ( ctx , missingTemporality )
2024-08-09 12:34:40 +05:30
if err != nil {
return err
}
2024-03-01 14:51:50 +05:30
}
if qp . CompositeQuery != nil && len ( qp . CompositeQuery . BuilderQueries ) > 0 {
for name := range qp . CompositeQuery . BuilderQueries {
query := qp . CompositeQuery . BuilderQueries [ name ]
if query . DataSource == v3 . DataSourceMetrics && query . Temporality == "" {
if nameToTemporality [ query . AggregateAttribute . Key ] [ v3 . Delta ] {
query . Temporality = v3 . Delta
} else if nameToTemporality [ query . AggregateAttribute . Key ] [ v3 . Cumulative ] {
query . Temporality = v3 . Cumulative
} else {
query . Temporality = v3 . Unspecified
}
r . temporalityMap [ query . AggregateAttribute . Key ] = nameToTemporality [ query . AggregateAttribute . Key ]
}
}
}
return nil
}
2024-09-11 09:56:59 +05:30
func ( r * ThresholdRule ) prepareQueryRange ( ts time . Time ) ( * v3 . QueryRangeParamsV3 , error ) {
2022-07-14 11:59:06 +05:30
2024-08-08 17:34:25 +05:30
zap . L ( ) . Info ( "prepareQueryRange" , zap . Int64 ( "ts" , ts . UnixMilli ( ) ) , zap . Int64 ( "evalWindow" , r . evalWindow . Milliseconds ( ) ) , zap . Int64 ( "evalDelay" , r . evalDelay . Milliseconds ( ) ) )
start := ts . Add ( - time . Duration ( r . evalWindow ) ) . UnixMilli ( )
end := ts . UnixMilli ( )
2024-09-11 09:56:59 +05:30
2024-08-08 17:34:25 +05:30
if r . evalDelay > 0 {
start = start - int64 ( r . evalDelay . Milliseconds ( ) )
end = end - int64 ( r . evalDelay . Milliseconds ( ) )
}
2023-07-05 10:34:07 +05:30
// round to minute otherwise we could potentially miss data
start = start - ( start % ( 60 * 1000 ) )
end = end - ( end % ( 60 * 1000 ) )
2023-05-09 19:16:55 +05:30
if r . ruleCondition . QueryType ( ) == v3 . QueryTypeClickHouseSQL {
2024-05-24 21:29:13 +05:30
params := & v3 . QueryRangeParamsV3 {
2024-06-05 19:35:48 +05:30
Start : start ,
End : end ,
Step : int64 ( math . Max ( float64 ( common . MinAllowedStepInterval ( start , end ) ) , 60 ) ) ,
CompositeQuery : & v3 . CompositeQuery {
QueryType : r . ruleCondition . CompositeQuery . QueryType ,
PanelType : r . ruleCondition . CompositeQuery . PanelType ,
BuilderQueries : make ( map [ string ] * v3 . BuilderQuery ) ,
ClickHouseQueries : make ( map [ string ] * v3 . ClickHouseQuery ) ,
PromQueries : make ( map [ string ] * v3 . PromQuery ) ,
Unit : r . ruleCondition . CompositeQuery . Unit ,
} ,
Variables : make ( map [ string ] interface { } , 0 ) ,
NoCache : true ,
2022-11-23 18:49:03 +05:30
}
2024-05-24 21:29:13 +05:30
querytemplate . AssignReservedVarsV3 ( params )
for name , chQuery := range r . ruleCondition . CompositeQuery . ClickHouseQueries {
if chQuery . Disabled {
continue
}
tmpl := template . New ( "clickhouse-query" )
tmpl , err := tmpl . Parse ( chQuery . Query )
if err != nil {
2024-09-11 09:56:59 +05:30
return nil , err
2024-05-24 21:29:13 +05:30
}
var query bytes . Buffer
err = tmpl . Execute ( & query , params . Variables )
if err != nil {
2024-09-11 09:56:59 +05:30
return nil , err
2024-05-24 21:29:13 +05:30
}
2024-06-05 19:35:48 +05:30
params . CompositeQuery . ClickHouseQueries [ name ] = & v3 . ClickHouseQuery {
Query : query . String ( ) ,
Disabled : chQuery . Disabled ,
Legend : chQuery . Legend ,
}
2024-05-24 21:29:13 +05:30
}
2024-09-11 09:56:59 +05:30
return params , nil
2022-11-23 18:49:03 +05:30
}
2022-07-14 11:59:06 +05:30
2023-07-05 10:34:07 +05:30
if r . ruleCondition . CompositeQuery != nil && r . ruleCondition . CompositeQuery . BuilderQueries != nil {
for _ , q := range r . ruleCondition . CompositeQuery . BuilderQueries {
2024-06-12 12:21:27 +05:30
// If the step interval is less than the minimum allowed step interval, set it to the minimum allowed step interval
if minStep := common . MinAllowedStepInterval ( start , end ) ; q . StepInterval < minStep {
q . StepInterval = minStep
}
2023-07-05 10:34:07 +05:30
}
}
2024-06-19 14:19:30 +05:30
if r . ruleCondition . CompositeQuery . PanelType != v3 . PanelTypeGraph {
r . ruleCondition . CompositeQuery . PanelType = v3 . PanelTypeGraph
}
2022-11-23 18:49:03 +05:30
// default mode
2023-05-09 19:16:55 +05:30
return & v3 . QueryRangeParamsV3 {
2023-07-05 10:34:07 +05:30
Start : start ,
End : end ,
2024-05-24 12:11:34 +05:30
Step : int64 ( math . Max ( float64 ( common . MinAllowedStepInterval ( start , end ) ) , 60 ) ) ,
2023-05-09 19:16:55 +05:30
CompositeQuery : r . ruleCondition . CompositeQuery ,
2024-05-24 12:11:34 +05:30
Variables : make ( map [ string ] interface { } , 0 ) ,
NoCache : true ,
2024-09-11 09:56:59 +05:30
} , nil
2022-07-14 11:59:06 +05:30
}
2024-02-02 21:16:14 +05:30
// fetchFilters prepares the where clause used in the "related logs/traces"
// links of a notification.
//
// `lbls` contains the key/value pairs of the labels from the result of the
// query. The filter items of the selected builder query are walked and
// combined with those labels:
//
//  1. A filter whose key matches a label is replaced by `key = <label value>`.
//     Example: `Severity text IN (WARN, ERROR)`. If Severity text is part of
//     the group by clause it shows up in `lbls`, and the notification for the
//     WARN series links to `Severity text = WARN`.
//  2. A filter whose key matches no label is kept as it is.
//
// Finally, every label that did not replace a filter is appended as
// `key = value`. Example: latency by serviceName without any filter. For each
// service with latency > threshold a notification is sent, and clicking on the
// related traces for service A is expected to open the traces page with the
// filter serviceName=A.
func (r *ThresholdRule) fetchFilters(selectedQuery string, lbls labels.Labels) []v3.FilterItem {
	var result []v3.FilterItem

	// names of the labels that already replaced a where-clause item
	replaced := map[string]struct{}{}

	composite := r.ruleCondition.CompositeQuery
	if composite.QueryType == v3.QueryTypeBuilder {
		if bq := composite.BuilderQueries[selectedQuery]; bq != nil && bq.Filters != nil {
			for _, clause := range bq.Filters.Items {
				// locate the first label carrying the same key as the clause
				matched := -1
				for i := range lbls {
					if lbls[i].Name == clause.Key.Key {
						matched = i
						break
					}
				}

				if matched < 0 {
					// no label for this key: keep the clause untouched
					result = append(result, clause)
					continue
				}

				// pin the clause to the concrete value of this series
				result = append(result, v3.FilterItem{
					Key:      clause.Key,
					Operator: v3.FilterOperatorEqual,
					Value:    lbls[matched].Value,
				})
				replaced[lbls[matched].Name] = struct{}{}
			}
		}
	}

	// labels that were not part of the where clause become new `key = value` items
	for _, label := range lbls {
		if _, done := replaced[label.Name]; done {
			continue
		}
		result = append(result, v3.FilterItem{
			Key:      v3.AttributeKey{Key: label.Name},
			Operator: v3.FilterOperatorEqual,
			Value:    label.Value,
		})
	}

	return result
}
func ( r * ThresholdRule ) prepareLinksToLogs ( ts time . Time , lbls labels . Labels ) string {
selectedQuery := r . GetSelectedQuery ( )
// TODO(srikanthccv): handle formula queries
if selectedQuery < "A" || selectedQuery > "Z" {
return ""
}
2024-09-11 09:56:59 +05:30
q , err := r . prepareQueryRange ( ts )
if err != nil {
return ""
}
2024-02-02 21:16:14 +05:30
// Logs list view expects time in milliseconds
2024-08-23 21:13:00 +05:30
tr := v3 . URLShareableTimeRange {
2024-03-12 17:30:01 +05:30
Start : q . Start ,
End : q . End ,
2024-02-02 21:16:14 +05:30
PageSize : 100 ,
}
2024-08-23 21:13:00 +05:30
options := v3 . URLShareableOptions {
2024-02-02 21:16:14 +05:30
MaxLines : 2 ,
Format : "list" ,
SelectColumns : [ ] v3 . AttributeKey { } ,
}
period , _ := json . Marshal ( tr )
urlEncodedTimeRange := url . QueryEscape ( string ( period ) )
filterItems := r . fetchFilters ( selectedQuery , lbls )
2024-08-23 21:13:00 +05:30
urlData := v3 . URLShareableCompositeQuery {
2024-02-02 21:16:14 +05:30
QueryType : string ( v3 . QueryTypeBuilder ) ,
2024-08-23 21:13:00 +05:30
Builder : v3 . URLShareableBuilderQuery {
2024-02-02 21:16:14 +05:30
QueryData : [ ] v3 . BuilderQuery {
{
DataSource : v3 . DataSourceLogs ,
QueryName : "A" ,
AggregateOperator : v3 . AggregateOperatorNoOp ,
AggregateAttribute : v3 . AttributeKey { } ,
Filters : & v3 . FilterSet {
Items : filterItems ,
Operator : "AND" ,
} ,
Expression : "A" ,
Disabled : false ,
Having : [ ] v3 . Having { } ,
StepInterval : 60 ,
OrderBy : [ ] v3 . OrderBy {
{
ColumnName : "timestamp" ,
Order : "desc" ,
} ,
} ,
} ,
} ,
QueryFormulas : make ( [ ] string , 0 ) ,
} ,
}
data , _ := json . Marshal ( urlData )
2024-08-28 14:18:15 +05:30
compositeQuery := url . QueryEscape ( url . QueryEscape ( string ( data ) ) )
2024-02-02 21:16:14 +05:30
optionsData , _ := json . Marshal ( options )
urlEncodedOptions := url . QueryEscape ( string ( optionsData ) )
return fmt . Sprintf ( "compositeQuery=%s&timeRange=%s&startTime=%d&endTime=%d&options=%s" , compositeQuery , urlEncodedTimeRange , tr . Start , tr . End , urlEncodedOptions )
}
func ( r * ThresholdRule ) prepareLinksToTraces ( ts time . Time , lbls labels . Labels ) string {
selectedQuery := r . GetSelectedQuery ( )
// TODO(srikanthccv): handle formula queries
if selectedQuery < "A" || selectedQuery > "Z" {
return ""
}
2024-09-11 09:56:59 +05:30
q , err := r . prepareQueryRange ( ts )
if err != nil {
return ""
}
2024-02-02 21:16:14 +05:30
// Traces list view expects time in nanoseconds
2024-08-23 21:13:00 +05:30
tr := v3 . URLShareableTimeRange {
2024-03-12 17:30:01 +05:30
Start : q . Start * time . Second . Microseconds ( ) ,
End : q . End * time . Second . Microseconds ( ) ,
2024-02-02 21:16:14 +05:30
PageSize : 100 ,
}
2024-08-23 21:13:00 +05:30
options := v3 . URLShareableOptions {
2024-02-02 21:16:14 +05:30
MaxLines : 2 ,
Format : "list" ,
SelectColumns : constants . TracesListViewDefaultSelectedColumns ,
}
period , _ := json . Marshal ( tr )
urlEncodedTimeRange := url . QueryEscape ( string ( period ) )
filterItems := r . fetchFilters ( selectedQuery , lbls )
2024-08-23 21:13:00 +05:30
urlData := v3 . URLShareableCompositeQuery {
2024-02-02 21:16:14 +05:30
QueryType : string ( v3 . QueryTypeBuilder ) ,
2024-08-23 21:13:00 +05:30
Builder : v3 . URLShareableBuilderQuery {
2024-02-02 21:16:14 +05:30
QueryData : [ ] v3 . BuilderQuery {
{
DataSource : v3 . DataSourceTraces ,
QueryName : "A" ,
AggregateOperator : v3 . AggregateOperatorNoOp ,
AggregateAttribute : v3 . AttributeKey { } ,
Filters : & v3 . FilterSet {
Items : filterItems ,
Operator : "AND" ,
} ,
Expression : "A" ,
Disabled : false ,
Having : [ ] v3 . Having { } ,
StepInterval : 60 ,
OrderBy : [ ] v3 . OrderBy {
{
ColumnName : "timestamp" ,
Order : "desc" ,
} ,
} ,
} ,
} ,
QueryFormulas : make ( [ ] string , 0 ) ,
} ,
}
data , _ := json . Marshal ( urlData )
2024-08-28 14:18:15 +05:30
compositeQuery := url . QueryEscape ( url . QueryEscape ( string ( data ) ) )
2024-02-02 21:16:14 +05:30
optionsData , _ := json . Marshal ( options )
urlEncodedOptions := url . QueryEscape ( string ( optionsData ) )
return fmt . Sprintf ( "compositeQuery=%s&timeRange=%s&startTime=%d&endTime=%d&options=%s" , compositeQuery , urlEncodedTimeRange , tr . Start , tr . End , urlEncodedOptions )
}
2023-11-10 17:43:19 +05:30
func ( r * ThresholdRule ) GetSelectedQuery ( ) string {
if r . ruleCondition != nil {
if r . ruleCondition . SelectedQuery != "" {
return r . ruleCondition . SelectedQuery
}
2024-05-24 12:11:34 +05:30
queryNames := map [ string ] struct { } { }
if r . ruleCondition . CompositeQuery != nil {
if r . ruleCondition . QueryType ( ) == v3 . QueryTypeBuilder {
for name := range r . ruleCondition . CompositeQuery . BuilderQueries {
queryNames [ name ] = struct { } { }
}
} else if r . ruleCondition . QueryType ( ) == v3 . QueryTypeClickHouseSQL {
for name := range r . ruleCondition . CompositeQuery . ClickHouseQueries {
queryNames [ name ] = struct { } { }
}
}
}
2023-11-10 17:43:19 +05:30
// The following logic exists for backward compatibility
// If there is no selected query, then
// - check if F1 is present, if yes, return F1
// - else return the query with max ascii value
// this logic is not really correct. we should be considering
// whether the query is enabled or not. but this is a temporary
// fix to support backward compatibility
2024-05-24 12:11:34 +05:30
if _ , ok := queryNames [ "F1" ] ; ok {
2023-11-10 17:43:19 +05:30
return "F1"
}
2024-05-24 12:11:34 +05:30
keys := make ( [ ] string , 0 , len ( queryNames ) )
for k := range queryNames {
2023-11-10 17:43:19 +05:30
keys = append ( keys , k )
}
sort . Strings ( keys )
return keys [ len ( keys ) - 1 ]
}
// This should never happen
return ""
}
2024-09-11 09:56:59 +05:30
func ( r * ThresholdRule ) buildAndRunQuery ( ctx context . Context , ts time . Time ) ( Vector , error ) {
2022-07-14 11:59:06 +05:30
2024-09-11 09:56:59 +05:30
params , err := r . prepareQueryRange ( ts )
if err != nil {
return nil , err
}
err = r . populateTemporality ( ctx , params )
2024-05-24 12:11:34 +05:30
if err != nil {
return nil , fmt . Errorf ( "internal error while setting temporality" )
}
2022-11-23 18:49:03 +05:30
2024-05-24 12:11:34 +05:30
if params . CompositeQuery . QueryType == v3 . QueryTypeBuilder {
// check if any enrichment is required for logs if yes then enrich them
if logsv3 . EnrichmentRequired ( params ) {
// Note: Sending empty fields key because enrichment is only needed for json
// TODO: Add support for attribute enrichment later
logsv3 . Enrich ( params , map [ string ] v3 . AttributeKey { } )
2022-11-23 18:49:03 +05:30
}
2024-05-24 12:11:34 +05:30
}
2022-11-23 18:49:03 +05:30
2024-05-24 12:11:34 +05:30
var results [ ] * v3 . Result
2024-09-11 09:56:59 +05:30
var queryErrors map [ string ] error
2022-11-23 18:49:03 +05:30
2024-05-24 12:11:34 +05:30
if r . version == "v4" {
2024-09-11 09:56:59 +05:30
results , queryErrors , err = r . querierV2 . QueryRange ( ctx , params , map [ string ] v3 . AttributeKey { } )
2024-05-24 12:11:34 +05:30
} else {
2024-09-11 09:56:59 +05:30
results , queryErrors , err = r . querier . QueryRange ( ctx , params , map [ string ] v3 . AttributeKey { } )
2024-05-24 12:11:34 +05:30
}
if err != nil {
2024-09-11 09:56:59 +05:30
zap . L ( ) . Error ( "failed to get alert query result" , zap . String ( "rule" , r . Name ( ) ) , zap . Error ( err ) , zap . Any ( "errors" , queryErrors ) )
2024-05-24 12:11:34 +05:30
return nil , fmt . Errorf ( "internal error while querying" )
}
2022-11-23 18:49:03 +05:30
2024-05-24 12:11:34 +05:30
if params . CompositeQuery . QueryType == v3 . QueryTypeBuilder {
results , err = postprocess . PostProcessResult ( results , params )
2022-11-23 18:49:03 +05:30
if err != nil {
2024-05-24 12:11:34 +05:30
zap . L ( ) . Error ( "failed to post process result" , zap . String ( "rule" , r . Name ( ) ) , zap . Error ( err ) )
return nil , fmt . Errorf ( "internal error while post processing" )
2022-11-23 18:49:03 +05:30
}
2022-07-14 11:59:06 +05:30
}
2024-05-24 12:11:34 +05:30
selectedQuery := r . GetSelectedQuery ( )
var queryResult * v3 . Result
for _ , res := range results {
if res . QueryName == selectedQuery {
queryResult = res
break
}
2022-07-14 11:59:06 +05:30
}
2024-05-24 12:11:34 +05:30
if queryResult != nil && len ( queryResult . Series ) > 0 {
r . lastTimestampWithDatapoints = time . Now ( )
}
2022-07-14 11:59:06 +05:30
2024-05-24 12:11:34 +05:30
var resultVector Vector
2022-07-14 11:59:06 +05:30
2024-05-24 12:11:34 +05:30
// if the data is missing for `For` duration then we should send alert
if r . ruleCondition . AlertOnAbsent && r . lastTimestampWithDatapoints . Add ( time . Duration ( r . Condition ( ) . AbsentFor ) * time . Minute ) . Before ( time . Now ( ) ) {
zap . L ( ) . Info ( "no data found for rule condition" , zap . String ( "ruleid" , r . ID ( ) ) )
lbls := labels . NewBuilder ( labels . Labels { } )
if ! r . lastTimestampWithDatapoints . IsZero ( ) {
lbls . Set ( "lastSeen" , r . lastTimestampWithDatapoints . Format ( constants . AlertTimeFormat ) )
}
resultVector = append ( resultVector , Sample {
Metric : lbls . Labels ( ) ,
IsMissing : true ,
} )
return resultVector , nil
2022-07-14 11:59:06 +05:30
}
2024-05-24 12:11:34 +05:30
for _ , series := range queryResult . Series {
smpl , shouldAlert := r . shouldAlert ( * series )
if shouldAlert {
resultVector = append ( resultVector , smpl )
}
}
return resultVector , nil
2022-07-14 11:59:06 +05:30
}
2023-12-28 20:22:42 +05:30
// labelNameInvalidCharsRe matches every character that is not allowed in a
// label name, i.e. anything other than ASCII letters, digits and underscores.
// It is compiled once instead of on every normalizeLabelName call.
var labelNameInvalidCharsRe = regexp.MustCompile(`[^a-zA-Z0-9_]`)

// normalizeLabelName converts name into a valid label name.
// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
//
// Every character other than an ASCII letter, digit or underscore is replaced
// with an underscore. If the result does not start with a letter or an
// underscore, an underscore is prepended. The empty string is returned as is.
func normalizeLabelName(name string) string {
	// Replace all non-alphanumeric characters except underscores with underscores
	normalized := labelNameInvalidCharsRe.ReplaceAllString(name, "_")

	// If the first character is not a letter or an underscore, prepend an underscore.
	// After the replacement above the string is pure ASCII, so inspecting the
	// first byte is enough.
	if len(normalized) > 0 && !unicode.IsLetter(rune(normalized[0])) && normalized[0] != '_' {
		normalized = "_" + normalized
	}

	return normalized
}
2024-09-11 09:56:59 +05:30
// Eval runs one evaluation of the rule at time ts.
//
// It executes the rule query through buildAndRunQuery, builds one Alert per
// returned sample (expanding the label and annotation templates of the rule),
// merges those alerts into r.active, moves alerts between the pending, firing
// and inactive states, and hands the resulting state transitions to
// RecordRuleStateHistory.
//
// On success it returns the number of entries in r.active. An error is
// returned when the query fails, or when two samples end up with the same
// label set after the rule labels have been applied.
func ( r * ThresholdRule ) Eval ( ctx context . Context , ts time . Time ) ( interface { } , error ) {
2022-07-14 11:59:06 +05:30
2024-08-09 12:11:05 +05:30
// remember the overall state before this evaluation so that a change can be detected at the end
prevState := r . State ( )
2023-08-16 14:22:40 +05:30
valueFormatter := formatter . FromUnit ( r . Unit ( ) )
2024-09-11 09:56:59 +05:30
res , err := r . buildAndRunQuery ( ctx , ts )
2022-07-14 11:59:06 +05:30
if err != nil {
return nil , err
}
// the query above runs before the lock is taken; the rest of the evaluation holds r.mtx
r . mtx . Lock ( )
defer r . mtx . Unlock ( )
// resultFPs holds the label-set hashes of the alerts produced by this evaluation
resultFPs := map [ uint64 ] struct { } { }
var alerts = make ( map [ uint64 ] * Alert , len ( res ) )
for _ , smpl := range res {
// flatten the sample labels into a map for the template data
l := make ( map [ string ] string , len ( smpl . Metric ) )
for _ , lbl := range smpl . Metric {
l [ lbl . Name ] = lbl . Value
}
2023-08-16 14:22:40 +05:30
// the sample value and the rule target value are formatted with the rule unit
value := valueFormatter . Format ( smpl . V , r . Unit ( ) )
2024-07-01 18:34:02 +05:30
threshold := valueFormatter . Format ( r . targetVal ( ) , r . Unit ( ) )
2024-03-27 00:07:29 +05:30
zap . L ( ) . Debug ( "Alert template data for rule" , zap . String ( "name" , r . Name ( ) ) , zap . String ( "formatter" , valueFormatter . Name ( ) ) , zap . String ( "value" , value ) , zap . String ( "threshold" , threshold ) )
2023-08-16 14:22:40 +05:30
tmplData := AlertTemplateData ( l , value , threshold )
2022-07-14 11:59:06 +05:30
// Inject some convenience variables that are easier to remember for users
// who are not used to Go's templating system.
2023-02-10 23:53:45 +05:30
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
2022-07-14 11:59:06 +05:30
2024-02-11 22:31:46 +05:30
// utility function to apply go template on labels and annotations;
// on failure the returned text is an error placeholder and the failure is logged
2022-07-14 11:59:06 +05:30
expand := func ( text string ) string {
tmpl := NewTemplateExpander (
ctx ,
defs + text ,
"__alert_" + r . Name ( ) ,
tmplData ,
times . Time ( timestamp . FromTime ( ts ) ) ,
nil ,
)
result , err := tmpl . Expand ( )
if err != nil {
result = fmt . Sprintf ( "<error expanding template: %s>" , err )
2024-03-27 00:07:29 +05:30
zap . L ( ) . Error ( "Expanding alert template failed" , zap . Error ( err ) , zap . Any ( "data" , tmplData ) )
2022-07-14 11:59:06 +05:30
}
return result
}
2024-05-27 13:19:28 +05:30
// lb collects the alert labels, starting from the sample labels (smpl.Metric);
// resultLabels is built from smpl.MetricOrig; both drop the metric name and temporality labels
lb := labels . NewBuilder ( smpl . Metric ) . Del ( labels . MetricNameLabel ) . Del ( labels . TemporalityLabel )
2024-08-23 21:13:00 +05:30
resultLabels := labels . NewBuilder ( smpl . MetricOrig ) . Del ( labels . MetricNameLabel ) . Del ( labels . TemporalityLabel ) . Labels ( )
2022-07-14 11:59:06 +05:30
2024-09-11 09:56:59 +05:30
// rule labels are template-expanded and set on top of the sample labels
for name , value := range r . labels . Map ( ) {
lb . Set ( name , expand ( value ) )
2022-07-14 11:59:06 +05:30
}
lb . Set ( labels . AlertNameLabel , r . Name ( ) )
lb . Set ( labels . AlertRuleIdLabel , r . ID ( ) )
lb . Set ( labels . RuleSourceLabel , r . GeneratorURL ( ) )
2024-09-11 09:56:59 +05:30
// annotation names are normalized and their values template-expanded
annotations := make ( labels . Labels , 0 , len ( r . annotations . Map ( ) ) )
for name , value := range r . annotations . Map ( ) {
annotations = append ( annotations , labels . Label { Name : normalizeLabelName ( name ) , Value : expand ( value ) } )
2024-02-20 10:42:30 +05:30
}
2024-08-09 15:31:39 +05:30
// samples flagged as missing get an alert name prefixed with "[No data] "
if smpl . IsMissing {
lb . Set ( labels . AlertNameLabel , "[No data] " + r . Name ( ) )
}
2024-02-20 10:42:30 +05:30
// Links with timestamps should go in annotations since labels
// are used for alert grouping, and we want to group alerts with the same
// label set, but different timestamps, together.
2024-08-30 10:34:11 +05:30
if r . typ == AlertTypeTraces {
2024-02-20 10:42:30 +05:30
link := r . prepareLinksToTraces ( ts , smpl . MetricOrig )
2024-02-02 21:16:14 +05:30
if link != "" && r . hostFromSource ( ) != "" {
2024-02-20 10:42:30 +05:30
annotations = append ( annotations , labels . Label { Name : "related_traces" , Value : fmt . Sprintf ( "%s/traces-explorer?%s" , r . hostFromSource ( ) , link ) } )
2024-02-02 21:16:14 +05:30
}
2024-08-30 10:34:11 +05:30
} else if r . typ == AlertTypeLogs {
2024-02-20 10:42:30 +05:30
link := r . prepareLinksToLogs ( ts , smpl . MetricOrig )
2024-02-02 21:16:14 +05:30
if link != "" && r . hostFromSource ( ) != "" {
2024-02-20 10:42:30 +05:30
annotations = append ( annotations , labels . Label { Name : "related_logs" , Value : fmt . Sprintf ( "%s/logs/logs-explorer?%s" , r . hostFromSource ( ) , link ) } )
2024-02-02 21:16:14 +05:30
}
}
2022-07-14 11:59:06 +05:30
// the hash of the final label set identifies the alert
lbs := lb . Labels ( )
h := lbs . Hash ( )
resultFPs [ h ] = struct { } { }
// two samples with the same final label set cannot be told apart: fail the evaluation
if _ , ok := alerts [ h ] ; ok {
2024-03-27 00:07:29 +05:30
zap . L ( ) . Error ( "the alert query returns duplicate records" , zap . String ( "ruleid" , r . ID ( ) ) , zap . Any ( "alert" , alerts [ h ] ) )
2022-07-14 11:59:06 +05:30
err = fmt . Errorf ( "duplicate alert found, vector contains metrics with the same labelset after applying alert labels" )
return nil , err
}
// every new alert starts in the pending state
alerts [ h ] = & Alert {
2024-08-23 21:13:00 +05:30
Labels : lbs ,
QueryResultLables : resultLabels ,
Annotations : annotations ,
ActiveAt : ts ,
2024-09-09 13:06:09 +05:30
State : model . StatePending ,
2024-08-23 21:13:00 +05:30
Value : smpl . V ,
GeneratorURL : r . GeneratorURL ( ) ,
Receivers : r . preferredChannels ,
Missing : smpl . IsMissing ,
2022-07-14 11:59:06 +05:30
}
}
2024-09-11 09:56:59 +05:30
zap . L ( ) . Info ( "number of alerts found" , zap . String ( "name" , r . Name ( ) ) , zap . Int ( "count" , len ( alerts ) ) )
2022-07-14 11:59:06 +05:30
// alerts[h] is ready, add or update active list now
for h , a := range alerts {
// Check whether we already have alerting state for the identifying label set.
// Update the last value and annotations if so, create a new alert entry otherwise.
2024-09-09 13:06:09 +05:30
if alert , ok := r . active [ h ] ; ok && alert . State != model . StateInactive {
2022-07-14 11:59:06 +05:30
alert . Value = a . Value
alert . Annotations = a . Annotations
2022-08-04 15:31:21 +05:30
alert . Receivers = r . preferredChannels
2022-07-14 11:59:06 +05:30
continue
}
r . active [ h ] = a
}
2024-08-09 12:11:05 +05:30
// itemsToAdd collects the state transitions observed in this evaluation
itemsToAdd := [ ] v3 . RuleStateHistory { }
2022-07-14 11:59:06 +05:30
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
for fp , a := range r . active {
2024-08-23 21:13:00 +05:30
// a marshaling failure is only logged; the history items below are still written
labelsJSON , err := json . Marshal ( a . QueryResultLables )
2024-08-09 12:11:05 +05:30
if err != nil {
zap . L ( ) . Error ( "error marshaling labels" , zap . Error ( err ) , zap . Any ( "labels" , a . Labels ) )
}
2022-07-14 11:59:06 +05:30
// the alert was not produced by this evaluation
if _ , ok := resultFPs [ fp ] ; ! ok {
// If the alert was previously firing, keep it around for a given
// retention time so it is reported as resolved to the AlertManager.
// Pending alerts, and alerts resolved longer than resolvedRetention ago, are dropped.
2024-09-09 13:06:09 +05:30
if a . State == model . StatePending || ( ! a . ResolvedAt . IsZero ( ) && ts . Sub ( a . ResolvedAt ) > resolvedRetention ) {
2022-07-14 11:59:06 +05:30
delete ( r . active , fp )
}
2024-09-09 13:06:09 +05:30
// anything not yet inactive is marked inactive (resolved at ts) and recorded in the history
if a . State != model . StateInactive {
a . State = model . StateInactive
2022-07-14 11:59:06 +05:30
a . ResolvedAt = ts
2024-08-09 12:11:05 +05:30
itemsToAdd = append ( itemsToAdd , v3 . RuleStateHistory {
RuleID : r . ID ( ) ,
RuleName : r . Name ( ) ,
2024-09-09 13:06:09 +05:30
State : model . StateInactive ,
2024-08-09 12:11:05 +05:30
StateChanged : true ,
UnixMilli : ts . UnixMilli ( ) ,
Labels : v3 . LabelsString ( labelsJSON ) ,
2024-08-23 21:13:00 +05:30
Fingerprint : a . QueryResultLables . Hash ( ) ,
2024-09-09 13:06:09 +05:30
Value : a . Value ,
2024-08-09 12:11:05 +05:30
} )
2022-07-14 11:59:06 +05:30
}
continue
}
2024-09-09 13:06:09 +05:30
// a pending alert fires once it has been active for at least holdDuration
if a . State == model . StatePending && ts . Sub ( a . ActiveAt ) >= r . holdDuration {
a . State = model . StateFiring
2022-07-14 11:59:06 +05:30
a . FiredAt = ts
2024-09-09 13:06:09 +05:30
// the history records "no data" instead of "firing" for alerts raised on missing data
state := model . StateFiring
2024-08-09 12:11:05 +05:30
if a . Missing {
2024-09-09 13:06:09 +05:30
state = model . StateNoData
2024-08-09 12:11:05 +05:30
}
itemsToAdd = append ( itemsToAdd , v3 . RuleStateHistory {
RuleID : r . ID ( ) ,
RuleName : r . Name ( ) ,
State : state ,
StateChanged : true ,
UnixMilli : ts . UnixMilli ( ) ,
Labels : v3 . LabelsString ( labelsJSON ) ,
2024-08-23 21:13:00 +05:30
Fingerprint : a . QueryResultLables . Hash ( ) ,
2024-08-09 12:11:05 +05:30
Value : a . Value ,
} )
}
}
// stamp every history item with the overall rule state after this evaluation
currentState := r . State ( )
2024-09-09 13:06:09 +05:30
overallStateChanged := currentState != prevState
for idx , item := range itemsToAdd {
item . OverallStateChanged = overallStateChanged
item . OverallState = currentState
itemsToAdd [ idx ] = item
2024-08-09 12:11:05 +05:30
}
2022-07-14 11:59:06 +05:30
2024-09-11 09:56:59 +05:30
r . RecordRuleStateHistory ( ctx , prevState , currentState , itemsToAdd )
2024-09-09 13:06:09 +05:30
2022-07-14 11:59:06 +05:30
r . health = HealthGood
// err is nil here (every failure above returns early), so this clears the last error
r . lastError = err
2022-08-04 17:24:15 +05:30
return len ( r . active ) , nil
2022-07-14 11:59:06 +05:30
}
func ( r * ThresholdRule ) String ( ) string {
ar := PostableRule {
2024-03-27 20:25:18 +05:30
AlertName : r . name ,
2022-08-04 15:31:21 +05:30
RuleCondition : r . ruleCondition ,
EvalWindow : Duration ( r . evalWindow ) ,
Labels : r . labels . Map ( ) ,
Annotations : r . annotations . Map ( ) ,
PreferredChannels : r . preferredChannels ,
2022-07-14 11:59:06 +05:30
}
byt , err := yaml . Marshal ( ar )
if err != nil {
return fmt . Sprintf ( "error marshaling alerting rule: %s" , err . Error ( ) )
}
return string ( byt )
}
2024-05-24 12:11:34 +05:30
func removeGroupinSetPoints ( series v3 . Series ) [ ] v3 . Point {
var result [ ] v3 . Point
for _ , s := range series . Points {
2024-06-05 19:33:45 +05:30
if s . Timestamp >= 0 && ! math . IsNaN ( s . Value ) && ! math . IsInf ( s . Value , 0 ) {
2024-05-24 12:11:34 +05:30
result = append ( result , s )
}
}
return result
}