2022-07-14 11:59:06 +05:30
package rules
import (
"context"
2024-08-09 12:11:05 +05:30
"encoding/json"
2022-07-14 11:59:06 +05:30
"fmt"
2024-02-11 22:31:46 +05:30
"math"
2022-10-06 20:13:30 +05:30
"sync"
"time"
2022-07-14 11:59:06 +05:30
"go.uber.org/zap"
2023-03-07 13:37:31 +05:30
plabels "github.com/prometheus/prometheus/model/labels"
2022-07-14 11:59:06 +05:30
pql "github.com/prometheus/prometheus/promql"
2023-08-16 14:22:40 +05:30
"go.signoz.io/signoz/pkg/query-service/converter"
"go.signoz.io/signoz/pkg/query-service/formatter"
2024-08-09 12:11:05 +05:30
"go.signoz.io/signoz/pkg/query-service/interfaces"
2024-09-09 13:06:09 +05:30
"go.signoz.io/signoz/pkg/query-service/model"
2023-05-09 19:16:55 +05:30
v3 "go.signoz.io/signoz/pkg/query-service/model/v3"
2022-10-06 20:13:30 +05:30
qslabels "go.signoz.io/signoz/pkg/query-service/utils/labels"
"go.signoz.io/signoz/pkg/query-service/utils/times"
"go.signoz.io/signoz/pkg/query-service/utils/timestamp"
2022-07-14 11:59:06 +05:30
yaml "gopkg.in/yaml.v2"
)
2022-08-04 17:24:15 +05:30
// PromRuleOpts holds optional behaviour switches for a PromRule.
type PromRuleOpts struct {
// SendAlways makes SendAlerts send every alert irrespective of
// resendDelay or other params.
SendAlways bool
}
2022-07-14 11:59:06 +05:30
type PromRule struct {
id string
name string
source string
ruleCondition * RuleCondition
evalWindow time . Duration
holdDuration time . Duration
labels plabels . Labels
annotations plabels . Labels
2022-08-04 15:31:21 +05:30
preferredChannels [ ] string
2022-07-14 11:59:06 +05:30
mtx sync . Mutex
evaluationDuration time . Duration
evaluationTimestamp time . Time
health RuleHealth
lastError error
// map of active alerts
active map [ uint64 ] * Alert
2024-09-04 18:09:40 +05:30
logger * zap . Logger
2022-08-04 17:24:15 +05:30
opts PromRuleOpts
2024-08-09 12:11:05 +05:30
reader interfaces . Reader
2024-09-09 13:06:09 +05:30
handledRestart bool
2022-07-14 11:59:06 +05:30
}
func NewPromRule (
id string ,
2022-08-04 15:31:21 +05:30
postableRule * PostableRule ,
2024-09-04 18:09:40 +05:30
logger * zap . Logger ,
2022-08-04 17:24:15 +05:30
opts PromRuleOpts ,
2024-08-09 12:11:05 +05:30
reader interfaces . Reader ,
2022-07-14 11:59:06 +05:30
) ( * PromRule , error ) {
2022-08-04 15:31:21 +05:30
if postableRule . RuleCondition == nil {
2022-07-14 11:59:06 +05:30
return nil , fmt . Errorf ( "no rule condition" )
2022-08-04 15:31:21 +05:30
} else if ! postableRule . RuleCondition . IsValid ( ) {
2022-07-14 11:59:06 +05:30
return nil , fmt . Errorf ( "invalid rule condition" )
}
2022-08-04 15:31:21 +05:30
p := PromRule {
id : id ,
2024-03-27 20:25:18 +05:30
name : postableRule . AlertName ,
2022-08-04 15:31:21 +05:30
source : postableRule . Source ,
ruleCondition : postableRule . RuleCondition ,
evalWindow : time . Duration ( postableRule . EvalWindow ) ,
labels : plabels . FromMap ( postableRule . Labels ) ,
annotations : plabels . FromMap ( postableRule . Annotations ) ,
preferredChannels : postableRule . PreferredChannels ,
health : HealthUnknown ,
active : map [ uint64 ] * Alert { } ,
logger : logger ,
2022-08-04 17:24:15 +05:30
opts : opts ,
2022-08-04 15:31:21 +05:30
}
2024-08-09 12:11:05 +05:30
p . reader = reader
2022-08-04 15:31:21 +05:30
if int64 ( p . evalWindow ) == 0 {
p . evalWindow = 5 * time . Minute
}
2022-08-04 17:24:15 +05:30
query , err := p . getPqlQuery ( )
if err != nil {
// can not generate a valid prom QL query
return nil , err
}
2022-08-04 15:31:21 +05:30
2024-03-27 00:07:29 +05:30
zap . L ( ) . Info ( "creating new alerting rule" , zap . String ( "name" , p . name ) , zap . String ( "condition" , p . ruleCondition . String ( ) ) , zap . String ( "query" , query ) )
2022-08-04 15:31:21 +05:30
return & p , nil
2022-07-14 11:59:06 +05:30
}
// Name returns the alert name of the rule.
func (r *PromRule) Name() string {
	return r.name
}
// ID returns the identifier the rule was created with.
func (r *PromRule) ID() string {
	return r.id
}
// Condition returns the rule condition. The pointer is the rule's own, not a
// copy.
func (r *PromRule) Condition() *RuleCondition {
	return r.ruleCondition
}
2024-07-01 18:34:02 +05:30
// targetVal returns the target value for the rule condition
// when the y-axis and target units are non-empty, it
// converts the target value to the y-axis unit
2023-02-10 23:53:45 +05:30
func ( r * PromRule ) targetVal ( ) float64 {
if r . ruleCondition == nil || r . ruleCondition . Target == nil {
return 0
}
2024-07-01 18:34:02 +05:30
// get the converter for the target unit
2024-02-11 22:31:46 +05:30
unitConverter := converter . FromUnit ( converter . Unit ( r . ruleCondition . TargetUnit ) )
2024-07-01 18:34:02 +05:30
// convert the target value to the y-axis unit
value := unitConverter . Convert ( converter . Value {
F : * r . ruleCondition . Target ,
U : converter . Unit ( r . ruleCondition . TargetUnit ) ,
} , converter . Unit ( r . Unit ( ) ) )
2024-02-11 22:31:46 +05:30
return value . F
2023-02-10 23:53:45 +05:30
}
2022-07-14 11:59:06 +05:30
// Type reports the kind of rule, which is always RuleTypeProm for a PromRule.
func (r *PromRule) Type() RuleType {
	return RuleTypeProm
}
func ( r * PromRule ) GeneratorURL ( ) string {
2022-08-03 15:08:14 +05:30
return prepareRuleGeneratorURL ( r . ID ( ) , r . source )
2022-07-14 11:59:06 +05:30
}
2022-08-04 15:31:21 +05:30
// PreferredChannels returns the channels configured for the rule. The slice is
// the rule's own, not a copy.
func (r *PromRule) PreferredChannels() []string {
	return r.preferredChannels
}
2022-07-14 11:59:06 +05:30
// SetLastError records err as the rule's most recent error. Pass nil to clear
// it. It takes r.mtx.
func (r *PromRule) SetLastError(err error) {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	r.lastError = err
}
// LastError returns the error recorded for the rule, or nil if there is none.
// It takes r.mtx.
func (r *PromRule) LastError() error {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	return r.lastError
}
// SetHealth updates the health of the rule. It takes r.mtx.
func (r *PromRule) SetHealth(health RuleHealth) {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	r.health = health
}
// Health returns the current health of the rule. It takes r.mtx.
func (r *PromRule) Health() RuleHealth {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	return r.health
}
// SetEvaluationDuration stores how long the most recent evaluation of the rule
// took. It takes r.mtx.
func (r *PromRule) SetEvaluationDuration(dur time.Duration) {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	r.evaluationDuration = dur
}
// HoldDuration returns how long an alert must stay pending before Eval
// promotes it to firing.
func (r *PromRule) HoldDuration() time.Duration {
	return r.holdDuration
}
// EvalWindow returns the length of the time range queried on each evaluation.
func (r *PromRule) EvalWindow() time.Duration {
	return r.evalWindow
}
// Labels returns the labels configured on the alerting rule.
func (r *PromRule) Labels() qslabels.BaseLabels {
	return r.labels
}
// Annotations returns the annotations configured on the alerting rule.
func (r *PromRule) Annotations() qslabels.BaseLabels {
	return r.annotations
}
// GetEvaluationDuration returns the duration last stored through
// SetEvaluationDuration, i.e. how long the most recent evaluation took. It
// takes r.mtx.
func (r *PromRule) GetEvaluationDuration() time.Duration {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	return r.evaluationDuration
}
// SetEvaluationTimestamp stores the time at which the rule was last evaluated.
// It takes r.mtx.
func (r *PromRule) SetEvaluationTimestamp(ts time.Time) {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	r.evaluationTimestamp = ts
}
// GetEvaluationTimestamp returns the time stored through
// SetEvaluationTimestamp, i.e. when the last evaluation took place. It takes
// r.mtx.
func (r *PromRule) GetEvaluationTimestamp() time.Time {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	return r.evaluationTimestamp
}
// State returns the maximum state of alert instances for this rule.
// StateFiring > StatePending > StateInactive
2024-09-09 13:06:09 +05:30
func ( r * PromRule ) State ( ) model . AlertState {
2022-07-14 11:59:06 +05:30
2024-09-09 13:06:09 +05:30
maxState := model . StateInactive
2022-07-14 11:59:06 +05:30
for _ , a := range r . active {
if a . State > maxState {
maxState = a . State
}
}
return maxState
}
// currentAlerts returns a snapshot of every alert tracked by the rule, taken
// under r.mtx. Each element points to a copy of the Alert struct, not to the
// entry stored in the rule.
func (r *PromRule) currentAlerts() []*Alert {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	snapshot := make([]*Alert, 0, len(r.active))
	for _, alert := range r.active {
		clone := *alert
		snapshot = append(snapshot, &clone)
	}
	return snapshot
}
// ActiveAlerts returns copies of the alerts that have not been resolved, i.e.
// whose ResolvedAt is still the zero time. The result is nil when there are
// none.
func (r *PromRule) ActiveAlerts() []*Alert {
	var unresolved []*Alert
	for _, alert := range r.currentAlerts() {
		if !alert.ResolvedAt.IsZero() {
			continue
		}
		unresolved = append(unresolved, alert)
	}
	return unresolved
}
2023-08-16 14:22:40 +05:30
// Unit returns the unit set on the rule's composite query, or "" when the rule
// has no condition or no composite query.
func (r *PromRule) Unit() string {
	if r.ruleCondition == nil || r.ruleCondition.CompositeQuery == nil {
		return ""
	}
	return r.ruleCondition.CompositeQuery.Unit
}
2022-07-14 11:59:06 +05:30
// ForEachActiveAlert calls f once for every alert tracked by the rule.
//
// f receives the rule's actual alerts rather than copies, so changes made by f
// are kept. Use ActiveAlerts() instead when copies are wanted. f runs while
// r.mtx is held, so it must not call methods of the rule that take the lock.
func (r *PromRule) ForEachActiveAlert(f func(*Alert)) {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	for _, alert := range r.active {
		f(alert)
	}
}
func ( r * PromRule ) SendAlerts ( ctx context . Context , ts time . Time , resendDelay time . Duration , interval time . Duration , notifyFunc NotifyFunc ) {
alerts := [ ] * Alert { }
r . ForEachActiveAlert ( func ( alert * Alert ) {
2022-08-04 17:24:15 +05:30
if r . opts . SendAlways || alert . needsSending ( ts , resendDelay ) {
2022-07-14 11:59:06 +05:30
alert . LastSentAt = ts
// Allow for two Eval or Alertmanager send failures.
delta := resendDelay
if interval > resendDelay {
delta = interval
}
alert . ValidUntil = ts . Add ( 4 * delta )
anew := * alert
alerts = append ( alerts , & anew )
}
} )
notifyFunc ( ctx , "" , alerts ... )
}
2023-11-10 17:43:19 +05:30
// GetSelectedQuery returns the name of the query the rule alerts on.
//
// An explicitly selected query wins. Otherwise it falls back to "A":
// historically PromQL alerts carried a single query, and this keeps those
// rules working. New rules are expected to set the selected query explicitly.
func (r *PromRule) GetSelectedQuery() string {
	if r.ruleCondition == nil {
		// This should never happen.
		return ""
	}
	if selected := r.ruleCondition.SelectedQuery; selected != "" {
		return selected
	}
	return "A"
}
2022-07-14 11:59:06 +05:30
func ( r * PromRule ) getPqlQuery ( ) ( string , error ) {
2023-05-09 19:16:55 +05:30
if r . ruleCondition . CompositeQuery . QueryType == v3 . QueryTypePromQL {
if len ( r . ruleCondition . CompositeQuery . PromQueries ) > 0 {
2023-11-10 17:43:19 +05:30
selectedQuery := r . GetSelectedQuery ( )
if promQuery , ok := r . ruleCondition . CompositeQuery . PromQueries [ selectedQuery ] ; ok {
2022-07-14 11:59:06 +05:30
query := promQuery . Query
if query == "" {
return query , fmt . Errorf ( "a promquery needs to be set for this rule to function" )
}
2024-02-11 22:31:46 +05:30
return query , nil
2022-07-14 11:59:06 +05:30
}
}
}
return "" , fmt . Errorf ( "invalid promql rule query" )
}
2024-02-11 22:31:46 +05:30
// matchType returns the match type of the rule condition, defaulting to
// AtleastOnce when the rule has no condition.
func (r *PromRule) matchType() MatchType {
	if cond := r.ruleCondition; cond != nil {
		return cond.MatchType
	}
	return AtleastOnce
}
// compareOp returns the comparison operator of the rule condition, defaulting
// to ValueIsEq when the rule has no condition.
func (r *PromRule) compareOp() CompareOp {
	if cond := r.ruleCondition; cond != nil {
		return cond.CompareOp
	}
	return ValueIsEq
}
2024-09-09 13:06:09 +05:30
// recordRuleStateHistory persists the state changes in itemsToAdd through the
// rule's reader.
//
// On the first call after the rule was (re)created, and only if the reader
// returns previously saved state, the saved entries are reconciled with
// itemsToAdd first:
//   - a saved firing/no-data entry with no counterpart in itemsToAdd is
//     written as resolved (inactive);
//   - a saved entry whose counterpart has a different state is written with
//     the new state;
//   - entries of itemsToAdd whose fingerprint was not saved are written as is.
//
// If the resulting overall state differs from the saved overall state, every
// written entry is marked with the new overall state.
//
// It returns the error from reading the saved state. A failed write is only
// logged. With a nil reader there is nowhere to read from or write to, so the
// call does nothing.
//
// TODO(srikanthccv): implement base rule and use for all types of rules
func (r *PromRule) recordRuleStateHistory(ctx context.Context, prevState, currentState model.AlertState, itemsToAdd []v3.RuleStateHistory) error {
	zap.L().Debug("recording rule state history", zap.String("ruleid", r.ID()), zap.Any("prevState", prevState), zap.Any("currentState", currentState), zap.Any("itemsToAdd", itemsToAdd))

	// Check the reader before its first use; a nil reader would otherwise
	// panic on the read below.
	if r.reader == nil {
		return nil
	}

	revisedItemsToAdd := map[uint64]v3.RuleStateHistory{}

	lastSavedState, err := r.reader.GetLastSavedRuleStateHistory(ctx, r.ID())
	if err != nil {
		return err
	}

	// if the query-service has been restarted, or the rule has been modified (which re-initializes the rule),
	// the state would reset so we need to add the corresponding state changes to previously saved states
	if !r.handledRestart && len(lastSavedState) > 0 {
		zap.L().Debug("handling restart", zap.String("ruleid", r.ID()), zap.Any("lastSavedState", lastSavedState))

		incoming := make(map[uint64]v3.RuleStateHistory, len(itemsToAdd))
		for _, item := range itemsToAdd {
			incoming[item.Fingerprint] = item
		}

		shouldSkip := make(map[uint64]bool, len(lastSavedState))

		for _, item := range lastSavedState {
			// for the last saved item with fingerprint, check if there is a corresponding entry in the current state
			current, ok := incoming[item.Fingerprint]
			switch {
			case !ok:
				// there was a state change in the past, but not in the current state
				// if the state was firing, then we should add a resolved state change
				// there is nothing to do if the prev state was normal
				if item.State == model.StateFiring || item.State == model.StateNoData {
					item.State = model.StateInactive
					item.StateChanged = true
					item.UnixMilli = time.Now().UnixMilli()
					revisedItemsToAdd[item.Fingerprint] = item
				}
			case item.State != current.State:
				item.State = current.State
				item.StateChanged = true
				item.UnixMilli = time.Now().UnixMilli()
				revisedItemsToAdd[item.Fingerprint] = item
			}
			// do not add this item to revisedItemsToAdd as it is already processed
			shouldSkip[item.Fingerprint] = true
		}
		zap.L().Debug("after lastSavedState loop", zap.String("ruleid", r.ID()), zap.Any("revisedItemsToAdd", revisedItemsToAdd))

		// if there are any new state changes that were not saved, add them to the revised items
		for _, item := range itemsToAdd {
			if _, ok := revisedItemsToAdd[item.Fingerprint]; !ok && !shouldSkip[item.Fingerprint] {
				revisedItemsToAdd[item.Fingerprint] = item
			}
		}
		zap.L().Debug("after itemsToAdd loop", zap.String("ruleid", r.ID()), zap.Any("revisedItemsToAdd", revisedItemsToAdd))

		newState := model.StateInactive
		for _, item := range revisedItemsToAdd {
			if item.State == model.StateFiring || item.State == model.StateNoData {
				newState = model.StateFiring
				break
			}
		}
		zap.L().Debug("newState", zap.String("ruleid", r.ID()), zap.Any("newState", newState))

		// if there is a change in the overall state, update the overall state
		if lastSavedState[0].OverallState != newState {
			for fingerprint, item := range revisedItemsToAdd {
				item.OverallState = newState
				item.OverallStateChanged = true
				revisedItemsToAdd[fingerprint] = item
			}
		}
		zap.L().Debug("revisedItemsToAdd after newState", zap.String("ruleid", r.ID()), zap.Any("revisedItemsToAdd", revisedItemsToAdd))
	} else {
		for _, item := range itemsToAdd {
			revisedItemsToAdd[item.Fingerprint] = item
		}
	}

	if len(revisedItemsToAdd) > 0 {
		zap.L().Debug("writing rule state history", zap.String("ruleid", r.ID()), zap.Any("revisedItemsToAdd", revisedItemsToAdd))

		entries := make([]v3.RuleStateHistory, 0, len(revisedItemsToAdd))
		for _, item := range revisedItemsToAdd {
			entries = append(entries, item)
		}
		// Writing history is best effort: a failure is logged, not returned.
		if err := r.reader.AddRuleStateHistory(ctx, entries); err != nil {
			zap.L().Error("error while inserting rule state history", zap.Error(err), zap.Any("itemsToAdd", itemsToAdd))
		}
	}
	r.handledRestart = true

	return nil
}
2022-07-14 11:59:06 +05:30
func ( r * PromRule ) Eval ( ctx context . Context , ts time . Time , queriers * Queriers ) ( interface { } , error ) {
2024-08-09 12:11:05 +05:30
prevState := r . State ( )
2024-02-11 22:31:46 +05:30
start := ts . Add ( - r . evalWindow )
end := ts
interval := 60 * time . Second // TODO(srikanthccv): this should be configurable
2023-08-16 14:22:40 +05:30
valueFormatter := formatter . FromUnit ( r . Unit ( ) )
2022-07-14 11:59:06 +05:30
q , err := r . getPqlQuery ( )
if err != nil {
return nil , err
}
2024-03-27 00:07:29 +05:30
zap . L ( ) . Info ( "evaluating promql query" , zap . String ( "name" , r . Name ( ) ) , zap . String ( "query" , q ) )
2024-02-11 22:31:46 +05:30
res , err := queriers . PqlEngine . RunAlertQuery ( ctx , q , start , end , interval )
2022-07-14 11:59:06 +05:30
if err != nil {
r . SetHealth ( HealthBad )
r . SetLastError ( err )
return nil , err
}
r . mtx . Lock ( )
defer r . mtx . Unlock ( )
resultFPs := map [ uint64 ] struct { } { }
2022-08-04 17:24:15 +05:30
2022-07-14 11:59:06 +05:30
var alerts = make ( map [ uint64 ] * Alert , len ( res ) )
2024-02-11 22:31:46 +05:30
for _ , series := range res {
l := make ( map [ string ] string , len ( series . Metric ) )
for _ , lbl := range series . Metric {
2022-07-14 11:59:06 +05:30
l [ lbl . Name ] = lbl . Value
}
2024-02-11 22:31:46 +05:30
if len ( series . Floats ) == 0 {
continue
}
alertSmpl , shouldAlert := r . shouldAlert ( series )
if ! shouldAlert {
continue
}
2024-03-27 00:07:29 +05:30
zap . L ( ) . Debug ( "alerting for series" , zap . String ( "name" , r . Name ( ) ) , zap . Any ( "series" , series ) )
2024-02-11 22:31:46 +05:30
2024-07-01 18:34:02 +05:30
threshold := valueFormatter . Format ( r . targetVal ( ) , r . Unit ( ) )
2023-12-11 16:09:28 +05:30
2024-02-11 22:31:46 +05:30
tmplData := AlertTemplateData ( l , valueFormatter . Format ( alertSmpl . F , r . Unit ( ) ) , threshold )
2022-07-14 11:59:06 +05:30
// Inject some convenience variables that are easier to remember for users
// who are not used to Go's templating system.
2023-02-10 23:53:45 +05:30
defs := "{{$labels := .Labels}}{{$value := .Value}}{{$threshold := .Threshold}}"
2022-07-14 11:59:06 +05:30
expand := func ( text string ) string {
tmpl := NewTemplateExpander (
ctx ,
defs + text ,
"__alert_" + r . Name ( ) ,
tmplData ,
times . Time ( timestamp . FromTime ( ts ) ) ,
nil ,
)
result , err := tmpl . Expand ( )
if err != nil {
result = fmt . Sprintf ( "<error expanding template: %s>" , err )
2024-09-04 18:09:40 +05:30
r . logger . Warn ( "Expanding alert template failed" , zap . Error ( err ) , zap . Any ( "data" , tmplData ) )
2022-07-14 11:59:06 +05:30
}
return result
}
2024-02-11 22:31:46 +05:30
lb := plabels . NewBuilder ( alertSmpl . Metric ) . Del ( plabels . MetricName )
2024-08-23 21:13:00 +05:30
resultLabels := plabels . NewBuilder ( alertSmpl . Metric ) . Del ( plabels . MetricName ) . Labels ( )
2022-07-14 11:59:06 +05:30
for _ , l := range r . labels {
lb . Set ( l . Name , expand ( l . Value ) )
}
2022-08-04 17:24:15 +05:30
2022-07-14 11:59:06 +05:30
lb . Set ( qslabels . AlertNameLabel , r . Name ( ) )
lb . Set ( qslabels . AlertRuleIdLabel , r . ID ( ) )
lb . Set ( qslabels . RuleSourceLabel , r . GeneratorURL ( ) )
annotations := make ( plabels . Labels , 0 , len ( r . annotations ) )
for _ , a := range r . annotations {
annotations = append ( annotations , plabels . Label { Name : a . Name , Value : expand ( a . Value ) } )
}
2023-09-05 18:17:32 +05:30
lbs := lb . Labels ( )
2022-07-14 11:59:06 +05:30
h := lbs . Hash ( )
resultFPs [ h ] = struct { } { }
if _ , ok := alerts [ h ] ; ok {
err = fmt . Errorf ( "vector contains metrics with the same labelset after applying alert labels" )
// We have already acquired the lock above hence using SetHealth and
// SetLastError will deadlock.
r . health = HealthBad
r . lastError = err
return nil , err
}
alerts [ h ] = & Alert {
2024-08-23 21:13:00 +05:30
Labels : lbs ,
QueryResultLables : resultLabels ,
Annotations : annotations ,
ActiveAt : ts ,
2024-09-09 13:06:09 +05:30
State : model . StatePending ,
2024-08-23 21:13:00 +05:30
Value : alertSmpl . F ,
GeneratorURL : r . GeneratorURL ( ) ,
Receivers : r . preferredChannels ,
2022-07-14 11:59:06 +05:30
}
}
2024-03-27 00:07:29 +05:30
zap . L ( ) . Debug ( "found alerts for rule" , zap . Int ( "count" , len ( alerts ) ) , zap . String ( "name" , r . Name ( ) ) )
2022-07-14 11:59:06 +05:30
// alerts[h] is ready, add or update active list now
for h , a := range alerts {
// Check whether we already have alerting state for the identifying label set.
// Update the last value and annotations if so, create a new alert entry otherwise.
2024-09-09 13:06:09 +05:30
if alert , ok := r . active [ h ] ; ok && alert . State != model . StateInactive {
2022-07-14 11:59:06 +05:30
alert . Value = a . Value
alert . Annotations = a . Annotations
2022-08-04 15:31:21 +05:30
alert . Receivers = r . preferredChannels
2022-07-14 11:59:06 +05:30
continue
}
r . active [ h ] = a
}
2024-08-09 12:11:05 +05:30
itemsToAdd := [ ] v3 . RuleStateHistory { }
2022-07-14 11:59:06 +05:30
// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
for fp , a := range r . active {
2024-09-09 13:06:09 +05:30
labelsJSON , err := json . Marshal ( a . QueryResultLables )
2024-08-09 12:11:05 +05:30
if err != nil {
zap . L ( ) . Error ( "error marshaling labels" , zap . Error ( err ) , zap . String ( "name" , r . Name ( ) ) )
}
2022-07-14 11:59:06 +05:30
if _ , ok := resultFPs [ fp ] ; ! ok {
// If the alert was previously firing, keep it around for a given
// retention time so it is reported as resolved to the AlertManager.
2024-09-09 13:06:09 +05:30
if a . State == model . StatePending || ( ! a . ResolvedAt . IsZero ( ) && ts . Sub ( a . ResolvedAt ) > resolvedRetention ) {
2022-07-14 11:59:06 +05:30
delete ( r . active , fp )
}
2024-09-09 13:06:09 +05:30
if a . State != model . StateInactive {
a . State = model . StateInactive
2022-07-14 11:59:06 +05:30
a . ResolvedAt = ts
2024-08-09 12:11:05 +05:30
itemsToAdd = append ( itemsToAdd , v3 . RuleStateHistory {
RuleID : r . ID ( ) ,
RuleName : r . Name ( ) ,
2024-09-09 13:06:09 +05:30
State : model . StateInactive ,
2024-08-09 12:11:05 +05:30
StateChanged : true ,
UnixMilli : ts . UnixMilli ( ) ,
Labels : v3 . LabelsString ( labelsJSON ) ,
2024-08-23 21:13:00 +05:30
Fingerprint : a . QueryResultLables . Hash ( ) ,
2024-08-09 12:11:05 +05:30
} )
2022-07-14 11:59:06 +05:30
}
continue
}
2024-09-09 13:06:09 +05:30
if a . State == model . StatePending && ts . Sub ( a . ActiveAt ) >= r . holdDuration {
a . State = model . StateFiring
2022-07-14 11:59:06 +05:30
a . FiredAt = ts
2024-09-09 13:06:09 +05:30
state := model . StateFiring
2024-08-09 12:11:05 +05:30
if a . Missing {
2024-09-09 13:06:09 +05:30
state = model . StateNoData
2024-08-09 12:11:05 +05:30
}
itemsToAdd = append ( itemsToAdd , v3 . RuleStateHistory {
RuleID : r . ID ( ) ,
RuleName : r . Name ( ) ,
State : state ,
StateChanged : true ,
UnixMilli : ts . UnixMilli ( ) ,
Labels : v3 . LabelsString ( labelsJSON ) ,
2024-08-23 21:13:00 +05:30
Fingerprint : a . QueryResultLables . Hash ( ) ,
2024-08-09 12:11:05 +05:30
Value : a . Value ,
} )
2022-07-14 11:59:06 +05:30
}
}
r . health = HealthGood
r . lastError = err
2024-08-09 12:11:05 +05:30
currentState := r . State ( )
2024-09-09 13:06:09 +05:30
overallStateChanged := currentState != prevState
for idx , item := range itemsToAdd {
item . OverallStateChanged = overallStateChanged
item . OverallState = currentState
itemsToAdd [ idx ] = item
2024-08-09 12:11:05 +05:30
}
2024-09-09 13:06:09 +05:30
r . recordRuleStateHistory ( ctx , prevState , currentState , itemsToAdd )
2024-08-09 12:11:05 +05:30
2022-08-04 17:24:15 +05:30
return len ( r . active ) , nil
2022-07-14 11:59:06 +05:30
}
2024-02-11 22:31:46 +05:30
func ( r * PromRule ) shouldAlert ( series pql . Series ) ( pql . Sample , bool ) {
var alertSmpl pql . Sample
var shouldAlert bool
switch r . matchType ( ) {
case AtleastOnce :
// If any sample matches the condition, the rule is firing.
if r . compareOp ( ) == ValueIsAbove {
for _ , smpl := range series . Floats {
if smpl . F > r . targetVal ( ) {
alertSmpl = pql . Sample { F : smpl . F , T : smpl . T , Metric : series . Metric }
shouldAlert = true
break
}
}
} else if r . compareOp ( ) == ValueIsBelow {
for _ , smpl := range series . Floats {
if smpl . F < r . targetVal ( ) {
alertSmpl = pql . Sample { F : smpl . F , T : smpl . T , Metric : series . Metric }
shouldAlert = true
break
}
}
} else if r . compareOp ( ) == ValueIsEq {
for _ , smpl := range series . Floats {
if smpl . F == r . targetVal ( ) {
alertSmpl = pql . Sample { F : smpl . F , T : smpl . T , Metric : series . Metric }
shouldAlert = true
break
}
}
} else if r . compareOp ( ) == ValueIsNotEq {
for _ , smpl := range series . Floats {
if smpl . F != r . targetVal ( ) {
alertSmpl = pql . Sample { F : smpl . F , T : smpl . T , Metric : series . Metric }
shouldAlert = true
break
}
}
}
case AllTheTimes :
// If all samples match the condition, the rule is firing.
shouldAlert = true
alertSmpl = pql . Sample { F : r . targetVal ( ) , Metric : series . Metric }
if r . compareOp ( ) == ValueIsAbove {
for _ , smpl := range series . Floats {
if smpl . F <= r . targetVal ( ) {
shouldAlert = false
break
}
}
2024-09-04 18:30:04 +05:30
// use min value from the series
if shouldAlert {
var minValue float64 = math . Inf ( 1 )
for _ , smpl := range series . Floats {
if smpl . F < minValue {
minValue = smpl . F
}
}
alertSmpl = pql . Sample { F : minValue , Metric : series . Metric }
}
2024-02-11 22:31:46 +05:30
} else if r . compareOp ( ) == ValueIsBelow {
for _ , smpl := range series . Floats {
if smpl . F >= r . targetVal ( ) {
shouldAlert = false
break
}
}
2024-09-04 18:30:04 +05:30
if shouldAlert {
var maxValue float64 = math . Inf ( - 1 )
for _ , smpl := range series . Floats {
if smpl . F > maxValue {
maxValue = smpl . F
}
}
alertSmpl = pql . Sample { F : maxValue , Metric : series . Metric }
}
2024-02-11 22:31:46 +05:30
} else if r . compareOp ( ) == ValueIsEq {
for _ , smpl := range series . Floats {
if smpl . F != r . targetVal ( ) {
shouldAlert = false
break
}
}
} else if r . compareOp ( ) == ValueIsNotEq {
for _ , smpl := range series . Floats {
if smpl . F == r . targetVal ( ) {
shouldAlert = false
break
}
}
2024-09-04 18:30:04 +05:30
if shouldAlert {
for _ , smpl := range series . Floats {
if ! math . IsInf ( smpl . F , 0 ) && ! math . IsNaN ( smpl . F ) {
alertSmpl = pql . Sample { F : smpl . F , Metric : series . Metric }
break
}
}
}
2024-02-11 22:31:46 +05:30
}
case OnAverage :
// If the average of all samples matches the condition, the rule is firing.
var sum float64
for _ , smpl := range series . Floats {
if math . IsNaN ( smpl . F ) {
continue
}
sum += smpl . F
}
avg := sum / float64 ( len ( series . Floats ) )
alertSmpl = pql . Sample { F : avg , Metric : series . Metric }
if r . compareOp ( ) == ValueIsAbove {
if avg > r . targetVal ( ) {
shouldAlert = true
}
} else if r . compareOp ( ) == ValueIsBelow {
if avg < r . targetVal ( ) {
shouldAlert = true
}
} else if r . compareOp ( ) == ValueIsEq {
if avg == r . targetVal ( ) {
shouldAlert = true
}
} else if r . compareOp ( ) == ValueIsNotEq {
if avg != r . targetVal ( ) {
shouldAlert = true
}
}
case InTotal :
// If the sum of all samples matches the condition, the rule is firing.
var sum float64
for _ , smpl := range series . Floats {
if math . IsNaN ( smpl . F ) {
continue
}
sum += smpl . F
}
alertSmpl = pql . Sample { F : sum , Metric : series . Metric }
if r . compareOp ( ) == ValueIsAbove {
if sum > r . targetVal ( ) {
shouldAlert = true
}
} else if r . compareOp ( ) == ValueIsBelow {
if sum < r . targetVal ( ) {
shouldAlert = true
}
} else if r . compareOp ( ) == ValueIsEq {
if sum == r . targetVal ( ) {
shouldAlert = true
}
} else if r . compareOp ( ) == ValueIsNotEq {
if sum != r . targetVal ( ) {
shouldAlert = true
}
}
}
return alertSmpl , shouldAlert
}
2022-07-14 11:59:06 +05:30
func ( r * PromRule ) String ( ) string {
ar := PostableRule {
2024-03-27 20:25:18 +05:30
AlertName : r . name ,
2022-08-04 15:31:21 +05:30
RuleCondition : r . ruleCondition ,
EvalWindow : Duration ( r . evalWindow ) ,
Labels : r . labels . Map ( ) ,
Annotations : r . annotations . Map ( ) ,
PreferredChannels : r . preferredChannels ,
2022-07-14 11:59:06 +05:30
}
byt , err := yaml . Marshal ( ar )
if err != nil {
return fmt . Sprintf ( "error marshaling alerting rule: %s" , err . Error ( ) )
}
return string ( byt )
}