From ddc3cc4911fad7515b4dd3fc73a2d806eb5f74e5 Mon Sep 17 00:00:00 2001 From: Srikanth Chekuri Date: Fri, 10 Nov 2023 17:43:19 +0530 Subject: [PATCH] chore: dashboards to alerts creation support in query-service (#3924) --- pkg/query-service/rules/alerting.go | 1 + pkg/query-service/rules/manager.go | 12 +++ pkg/query-service/rules/promRule.go | 19 ++++- pkg/query-service/rules/thresholdRule.go | 94 +++++++++++++----------- 4 files changed, 81 insertions(+), 45 deletions(-) diff --git a/pkg/query-service/rules/alerting.go b/pkg/query-service/rules/alerting.go index 60cb4c938442..ad82470e8321 100644 --- a/pkg/query-service/rules/alerting.go +++ b/pkg/query-service/rules/alerting.go @@ -144,6 +144,7 @@ type RuleCondition struct { Target *float64 `yaml:"target,omitempty" json:"target,omitempty"` MatchType `json:"matchType,omitempty"` TargetUnit string `json:"targetUnit,omitempty"` + SelectedQuery string `json:"selectedQueryName,omitempty"` } func (rc *RuleCondition) IsValid() bool { diff --git a/pkg/query-service/rules/manager.go b/pkg/query-service/rules/manager.go index 30c643b031fb..a91ff881014c 100644 --- a/pkg/query-service/rules/manager.go +++ b/pkg/query-service/rules/manager.go @@ -714,6 +714,18 @@ func (m *Manager) GetRule(ctx context.Context, id string) (*GettableRule, error) return nil, err } r.Id = fmt.Sprintf("%d", s.Id) + // fetch state of rule from memory + if rm, ok := m.rules[r.Id]; !ok { + r.State = StateDisabled.String() + r.Disabled = true + } else { + r.State = rm.State().String() + } + r.CreatedAt = s.CreatedAt + r.CreatedBy = s.CreatedBy + r.UpdatedAt = s.UpdatedAt + r.UpdatedBy = s.UpdatedBy + return r, nil } diff --git a/pkg/query-service/rules/promRule.go b/pkg/query-service/rules/promRule.go index 25209b21c8b1..6d0cafa9309a 100644 --- a/pkg/query-service/rules/promRule.go +++ b/pkg/query-service/rules/promRule.go @@ -297,11 +297,28 @@ func (r *PromRule) SendAlerts(ctx context.Context, ts time.Time, resendDelay tim notifyFunc(ctx, "", 
alerts...) } +func (r *PromRule) GetSelectedQuery() string { + if r.ruleCondition != nil { + // If the user has explicitly set the selected query, we return that. + if r.ruleCondition.SelectedQuery != "" { + return r.ruleCondition.SelectedQuery + } + // Historically, we used to have only one query in the alerts for promql. + // So, if there is only one query, we return that. + // This is to maintain backward compatibility. + // For new rules, we will have to explicitly set the selected query. + return "A" + } + // This should never happen. + return "" +} + func (r *PromRule) getPqlQuery() (string, error) { if r.ruleCondition.CompositeQuery.QueryType == v3.QueryTypePromQL { if len(r.ruleCondition.CompositeQuery.PromQueries) > 0 { - if promQuery, ok := r.ruleCondition.CompositeQuery.PromQueries["A"]; ok { + selectedQuery := r.GetSelectedQuery() + if promQuery, ok := r.ruleCondition.CompositeQuery.PromQueries[selectedQuery]; ok { query := promQuery.Query if query == "" { return query, fmt.Errorf("a promquery needs to be set for this rule to function") diff --git a/pkg/query-service/rules/thresholdRule.go b/pkg/query-service/rules/thresholdRule.go index c4d6fdbf1208..62b8b97635ca 100644 --- a/pkg/query-service/rules/thresholdRule.go +++ b/pkg/query-service/rules/thresholdRule.go @@ -135,13 +135,6 @@ func (r *ThresholdRule) PreferredChannels() []string { return r.preferredChannels } -func (r *ThresholdRule) target() *float64 { - if r.ruleCondition == nil { - return nil - } - return r.ruleCondition.Target -} - func (r *ThresholdRule) targetVal() float64 { if r.ruleCondition == nil || r.ruleCondition.Target == nil { return 0 @@ -217,26 +210,6 @@ func (r *ThresholdRule) Annotations() labels.BaseLabels { return r.annotations } -func (r *ThresholdRule) sample(alert *Alert, ts time.Time) Sample { - lb := labels.NewBuilder(r.labels) - alertLabels := alert.Labels.(labels.Labels) - for _, l := range alertLabels { - lb.Set(l.Name, l.Value) - } - - lb.Set(labels.MetricNameLabel, 
alertMetricName) - lb.Set(labels.AlertNameLabel, r.name) - lb.Set(labels.AlertRuleIdLabel, r.ID()) - lb.Set(labels.AlertStateLabel, alert.State.String()) - - s := Sample{ - Metric: lb.Labels(), - Point: Point{T: timestamp.FromTime(ts), V: 1}, - } - - return s -} - // GetEvaluationDuration returns the time in seconds it took to evaluate the alerting rule. func (r *ThresholdRule) GetEvaluationDuration() time.Duration { r.mtx.Lock() @@ -682,6 +655,54 @@ func (r *ThresholdRule) prepareClickhouseQueries(ts time.Time) (map[string]strin return queries, nil } +func (r *ThresholdRule) GetSelectedQuery() string { + + // The actual query string is not relevant here + // we just need to know the selected query + + var queries map[string]string + var err error + + if r.ruleCondition.QueryType() == v3.QueryTypeBuilder { + queries, err = r.prepareBuilderQueries(time.Now()) + if err != nil { + zap.S().Errorf("ruleid:", r.ID(), "\t msg: failed to prepare metric queries", zap.Error(err)) + return "" + } + } else if r.ruleCondition.QueryType() == v3.QueryTypeClickHouseSQL { + queries, err = r.prepareClickhouseQueries(time.Now()) + if err != nil { + zap.S().Errorf("ruleid:", r.ID(), "\t msg: failed to prepare clickhouse queries", zap.Error(err)) + return "" + } + } + + if r.ruleCondition != nil { + if r.ruleCondition.SelectedQuery != "" { + return r.ruleCondition.SelectedQuery + } + + // The following logic exists for backward compatibility + // If there is no selected query, then + // - check if F1 is present, if yes, return F1 + // - else return the query with max ascii value + // this logic is not really correct. we should be considering + // whether the query is enabled or not. 
but this is a temporary + // fix to support backward compatibility + if _, ok := queries["F1"]; ok { + return "F1" + } + keys := make([]string, 0, len(queries)) + for k := range queries { + keys = append(keys, k) + } + sort.Strings(keys) + return keys[len(keys)-1] + } + // This should never happen + return "" +} + // query looks if alert condition is being // satisfied and returns the signals func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, ts time.Time, ch clickhouse.Conn) (Vector, error) { @@ -691,7 +712,7 @@ func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, ts time.Time, ch c } // var to hold target query to be executed - queries := make(map[string]string) + var queries map[string]string var err error // fetch the target query based on query type @@ -723,22 +744,7 @@ func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, ts time.Time, ch c zap.S().Debugf("ruleid:", r.ID(), "\t runQueries:", queries) - // find target query label - if query, ok := queries["F1"]; ok { - // found a formula query, run with it - return r.runChQuery(ctx, ch, query) - } - - // no formula in rule condition, now look for - // query label with max ascii val - keys := make([]string, 0, len(queries)) - for k := range queries { - keys = append(keys, k) - } - sort.Strings(keys) - - queryLabel := keys[len(keys)-1] - + queryLabel := r.GetSelectedQuery() zap.S().Debugf("ruleId: ", r.ID(), "\t result query label:", queryLabel) if queryString, ok := queries[queryLabel]; ok {