From 87ce1976317fc8ab69aaf6a0699a32efd04c2dfe Mon Sep 17 00:00:00 2001 From: Nityananda Gohain Date: Sat, 30 Aug 2025 23:16:56 +0530 Subject: [PATCH] fix: don't skip resource filter in main table for OR queries (#8958) * fix: don't skip resource filter in main table for OR queries * fix: dont skip resource table * fix: make check case insensitive * fix: iterate over token stream --- pkg/querybuilder/where_clause_visitor.go | 13 +++++++++-- pkg/telemetrylogs/stmt_builder_test.go | 29 ++++++++++++++++++++++++ pkg/telemetrytraces/stmt_builder_test.go | 4 ++-- 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/pkg/querybuilder/where_clause_visitor.go b/pkg/querybuilder/where_clause_visitor.go index 31c896ffcd15..ba4ad3e36824 100644 --- a/pkg/querybuilder/where_clause_visitor.go +++ b/pkg/querybuilder/where_clause_visitor.go @@ -95,8 +95,6 @@ func PrepareWhereClause(query string, opts FilterExprVisitorOpts) (*PreparedWher opts.Builder = sb } - visitor := newFilterExpressionVisitor(opts) - // Set up error handling lexerErrorListener := NewErrorListener() lexer.RemoveErrorListeners() @@ -111,6 +109,17 @@ func PrepareWhereClause(query string, opts FilterExprVisitorOpts) (*PreparedWher // Parse the query tree := parser.Query() + // override skipResourceFilter if the expression contains OR + for _, tok := range tokens.GetAllTokens() { + if tok.GetTokenType() == grammar.FilterQueryParserOR { + opts.SkipResourceFilter = false + break + } + } + tokens.Reset() + + visitor := newFilterExpressionVisitor(opts) + // Handle syntax errors if len(parserErrorListener.SyntaxErrors) > 0 { combinedErrors := errors.Newf( diff --git a/pkg/telemetrylogs/stmt_builder_test.go b/pkg/telemetrylogs/stmt_builder_test.go index f1b865e4e1ae..ed23fafc183c 100644 --- a/pkg/telemetrylogs/stmt_builder_test.go +++ b/pkg/telemetrylogs/stmt_builder_test.go @@ -74,6 +74,35 @@ func TestStatementBuilderTimeSeries(t *testing.T) { }, expectedErr: nil, }, + { + name: "Time series with OR b/w resource attr and attribute filter", + requestType: qbtypes.RequestTypeTimeSeries, + query: qbtypes.QueryBuilderQuery[qbtypes.LogAggregation]{ + Signal: telemetrytypes.SignalTraces, + StepInterval: qbtypes.Step{Duration: 30 * time.Second}, + Aggregations: []qbtypes.LogAggregation{ + { + Expression: "count()", + }, + }, + Filter: &qbtypes.Filter{ + Expression: "service.name = 'redis-manual' OR http.method = 'GET'", + }, + Limit: 10, + GroupBy: []qbtypes.GroupByKey{ + { + TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{ + Name: "service.name", + }, + }, + }, + }, + expected: qbtypes.Statement{ + Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE ((simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) OR true) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), __limit_cte AS (SELECT toString(multiIf(mapContains(resources_string, 'service.name') = ?, resources_string['service.name'], NULL)) AS `service.name`, count() AS __result_0 FROM signoz_logs.distributed_logs_v2 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND ((resources_string['service.name'] = ? AND mapContains(resources_string, 'service.name') = ?) OR (attributes_string['http.method'] = ? AND mapContains(attributes_string, 'http.method') = ?)) AND timestamp >= ? AND ts_bucket_start >= ? AND timestamp < ? AND ts_bucket_start <= ? GROUP BY `service.name` ORDER BY __result_0 DESC LIMIT ?) SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 30 SECOND) AS ts, toString(multiIf(mapContains(resources_string, 'service.name') = ?, resources_string['service.name'], NULL)) AS `service.name`, count() AS __result_0 FROM signoz_logs.distributed_logs_v2 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND ((resources_string['service.name'] = ? AND mapContains(resources_string, 'service.name') = ?) OR (attributes_string['http.method'] = ? AND mapContains(attributes_string, 'http.method') = ?)) AND timestamp >= ? AND ts_bucket_start >= ? AND timestamp < ? AND ts_bucket_start <= ? AND (`service.name`) GLOBAL IN (SELECT `service.name` FROM __limit_cte) GROUP BY ts, `service.name`", + Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), true, "redis-manual", true, "GET", true, "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10, true, "redis-manual", true, "GET", true, "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448)}, + }, + expectedErr: nil, + }, { name: "Time series with limit + custom order by", requestType: qbtypes.RequestTypeTimeSeries, diff --git a/pkg/telemetrytraces/stmt_builder_test.go b/pkg/telemetrytraces/stmt_builder_test.go index 9585a373581b..ea1a8e43eab9 100644 --- a/pkg/telemetrytraces/stmt_builder_test.go +++ b/pkg/telemetrytraces/stmt_builder_test.go @@ -89,8 +89,8 @@ func TestStatementBuilder(t *testing.T) { }, }, expected: qbtypes.Statement{ - Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE ((simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) OR true) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), __limit_cte AS (SELECT toString(multiIf(mapContains(resources_string, 'service.name') = ?, resources_string['service.name'], NULL)) AS `service.name`, count() AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND ((attributes_string['http.request.method'] = ? AND mapContains(attributes_string, 'http.request.method') = ?)) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? GROUP BY `service.name` ORDER BY __result_0 DESC LIMIT ?) SELECT toStartOfInterval(timestamp, INTERVAL 30 SECOND) AS ts, toString(multiIf(mapContains(resources_string, 'service.name') = ?, resources_string['service.name'], NULL)) AS `service.name`, count() AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND ((attributes_string['http.request.method'] = ? AND mapContains(attributes_string, 'http.request.method') = ?)) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? AND (`service.name`) GLOBAL IN (SELECT `service.name` FROM __limit_cte) GROUP BY ts, `service.name`", - Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), true, "GET", true, "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10, true, "GET", true, "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448)}, + Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE ((simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) OR true) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?), __limit_cte AS (SELECT toString(multiIf(mapContains(resources_string, 'service.name') = ?, resources_string['service.name'], NULL)) AS `service.name`, count() AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND ((resources_string['service.name'] = ? AND mapContains(resources_string, 'service.name') = ?) OR (attributes_string['http.request.method'] = ? AND mapContains(attributes_string, 'http.request.method') = ?)) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? GROUP BY `service.name` ORDER BY __result_0 DESC LIMIT ?) SELECT toStartOfInterval(timestamp, INTERVAL 30 SECOND) AS ts, toString(multiIf(mapContains(resources_string, 'service.name') = ?, resources_string['service.name'], NULL)) AS `service.name`, count() AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND ((resources_string['service.name'] = ? AND mapContains(resources_string, 'service.name') = ?) OR (attributes_string['http.request.method'] = ? AND mapContains(attributes_string, 'http.request.method') = ?)) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? AND (`service.name`) GLOBAL IN (SELECT `service.name` FROM __limit_cte) GROUP BY ts, `service.name`", + Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), true, "redis-manual", true, "GET", true, "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10, true, "redis-manual", true, "GET", true, "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448)}, }, expectedErr: nil, },