Skip to content

Commit 20aaeb8

Browse files
chore: query tee comparator telemetry improvements (#20849)
1 parent d94ee60 commit 20aaeb8

File tree

15 files changed

+432
-224
lines changed

15 files changed

+432
-224
lines changed

cmd/querytee/main.go

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ import (
1212
"github.com/prometheus/client_golang/prometheus"
1313
"github.com/prometheus/client_golang/prometheus/collectors"
1414

15-
"github.com/grafana/loki/v3/pkg/querytee/comparator"
16-
1715
"github.com/grafana/loki/v3/pkg/querytee"
16+
"github.com/grafana/loki/v3/pkg/querytee/comparator"
1817
loki_tracing "github.com/grafana/loki/v3/pkg/tracing"
18+
"github.com/grafana/loki/v3/pkg/util/constants"
1919
util_log "github.com/grafana/loki/v3/pkg/util/log"
2020
)
2121

@@ -95,22 +95,22 @@ func lokiReadRoutes(cfg Config) []querytee.Route {
9595
})
9696

9797
return []querytee.Route{
98-
{Path: "/loki/api/v1/query_range", RouteName: "api_v1_query_range", Methods: []string{"GET", "POST"}, ResponseComparator: samplesComparator},
99-
{Path: "/loki/api/v1/query", RouteName: "api_v1_query", Methods: []string{"GET", "POST"}, ResponseComparator: samplesComparator},
100-
{Path: "/loki/api/v1/label", RouteName: "api_v1_label", Methods: []string{"GET"}, ResponseComparator: nil},
101-
{Path: "/loki/api/v1/labels", RouteName: "api_v1_labels", Methods: []string{"GET"}, ResponseComparator: nil},
102-
{Path: "/loki/api/v1/label/{name}/values", RouteName: "api_v1_label_name_values", Methods: []string{"GET"}, ResponseComparator: nil},
103-
{Path: "/loki/api/v1/series", RouteName: "api_v1_series", Methods: []string{"GET"}, ResponseComparator: nil},
104-
{Path: "/api/prom/query", RouteName: "api_prom_query", Methods: []string{"GET", "POST"}, ResponseComparator: samplesComparator},
105-
{Path: "/api/prom/label", RouteName: "api_prom_label", Methods: []string{"GET"}, ResponseComparator: nil},
106-
{Path: "/api/prom/label/{name}/values", RouteName: "api_prom_label_name_values", Methods: []string{"GET"}, ResponseComparator: nil},
107-
{Path: "/api/prom/series", RouteName: "api_prom_series", Methods: []string{"GET"}, ResponseComparator: nil},
98+
{Path: constants.PathLokiQueryRange, RouteName: "api_v1_query_range", Methods: []string{"GET", "POST"}, ResponseComparator: samplesComparator},
99+
{Path: constants.PathLokiQuery, RouteName: "api_v1_query", Methods: []string{"GET", "POST"}, ResponseComparator: samplesComparator},
100+
{Path: constants.PathLokiLabel, RouteName: "api_v1_label", Methods: []string{"GET"}, ResponseComparator: nil},
101+
{Path: constants.PathLokiLabels, RouteName: "api_v1_labels", Methods: []string{"GET"}, ResponseComparator: nil},
102+
{Path: constants.PathLokiLabelNameValues, RouteName: "api_v1_label_name_values", Methods: []string{"GET"}, ResponseComparator: nil},
103+
{Path: constants.PathLokiSeries, RouteName: "api_v1_series", Methods: []string{"GET"}, ResponseComparator: nil},
104+
{Path: constants.PathPromQuery, RouteName: "api_prom_query", Methods: []string{"GET", "POST"}, ResponseComparator: samplesComparator},
105+
{Path: constants.PathPromLabel, RouteName: "api_prom_label", Methods: []string{"GET"}, ResponseComparator: nil},
106+
{Path: constants.PathPromLabelNameValues, RouteName: "api_prom_label_name_values", Methods: []string{"GET"}, ResponseComparator: nil},
107+
{Path: constants.PathPromSeries, RouteName: "api_prom_series", Methods: []string{"GET"}, ResponseComparator: nil},
108108
}
109109
}
110110

111111
func lokiWriteRoutes() []querytee.Route {
112112
return []querytee.Route{
113-
{Path: "/loki/api/v1/push", RouteName: "api_v1_push", Methods: []string{"POST"}, ResponseComparator: nil},
114-
{Path: "/api/prom/push", RouteName: "api_prom_push", Methods: []string{"POST"}, ResponseComparator: nil},
113+
{Path: constants.PathLokiPush, RouteName: "api_v1_push", Methods: []string{"POST"}, ResponseComparator: nil},
114+
{Path: constants.PathPromPush, RouteName: "api_prom_push", Methods: []string{"POST"}, ResponseComparator: nil},
115115
}
116116
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
-- +goose Up
2+
-- Add mismatch_cause column to sampled_queries and comparison_outcomes tables.
3+
-- Stores the cause when comparison_status is 'mismatch'.
4+
5+
ALTER TABLE sampled_queries
6+
ADD COLUMN mismatch_cause VARCHAR(80) DEFAULT NULL;
7+
8+
ALTER TABLE comparison_outcomes
9+
ADD COLUMN mismatch_cause VARCHAR(80) DEFAULT NULL;
10+
11+
-- +goose Down
12+
-- Remove the mismatch_cause column from the comparison_outcomes and sampled_queries tables.
13+
14+
ALTER TABLE comparison_outcomes DROP COLUMN mismatch_cause;
15+
ALTER TABLE sampled_queries DROP COLUMN mismatch_cause;
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
-- +goose Up
2+
-- Add 'match_within_tolerance' to comparison_status ENUM on sampled_queries.
3+
-- The application sets this when responses differ by hash but match within
4+
-- the configured value tolerance (e.g. floating-point drift).
5+
6+
ALTER TABLE sampled_queries
7+
MODIFY COLUMN comparison_status ENUM('match', 'mismatch', 'error', 'partial', 'match_within_tolerance') NOT NULL DEFAULT 'mismatch';
8+
9+
-- +goose Down
10+
-- Revert to original ENUM. Map any match_within_tolerance rows to 'match' so the column change succeeds.
11+
12+
UPDATE sampled_queries SET comparison_status = 'match' WHERE comparison_status = 'match_within_tolerance';
13+
14+
ALTER TABLE sampled_queries
15+
MODIFY COLUMN comparison_status ENUM('match', 'mismatch', 'error', 'partial') NOT NULL DEFAULT 'mismatch';

pkg/goldfish/storage_mysql.go

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,9 @@ func (s *MySQLStorage) StoreQuerySample(ctx context.Context, sample *QuerySample
9999
cell_a_used_new_engine, cell_b_used_new_engine,
100100
sampled_at,
101101
comparison_status,
102-
match_within_tolerance
103-
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
102+
match_within_tolerance,
103+
mismatch_cause
104+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
104105
`
105106

106107
// Convert empty span IDs to NULL for database storage
@@ -112,6 +113,12 @@ func (s *MySQLStorage) StoreQuerySample(ctx context.Context, sample *QuerySample
112113
cellBSpanID = sample.CellBSpanID
113114
}
114115

116+
// Prepare nullable mismatch_cause (NULL when empty)
117+
var mismatchCauseVal any
118+
if comparison.MismatchCause != "" {
119+
mismatchCauseVal = comparison.MismatchCause
120+
}
121+
115122
// Prepare nullable result storage metadata
116123
var cellAResultURI, cellBResultURI any
117124
var cellAResultSize, cellBResultSize any
@@ -181,6 +188,7 @@ func (s *MySQLStorage) StoreQuerySample(ctx context.Context, sample *QuerySample
181188
sample.SampledAt,
182189
comparison.ComparisonStatus,
183190
comparison.MatchWithinTolerance,
191+
mismatchCauseVal,
184192
)
185193

186194
return err
@@ -200,22 +208,29 @@ func (s *MySQLStorage) StoreComparisonResult(ctx context.Context, result *Compar
200208

201209
query := `
202210
INSERT INTO comparison_outcomes (
203-
correlation_id, comparison_status, match_within_tolerance,
211+
correlation_id, comparison_status, match_within_tolerance, mismatch_cause,
204212
difference_details, performance_metrics,
205213
compared_at
206-
) VALUES (?, ?, ?, ?, ?, ?)
214+
) VALUES (?, ?, ?, ?, ?, ?, ?)
207215
ON DUPLICATE KEY UPDATE
208216
comparison_status = VALUES(comparison_status),
209217
match_within_tolerance = VALUES(match_within_tolerance),
218+
mismatch_cause = VALUES(mismatch_cause),
210219
difference_details = VALUES(difference_details),
211220
performance_metrics = VALUES(performance_metrics),
212221
compared_at = VALUES(compared_at)
213222
`
214223

224+
var comparisonMismatchCause any
225+
if result.MismatchCause != "" {
226+
comparisonMismatchCause = result.MismatchCause
227+
}
228+
215229
_, err = s.db.ExecContext(ctx, query,
216230
result.CorrelationID,
217231
result.ComparisonStatus,
218232
result.MatchWithinTolerance,
233+
comparisonMismatchCause,
219234
differenceJSON,
220235
perfMetricsJSON,
221236
result.ComparedAt,
@@ -255,7 +270,7 @@ func (s *MySQLStorage) GetSampledQueries(ctx context.Context, page, pageSize int
255270
cell_a_span_id, cell_b_span_id,
256271
cell_a_used_new_engine, cell_b_used_new_engine,
257272
sampled_at, created_at,
258-
comparison_status, match_within_tolerance
273+
comparison_status, match_within_tolerance, mismatch_cause
259274
FROM sampled_queries
260275
` + whereClause + `
261276
ORDER BY sampled_at DESC
@@ -303,7 +318,7 @@ func (s *MySQLStorage) GetSampledQueries(ctx context.Context, page, pageSize int
303318
&cellASpanID, &cellBSpanID,
304319
&q.CellAUsedNewEngine, &q.CellBUsedNewEngine,
305320
&q.SampledAt, &createdAt,
306-
&q.ComparisonStatus, &q.MatchWithinTolerance,
321+
&q.ComparisonStatus, &q.MatchWithinTolerance, &q.MismatchCause,
307322
)
308323
if err != nil {
309324
return nil, err
@@ -378,7 +393,7 @@ func (s *MySQLStorage) GetQueryByCorrelationID(ctx context.Context, correlationI
378393
cell_a_trace_id, cell_b_trace_id,
379394
cell_a_span_id, cell_b_span_id,
380395
cell_a_used_new_engine, cell_b_used_new_engine,
381-
sampled_at, created_at, comparison_status, match_within_tolerance
396+
sampled_at, created_at, comparison_status, match_within_tolerance, mismatch_cause
382397
FROM sampled_queries
383398
WHERE correlation_id = ?
384399
`
@@ -405,7 +420,7 @@ func (s *MySQLStorage) GetQueryByCorrelationID(ctx context.Context, correlationI
405420
&q.CellATraceID, &q.CellBTraceID,
406421
&cellASpanID, &cellBSpanID,
407422
&q.CellAUsedNewEngine, &q.CellBUsedNewEngine,
408-
&q.SampledAt, &createdAt, &q.ComparisonStatus, &q.MatchWithinTolerance,
423+
&q.SampledAt, &createdAt, &q.ComparisonStatus, &q.MatchWithinTolerance, &q.MismatchCause,
409424
)
410425

411426
if err != nil {

pkg/goldfish/types.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ type QuerySample struct {
4848
// Comparison outcome
4949
ComparisonStatus ComparisonStatus `json:"comparisonStatus"`
5050
MatchWithinTolerance bool `json:"matchWithinTolerance"`
51+
MismatchCause string `json:"mismatchCause,omitempty"` // Set when ComparisonStatus is mismatch
5152

5253
SampledAt time.Time `json:"sampledAt"`
5354
}
@@ -70,6 +71,7 @@ type ComparisonResult struct {
7071
CorrelationID string
7172
ComparisonStatus ComparisonStatus
7273
MatchWithinTolerance bool
74+
MismatchCause string
7375
DifferenceDetails map[string]any
7476
PerformanceMetrics PerformanceMetrics
7577
ComparedAt time.Time
@@ -79,10 +81,11 @@ type ComparisonResult struct {
7981
type ComparisonStatus string
8082

8183
const (
82-
ComparisonStatusMatch ComparisonStatus = "match"
83-
ComparisonStatusMismatch ComparisonStatus = "mismatch"
84-
ComparisonStatusError ComparisonStatus = "error"
85-
ComparisonStatusPartial ComparisonStatus = "partial"
84+
ComparisonStatusMatch ComparisonStatus = "match"
85+
ComparisonStatusMismatch ComparisonStatus = "mismatch"
86+
ComparisonStatusError ComparisonStatus = "error"
87+
ComparisonStatusPartial ComparisonStatus = "partial"
88+
ComparisonStatusMatchWithinTolerance ComparisonStatus = "match_within_tolerance"
8689
)
8790

8891
// IsValid checks if the ComparisonStatus value is valid

0 commit comments

Comments
 (0)