Skip to content

Commit 2aabf51

Browse files
adambenhassenGemini
authored and committed
fix(traces): eliminate clickhouse query timeouts and improve read times
1 parent 6f4861a commit 2aabf51

File tree

2 files changed

+76
-39
lines changed

2 files changed

+76
-39
lines changed

.changeset/orange-tools-train.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'hive': minor
3+
---
4+
5+
Eliminate ClickHouse query timeouts and improve read times for large numbers of traces in the dashboard.

packages/services/api/src/modules/operations/providers/traces.ts

Lines changed: 71 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,9 @@ export class Traces {
4545
WHERE
4646
"trace_id" IN (${sql.array(traceIds, 'String')})
4747
LIMIT 1 BY "trace_id"
48+
SETTINGS max_threads = 8
4849
`,
49-
timeout: 10_000,
50+
timeout: 30_000,
5051
queryId: 'Traces.findTraceByTraceId',
5152
});
5253

@@ -99,11 +100,13 @@ export class Traces {
99100
${spanFields}
100101
FROM
101102
"otel_traces"
102-
WHERE
103+
PREWHERE
103104
"TraceId" = ${traceId}
104-
AND "SpanAttributes"['hive.target_id'] = ${targetId}
105+
WHERE
106+
"SpanAttributes"['hive.target_id'] = ${targetId}
107+
SETTINGS max_threads = 8
105108
`,
106-
timeout: 10_000,
109+
timeout: 30_000,
107110
queryId: 'Traces.findSpansForTraceId',
108111
});
109112

@@ -147,7 +150,6 @@ export class Traces {
147150
const orderByFragment = sql`
148151
${sort?.sort === 'DURATION' ? sql`"duration" ${sort.direction === 'ASC' ? sql`ASC` : sql`DESC`},` : sql``}
149152
"timestamp" ${sort?.sort === 'TIMESTAMP' && sort?.direction === 'ASC' ? sql`ASC` : sql`DESC`}
150-
, "trace_id" DESC
151153
`;
152154

153155
let paginationSQLFragmentPart = sql``;
@@ -163,49 +165,59 @@ export class Traces {
163165
"duration" = ${durationStr}
164166
AND "timestamp" < ${cursor.timestamp}
165167
)
166-
OR (
167-
"duration" = ${durationStr}
168-
AND "timestamp" = ${cursor.timestamp}
169-
AND "trace_id" < ${cursor.traceId}
170-
)
171168
)
172169
`;
173170
} /* TIMESTAMP */ else {
174171
const operator = sort?.direction === 'ASC' ? sql`>` : sql`<`;
175172
paginationSQLFragmentPart = sql`
176-
AND (
177-
(
178-
"timestamp" = ${cursor.timestamp}
179-
AND "trace_id" < ${cursor.traceId}
180-
)
181-
OR "timestamp" ${operator} ${cursor.timestamp}
182-
)
173+
AND "timestamp" ${operator} ${cursor.timestamp}
183174
`;
184175
}
185176
}
186177

187178
const sqlConditions = buildTraceFilterSQLConditions(filter, false);
188179

189-
const filterSQLFragment = sqlConditions.length
190-
? sql`AND ${sql.join(sqlConditions, ' AND ')}`
180+
const timestampPrewhereConditions: SqlValue[] = [];
181+
const otherFilterConditions: SqlValue[] = [];
182+
183+
for (const condition of sqlConditions) {
184+
if (condition.sql.includes('"otel_traces_normalized"."timestamp"')) {
185+
timestampPrewhereConditions.push(condition);
186+
} else {
187+
otherFilterConditions.push(condition);
188+
}
189+
}
190+
191+
const filterSQLFragment = otherFilterConditions.length
192+
? sql`AND ${sql.join(otherFilterConditions, ' AND ')}`
193+
: sql``;
194+
195+
const prewhereTimestampFragment = timestampPrewhereConditions.length
196+
? sql`AND ${sql.join(timestampPrewhereConditions, ' AND ')}`
191197
: sql``;
192198

199+
const query = sql`
200+
SELECT
201+
${traceFields}
202+
FROM
203+
"otel_traces_normalized"
204+
PREWHERE
205+
target_id = ${targetId}
206+
${prewhereTimestampFragment}
207+
WHERE
208+
true
209+
${paginationSQLFragmentPart}
210+
${filterSQLFragment}
211+
ORDER BY
212+
${orderByFragment}
213+
LIMIT ${sql.raw(String(limit + 1))}
214+
SETTINGS max_threads = 8
215+
`;
216+
193217
const tracesQuery = await this.clickHouse.query<unknown>({
194-
query: sql`
195-
SELECT
196-
${traceFields}
197-
FROM
198-
"otel_traces_normalized"
199-
WHERE
200-
target_id = ${targetId}
201-
${paginationSQLFragmentPart}
202-
${filterSQLFragment}
203-
ORDER BY
204-
${orderByFragment}
205-
LIMIT ${sql.raw(String(limit + 1))}
206-
`,
218+
query,
207219
queryId: 'traces',
208-
timeout: 10_000,
220+
timeout: 30_000,
209221
});
210222

211223
let traces = TraceListModel.parse(tracesQuery.data);
@@ -298,17 +310,18 @@ export class Traces {
298310
, sumIf(1, "graphql_error_count" != 0 ${filterSQLFragment}) AS "error_count_filtered"
299311
FROM
300312
"otel_traces_normalized"
301-
WHERE
313+
PREWHERE
302314
"target_id" = ${targetId}
303315
AND "otel_traces_normalized"."timestamp" >= toDateTime(${formatDate(startDate)}, 'UTC')
304316
AND "otel_traces_normalized"."timestamp" <= toDateTime(${formatDate(endDate)}, 'UTC')
305317
GROUP BY
306318
"time_bucket_start"
307319
) AS "t"
308320
ON "t"."time_bucket_start" = "time_bucket_list"."time_bucket"
321+
SETTINGS max_threads = 8
309322
`,
310323
queryId: `trace_status_breakdown_for_target_id_`,
311-
timeout: 10_000,
324+
timeout: 30_000,
312325
});
313326

314327
return TraceStatusBreakdownBucketList.parse(result.data);
@@ -344,15 +357,26 @@ export class TraceBreakdownLoader {
344357
const arrJoinColumnAlias = 'arr_join_column_value';
345358

346359
for (const { key, columnExpression, limit, arrayJoinColumn } of inputs) {
360+
const prewhereConditions: SqlValue[] = [];
361+
const whereConditions: SqlValue[] = [];
362+
363+
for (const condition of this.conditions) {
364+
if (condition.sql.includes('target_id') || condition.sql.includes('"timestamp"')) {
365+
prewhereConditions.push(condition);
366+
} else {
367+
whereConditions.push(condition);
368+
}
369+
}
370+
347371
statements.push(sql`
348372
SELECT
349373
'${sql.raw(key)}' AS "key"
350374
, toString(${sql.raw(columnExpression ?? arrJoinColumnAlias)}) AS "value"
351375
, count(*) AS "count"
352376
FROM "otel_traces_normalized"
353377
${sql.raw(arrayJoinColumn ? `ARRAY JOIN ${arrayJoinColumn} AS "${arrJoinColumnAlias}"` : '')}
354-
WHERE
355-
${sql.join(this.conditions, ' AND ')}
378+
${prewhereConditions.length ? sql`PREWHERE ${sql.join(prewhereConditions, ' AND ')}` : sql``}
379+
${whereConditions.length ? sql`WHERE ${sql.join(whereConditions, ' AND ')}` : sql``}
356380
GROUP BY
357381
"value"
358382
ORDER BY
@@ -363,6 +387,7 @@ export class TraceBreakdownLoader {
363387

364388
const query = sql`
365389
${sql.join(statements, ' UNION ALL ')}
390+
SETTINGS max_threads = 8
366391
`;
367392

368393
const results = await this.clickhouse.query<{
@@ -372,7 +397,7 @@ export class TraceBreakdownLoader {
372397
}>({
373398
query,
374399
queryId: 'traces_filter_options',
375-
timeout: 10_000,
400+
timeout: 60_000,
376401
});
377402

378403
const rowsGroupedByKey = results.data.reduce(
@@ -403,6 +428,14 @@ export class TraceBreakdownLoader {
403428

404429
this.conditions = [sql`target_id = ${targetId}`];
405430

431+
if (filter?.period) {
432+
const period = parseDateRangeInput(filter.period);
433+
this.conditions.push(
434+
sql`"timestamp" >= toDateTime(${formatDate(period.from)}, 'UTC')`,
435+
sql`"timestamp" <= toDateTime(${formatDate(period.to)}, 'UTC')`,
436+
);
437+
}
438+
406439
if (filter?.traceIds?.length) {
407440
this.conditions.push(sql`"trace_id" IN (${sql.array(filter.traceIds, 'String')})`);
408441
}
@@ -571,7 +604,6 @@ const traceFields = sql`
571604
, "http_url" AS "httpUrl"
572605
, "duration"
573606
, "graphql_operation_name" AS "graphqlOperationName"
574-
, "graphql_operation_document" AS "graphqlOperationDocument"
575607
, "graphql_operation_hash" AS "graphqlOperationHash"
576608
, "client_name" AS "clientName"
577609
, "client_version" AS "clientVersion"

0 commit comments

Comments
 (0)