Skip to content

Commit cf72e40

Browse files
adambenhassen and Gemini
authored and committed
fix(traces): eliminate clickhouse query timeouts and improve read times
1 parent 6f4861a commit cf72e40

File tree

2 files changed

+79
-30
lines changed

2 files changed

+79
-30
lines changed

.changeset/orange-tools-train.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'hive': minor
3+
---
4+
5+
Eliminate ClickHouse query timeouts and improve read times for large numbers of traces in the dashboard.

packages/services/api/src/modules/operations/providers/traces.ts

Lines changed: 74 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,9 @@ export class Traces {
4545
WHERE
4646
"trace_id" IN (${sql.array(traceIds, 'String')})
4747
LIMIT 1 BY "trace_id"
48+
SETTINGS max_threads = 8
4849
`,
49-
timeout: 10_000,
50+
timeout: 30_000,
5051
queryId: 'Traces.findTraceByTraceId',
5152
});
5253

@@ -99,11 +100,13 @@ export class Traces {
99100
${spanFields}
100101
FROM
101102
"otel_traces"
102-
WHERE
103+
PREWHERE
103104
"TraceId" = ${traceId}
104-
AND "SpanAttributes"['hive.target_id'] = ${targetId}
105+
WHERE
106+
"SpanAttributes"['hive.target_id'] = ${targetId}
107+
SETTINGS max_threads = 8
105108
`,
106-
timeout: 10_000,
109+
timeout: 30_000,
107110
queryId: 'Traces.findSpansForTraceId',
108111
});
109112

@@ -174,38 +177,59 @@ export class Traces {
174177
const operator = sort?.direction === 'ASC' ? sql`>` : sql`<`;
175178
paginationSQLFragmentPart = sql`
176179
AND (
177-
(
178-
"timestamp" = ${cursor.timestamp}
179-
AND "trace_id" < ${cursor.traceId}
180+
"timestamp" ${operator} ${cursor.timestamp}
181+
OR (
182+
"timestamp" = ${cursor.timestamp}
183+
AND "trace_id" < ${cursor.traceId}
180184
)
181-
OR "timestamp" ${operator} ${cursor.timestamp}
182185
)
183186
`;
184187
}
185188
}
186189

187190
const sqlConditions = buildTraceFilterSQLConditions(filter, false);
188191

189-
const filterSQLFragment = sqlConditions.length
190-
? sql`AND ${sql.join(sqlConditions, ' AND ')}`
192+
const timestampPrewhereConditions: SqlValue[] = [];
193+
const otherFilterConditions: SqlValue[] = [];
194+
195+
for (const condition of sqlConditions) {
196+
if (condition.sql.includes('"otel_traces_normalized"."timestamp"')) {
197+
timestampPrewhereConditions.push(condition);
198+
} else {
199+
otherFilterConditions.push(condition);
200+
}
201+
}
202+
203+
const filterSQLFragment = otherFilterConditions.length
204+
? sql`AND ${sql.join(otherFilterConditions, ' AND ')}`
191205
: sql``;
192206

207+
const prewhereTimestampFragment = timestampPrewhereConditions.length
208+
? sql`AND ${sql.join(timestampPrewhereConditions, ' AND ')}`
209+
: sql``;
210+
211+
const query = sql`
212+
SELECT
213+
${traceFields}
214+
FROM
215+
"otel_traces_normalized"
216+
PREWHERE
217+
target_id = ${targetId}
218+
${prewhereTimestampFragment}
219+
WHERE
220+
true
221+
${paginationSQLFragmentPart}
222+
${filterSQLFragment}
223+
ORDER BY
224+
${orderByFragment}
225+
LIMIT ${sql.raw(String(limit + 1))}
226+
SETTINGS max_threads = 8
227+
`;
228+
193229
const tracesQuery = await this.clickHouse.query<unknown>({
194-
query: sql`
195-
SELECT
196-
${traceFields}
197-
FROM
198-
"otel_traces_normalized"
199-
WHERE
200-
target_id = ${targetId}
201-
${paginationSQLFragmentPart}
202-
${filterSQLFragment}
203-
ORDER BY
204-
${orderByFragment}
205-
LIMIT ${sql.raw(String(limit + 1))}
206-
`,
230+
query,
207231
queryId: 'traces',
208-
timeout: 10_000,
232+
timeout: 30_000,
209233
});
210234

211235
let traces = TraceListModel.parse(tracesQuery.data);
@@ -298,17 +322,18 @@ export class Traces {
298322
, sumIf(1, "graphql_error_count" != 0 ${filterSQLFragment}) AS "error_count_filtered"
299323
FROM
300324
"otel_traces_normalized"
301-
WHERE
325+
PREWHERE
302326
"target_id" = ${targetId}
303327
AND "otel_traces_normalized"."timestamp" >= toDateTime(${formatDate(startDate)}, 'UTC')
304328
AND "otel_traces_normalized"."timestamp" <= toDateTime(${formatDate(endDate)}, 'UTC')
305329
GROUP BY
306330
"time_bucket_start"
307331
) AS "t"
308332
ON "t"."time_bucket_start" = "time_bucket_list"."time_bucket"
333+
SETTINGS max_threads = 8
309334
`,
310335
queryId: `trace_status_breakdown_for_target_id_`,
311-
timeout: 10_000,
336+
timeout: 30_000,
312337
});
313338

314339
return TraceStatusBreakdownBucketList.parse(result.data);
@@ -344,15 +369,26 @@ export class TraceBreakdownLoader {
344369
const arrJoinColumnAlias = 'arr_join_column_value';
345370

346371
for (const { key, columnExpression, limit, arrayJoinColumn } of inputs) {
372+
const prewhereConditions: SqlValue[] = [];
373+
const whereConditions: SqlValue[] = [];
374+
375+
for (const condition of this.conditions) {
376+
if (condition.sql.includes('target_id') || condition.sql.includes('"timestamp"')) {
377+
prewhereConditions.push(condition);
378+
} else {
379+
whereConditions.push(condition);
380+
}
381+
}
382+
347383
statements.push(sql`
348384
SELECT
349385
'${sql.raw(key)}' AS "key"
350386
, toString(${sql.raw(columnExpression ?? arrJoinColumnAlias)}) AS "value"
351387
, count(*) AS "count"
352388
FROM "otel_traces_normalized"
353389
${sql.raw(arrayJoinColumn ? `ARRAY JOIN ${arrayJoinColumn} AS "${arrJoinColumnAlias}"` : '')}
354-
WHERE
355-
${sql.join(this.conditions, ' AND ')}
390+
${prewhereConditions.length ? sql`PREWHERE ${sql.join(prewhereConditions, ' AND ')}` : sql``}
391+
${whereConditions.length ? sql`WHERE ${sql.join(whereConditions, ' AND ')}` : sql``}
356392
GROUP BY
357393
"value"
358394
ORDER BY
@@ -363,6 +399,7 @@ export class TraceBreakdownLoader {
363399

364400
const query = sql`
365401
${sql.join(statements, ' UNION ALL ')}
402+
SETTINGS max_threads = 8
366403
`;
367404

368405
const results = await this.clickhouse.query<{
@@ -372,7 +409,7 @@ export class TraceBreakdownLoader {
372409
}>({
373410
query,
374411
queryId: 'traces_filter_options',
375-
timeout: 10_000,
412+
timeout: 60_000,
376413
});
377414

378415
const rowsGroupedByKey = results.data.reduce(
@@ -403,6 +440,14 @@ export class TraceBreakdownLoader {
403440

404441
this.conditions = [sql`target_id = ${targetId}`];
405442

443+
if (filter?.period) {
444+
const period = parseDateRangeInput(filter.period);
445+
this.conditions.push(
446+
sql`"timestamp" >= toDateTime(${formatDate(period.from)}, 'UTC')`,
447+
sql`"timestamp" <= toDateTime(${formatDate(period.to)}, 'UTC')`,
448+
);
449+
}
450+
406451
if (filter?.traceIds?.length) {
407452
this.conditions.push(sql`"trace_id" IN (${sql.array(filter.traceIds, 'String')})`);
408453
}
@@ -571,7 +616,6 @@ const traceFields = sql`
571616
, "http_url" AS "httpUrl"
572617
, "duration"
573618
, "graphql_operation_name" AS "graphqlOperationName"
574-
, "graphql_operation_document" AS "graphqlOperationDocument"
575619
, "graphql_operation_hash" AS "graphqlOperationHash"
576620
, "client_name" AS "clientName"
577621
, "client_version" AS "clientVersion"

0 commit comments

Comments
 (0)