Skip to content

Commit d7412c9

Browse files
[9.2] [Discover][Unified Traces] Improve Unified Traces API call latencies (elastic#240285) (elastic#240329)
# Backport This will backport the following commits from `main` to `9.2`: - [[Discover][Unified Traces] Improve Unified Traces API call latencies (elastic#240285)](elastic#240285) <!--- Backport version: 9.6.6 --> ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sorenlouv/backport) <!--BACKPORT [{"author":{"name":"Gonçalo Rica Pais da Silva","email":"[email protected]"},"sourceCommit":{"committedDate":"2025-10-23T15:32:15Z","message":"[Discover][Unified Traces] Improve Unified Traces API call latencies (elastic#240285)\n\n## Summary\n\nIn some of the Unified Trace API calls, there are various async calls\nbeing made sequentially when they could in fact be made concurrently.\nThis PR fixes that by making these small improvements to make various\ncalls concurrent where possible and thus avoiding the added latency.\n\nThese changes were benchmarked and for the unified trace call, it has\nthe following profile: the first time is the elapsed time for getting\nthe APM and Logs clients, and the second time is the total time elapsed\nwith the full response returned:\n\nBEFORE:\n```\nclients: 648.295ms\nresponse: 1.165s\n\nclients: 653.911ms\nresponse: 1.166s\n\nclients: 649.343ms\nresponse: 1.328s\n```\n\nAFTER:\n```\nclients: 335.631ms\nresponse: 836.463ms\n\nclients: 332.548ms\nresponse: 669.448ms\n\nclients: 325.188ms\nresponse: 820.929ms\n```\n\n## How to test\n\n- Go to Discover page in Observability mode, select a trace index in\neither classic or ES|QL mode\n- Going to a trace overview, the focused trace waterfall and the full\ntrace waterfall views should not regress in functionality or error\nunexpectedly.","sha":"ca9eb7c8fc8e5a01bf8601573b3da66bb821307c","branchLabelMapping":{"^v9.3.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","Team:obs-ux-infra_services","backport:version","v9.2.0","v9.3.0","v9.1.6","v8.19.6","v9.0.9"],"title":"[Discover][Unified Traces] Improve Unified Traces 
API call latencies","number":240285,"url":"https://github.com/elastic/kibana/pull/240285","mergeCommit":{"message":"[Discover][Unified Traces] Improve Unified Traces API call latencies (elastic#240285)\n\n## Summary\n\nIn some of the Unified Trace API calls, there are various async calls\nbeing made sequentially when they could in fact be made concurrently.\nThis PR fixes that by making these small improvements to make various\ncalls concurrent where possible and thus avoiding the added latency.\n\nThese changes were benchmarked and for the unified trace call, it has\nthe following profile: the first time is the elapsed time for getting\nthe APM and Logs clients, and the second time is the total time elapsed\nwith the full response returned:\n\nBEFORE:\n```\nclients: 648.295ms\nresponse: 1.165s\n\nclients: 653.911ms\nresponse: 1.166s\n\nclients: 649.343ms\nresponse: 1.328s\n```\n\nAFTER:\n```\nclients: 335.631ms\nresponse: 836.463ms\n\nclients: 332.548ms\nresponse: 669.448ms\n\nclients: 325.188ms\nresponse: 820.929ms\n```\n\n## How to test\n\n- Go to Discover page in Observability mode, select a trace index in\neither classic or ES|QL mode\n- Going to a trace overview, the focused trace waterfall and the full\ntrace waterfall views should not regress in functionality or error\nunexpectedly.","sha":"ca9eb7c8fc8e5a01bf8601573b3da66bb821307c"}},"sourceBranch":"main","suggestedTargetBranches":["9.2","9.1","8.19","9.0"],"targetPullRequestStates":[{"branch":"9.2","label":"v9.2.0","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"main","label":"v9.3.0","branchLabelMappingKey":"^v9.3.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/240285","number":240285,"mergeCommit":{"message":"[Discover][Unified Traces] Improve Unified Traces API call latencies (elastic#240285)\n\n## Summary\n\nIn some of the Unified Trace API calls, there are various async calls\nbeing made sequentially when 
they could in fact be made concurrently.\nThis PR fixes that by making these small improvements to make various\ncalls concurrent where possible and thus avoiding the added latency.\n\nThese changes were benchmarked and for the unified trace call, it has\nthe following profile: the first time is the elapsed time for getting\nthe APM and Logs clients, and the second time is the total time elapsed\nwith the full response returned:\n\nBEFORE:\n```\nclients: 648.295ms\nresponse: 1.165s\n\nclients: 653.911ms\nresponse: 1.166s\n\nclients: 649.343ms\nresponse: 1.328s\n```\n\nAFTER:\n```\nclients: 335.631ms\nresponse: 836.463ms\n\nclients: 332.548ms\nresponse: 669.448ms\n\nclients: 325.188ms\nresponse: 820.929ms\n```\n\n## How to test\n\n- Go to Discover page in Observability mode, select a trace index in\neither classic or ES|QL mode\n- Going to a trace overview, the focused trace waterfall and the full\ntrace waterfall views should not regress in functionality or error\nunexpectedly.","sha":"ca9eb7c8fc8e5a01bf8601573b3da66bb821307c"}},{"branch":"9.1","label":"v9.1.6","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"8.19","label":"v8.19.6","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"9.0","label":"v9.0.9","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"}]}] BACKPORT--> Co-authored-by: Gonçalo Rica Pais da Silva <[email protected]>
1 parent 2bd81f2 commit d7412c9

File tree

2 files changed

+67
-59
lines changed

2 files changed

+67
-59
lines changed

x-pack/solutions/observability/plugins/apm/server/routes/traces/get_unified_trace_items.ts

Lines changed: 52 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,9 @@ import {
3333
import { asMutableArray } from '../../../common/utils/as_mutable_array';
3434
import type { TraceItem } from '../../../common/waterfall/unified_trace_item';
3535
import { MAX_ITEMS_PER_PAGE } from './get_trace_items';
36-
import type { UnifiedTraceErrors } from './get_unified_trace_errors';
36+
import { getUnifiedTraceErrors, type UnifiedTraceErrors } from './get_unified_trace_errors';
3737
import { parseOtelDuration } from '../../lib/helpers/parse_otel_duration';
38+
import type { LogsClient } from '../../lib/helpers/create_es_client/create_logs_client';
3839

3940
const fields = asMutableArray(['@timestamp', 'trace.id', 'service.name'] as const);
4041

@@ -85,25 +86,33 @@ export function getErrorsByDocId(unifiedTraceErrors: UnifiedTraceErrors) {
8586
*/
8687
export async function getUnifiedTraceItems({
8788
apmEventClient,
89+
logsClient,
8890
maxTraceItemsFromUrlParam,
8991
traceId,
9092
start,
9193
end,
9294
config,
93-
unifiedTraceErrors,
9495
}: {
9596
apmEventClient: APMEventClient;
97+
logsClient: LogsClient;
9698
maxTraceItemsFromUrlParam?: number;
9799
traceId: string;
98100
start: number;
99101
end: number;
100102
config: APMConfig;
101-
unifiedTraceErrors: UnifiedTraceErrors;
102-
}): Promise<TraceItem[]> {
103+
}): Promise<{ traceItems: TraceItem[]; unifiedTraceErrors: UnifiedTraceErrors }> {
103104
const maxTraceItems = maxTraceItemsFromUrlParam ?? config.ui.maxTraceItems;
104105
const size = Math.min(maxTraceItems, MAX_ITEMS_PER_PAGE);
105106

106-
const response = await apmEventClient.search(
107+
const unifiedTraceErrorsPromise = getUnifiedTraceErrors({
108+
apmEventClient,
109+
logsClient,
110+
traceId,
111+
start,
112+
end,
113+
});
114+
115+
const unifiedTracePromise = apmEventClient.search(
107116
'get_unified_trace_items',
108117
{
109118
apm: {
@@ -148,36 +157,45 @@ export async function getUnifiedTraceItems({
148157
{ skipProcessorEventFilter: true }
149158
);
150159

160+
const [unifiedTraceErrors, unifiedTraceItems] = await Promise.all([
161+
unifiedTraceErrorsPromise,
162+
unifiedTracePromise,
163+
]);
164+
151165
const errorsByDocId = getErrorsByDocId(unifiedTraceErrors);
152-
return response.hits.hits
153-
.map((hit) => {
154-
const event = unflattenKnownApmEventFields(hit.fields, fields);
155-
const apmDuration = event.span?.duration?.us || event.transaction?.duration?.us;
156-
const id = event.span?.id || event.transaction?.id;
157-
if (!id) {
158-
return undefined;
159-
}
160-
161-
const docErrors = errorsByDocId[id] || [];
162-
return {
163-
id: event.span?.id ?? event.transaction?.id,
164-
timestampUs: event.timestamp?.us ?? toMicroseconds(event[AT_TIMESTAMP]),
165-
name: event.span?.name ?? event.transaction?.name,
166-
traceId: event.trace.id,
167-
duration: resolveDuration(apmDuration, event.duration),
168-
...((event.event?.outcome || event.status?.code) && {
169-
status: {
170-
fieldName: event.event?.outcome ? EVENT_OUTCOME : STATUS_CODE,
171-
value: event.event?.outcome || event.status?.code,
172-
},
173-
}),
174-
errors: docErrors,
175-
parentId: event.parent?.id,
176-
serviceName: event.service.name,
177-
type: event.span?.subtype || event.span?.type || event.kind,
178-
} as TraceItem;
179-
})
180-
.filter((_) => _) as TraceItem[];
166+
167+
return {
168+
traceItems: unifiedTraceItems.hits.hits
169+
.map((hit) => {
170+
const event = unflattenKnownApmEventFields(hit.fields, fields);
171+
const apmDuration = event.span?.duration?.us || event.transaction?.duration?.us;
172+
const id = event.span?.id || event.transaction?.id;
173+
if (!id) {
174+
return undefined;
175+
}
176+
177+
const docErrors = errorsByDocId[id] || [];
178+
return {
179+
id: event.span?.id ?? event.transaction?.id,
180+
timestampUs: event.timestamp?.us ?? toMicroseconds(event[AT_TIMESTAMP]),
181+
name: event.span?.name ?? event.transaction?.name,
182+
traceId: event.trace.id,
183+
duration: resolveDuration(apmDuration, event.duration),
184+
...((event.event?.outcome || event.status?.code) && {
185+
status: {
186+
fieldName: event.event?.outcome ? EVENT_OUTCOME : STATUS_CODE,
187+
value: event.event?.outcome || event.status?.code,
188+
},
189+
}),
190+
errors: docErrors,
191+
parentId: event.parent?.id,
192+
serviceName: event.service.name,
193+
type: event.span?.subtype || event.span?.type || event.kind,
194+
} as TraceItem;
195+
})
196+
.filter((_) => _) as TraceItem[],
197+
unifiedTraceErrors,
198+
};
181199
}
182200

183201
/**

x-pack/solutions/observability/plugins/apm/server/routes/traces/route.ts

Lines changed: 15 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -137,29 +137,23 @@ const unifiedTracesByIdRoute = createApmServerRoute({
137137
): Promise<{
138138
traceItems: TraceItem[];
139139
}> => {
140-
const apmEventClient = await getApmEventClient(resources);
141-
const logsClient = await createLogsClient(resources);
140+
const [apmEventClient, logsClient] = await Promise.all([
141+
getApmEventClient(resources),
142+
createLogsClient(resources),
143+
]);
142144

143145
const { params, config } = resources;
144146
const { traceId } = params.path;
145147
const { start, end } = params.query;
146148

147-
const unifiedTraceErrors = await getUnifiedTraceErrors({
149+
const { traceItems } = await getUnifiedTraceItems({
148150
apmEventClient,
149151
logsClient,
150152
traceId,
151153
start,
152154
end,
153-
});
154-
155-
const traceItems = await getUnifiedTraceItems({
156-
apmEventClient,
157-
traceId,
158-
start,
159-
end,
160155
maxTraceItemsFromUrlParam: params.query.maxTraceItems,
161156
config,
162-
unifiedTraceErrors,
163157
});
164158

165159
return {
@@ -183,30 +177,24 @@ const unifiedTracesByIdSummaryRoute = createApmServerRoute({
183177
traceItems?: FocusedTraceItems;
184178
summary: { services: number; traceEvents: number; errors: number };
185179
}> => {
186-
const apmEventClient = await getApmEventClient(resources);
187-
const logsClient = await createLogsClient(resources);
180+
const [apmEventClient, logsClient] = await Promise.all([
181+
getApmEventClient(resources),
182+
createLogsClient(resources),
183+
]);
188184

189185
const { params, config } = resources;
190186
const { traceId } = params.path;
191187
const { start, end, docId } = params.query;
192188

193-
const unifiedTraceErrors = await getUnifiedTraceErrors({
194-
apmEventClient,
195-
logsClient,
196-
traceId,
197-
start,
198-
end,
199-
});
200-
201-
const [traceItems, traceSummaryCount] = await Promise.all([
189+
const [{ traceItems, unifiedTraceErrors }, traceSummaryCount] = await Promise.all([
202190
getUnifiedTraceItems({
203191
apmEventClient,
192+
logsClient,
204193
traceId,
205194
start,
206195
end,
207196
maxTraceItemsFromUrlParam: params.query.maxTraceItems,
208197
config,
209-
unifiedTraceErrors,
210198
}),
211199
getTraceSummaryCount({ apmEventClient, start, end, traceId }),
212200
]);
@@ -233,8 +221,10 @@ const unifiedTracesByIdErrorsRoute = createApmServerRoute({
233221
}),
234222
security: { authz: { requiredPrivileges: ['apm'] } },
235223
handler: async (resources): Promise<ErrorsByTraceId> => {
236-
const apmEventClient = await getApmEventClient(resources);
237-
const logsClient = await createLogsClient(resources);
224+
const [apmEventClient, logsClient] = await Promise.all([
225+
getApmEventClient(resources),
226+
createLogsClient(resources),
227+
]);
238228

239229
const { params } = resources;
240230
const { traceId } = params.path;

0 commit comments

Comments
 (0)