Skip to content

Commit 791ae55

Browse files
authored
telemetry(core): implement tracing (#5591)
## Problem - We can't see an e2e view of our events ## Solution - Implement telemetry tracing using the opentelemetry idea of trace ids. - A trace id helps create a collection of spans. - Only one trace id can be active in an execution flow at a time. - Additionally, it swaps parentMetric for parentId so that we can directly correlate one telemetry event to another ## Other details - A trace is associated with a top level telemetry event. The top level can be either event_trace or any other of our metrics.
1 parent b2ebc10 commit 791ae55

File tree

3 files changed

+177
-33
lines changed

3 files changed

+177
-33
lines changed

packages/core/src/shared/telemetry/spans.ts

Lines changed: 51 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import {
2424
} from '../errors'
2525
import { entries, NumericKeys } from '../utilities/tsUtils'
2626
import { PerformanceTracker } from '../performance/performance'
27+
import { randomUUID } from '../crypto'
2728

2829
const AsyncLocalStorage: typeof AsyncLocalStorageClass =
2930
require('async_hooks').AsyncLocalStorage ??
@@ -145,6 +146,7 @@ export class TelemetrySpan<T extends MetricBase = MetricBase> {
145146
passive: true,
146147
requiredMetadata: [],
147148
}
149+
private readonly metricId: string
148150

149151
/**
150152
* These fields appear on the base metric instead of the 'metadata' and
@@ -166,6 +168,10 @@ export class TelemetrySpan<T extends MetricBase = MetricBase> {
166168
this.definition.trackPerformance && (options?.emit ?? false) // only track the performance if we are also emitting
167169
),
168170
}
171+
172+
this.metricId = randomUUID()
173+
// forced to cast to any since apparently even though <T extends MetricBase>, Partial<T> doesn't guarentee that metricId is available
174+
this.record({ metricId: this.metricId } as any)
169175
}
170176

171177
public get startTime(): Date | undefined {
@@ -261,6 +267,10 @@ export class TelemetrySpan<T extends MetricBase = MetricBase> {
261267
}
262268
}
263269

270+
getMetricId() {
271+
return this.metricId
272+
}
273+
264274
/**
265275
* Creates a copy of the span with an uninitialized start time.
266276
*/
@@ -335,33 +345,50 @@ export class TelemetryTracer extends TelemetryBase {
335345
* reverted after the execution completes.
336346
*/
337347
public run<T, U extends MetricName>(name: U, fn: (span: Metric<MetricShapes[U]>) => T, options?: SpanOptions): T {
338-
const span = this.createSpan(name, options).start()
339-
const frame = this.switchContext(span)
348+
const initTraceId = (callback: () => T): T => {
349+
/**
350+
* Generate a new traceId if one doesn't exist.
351+
* This ensures the traceId is created before the span,
352+
* allowing it to propagate to all child telemetry metrics.
353+
*/
354+
if (!this.attributes?.traceId) {
355+
return this.runRoot(() => {
356+
this.record({ traceId: randomUUID() })
357+
return callback()
358+
})
359+
}
360+
return callback()
361+
}
362+
363+
return initTraceId(() => {
364+
const span = this.createSpan(name, options).start()
365+
const frame = this.switchContext(span)
340366

341-
try {
342-
//
343-
// TODO: Since updating to `@types/node@16`, typescript flags this code with error:
344-
//
345-
// Error: npm ERR! src/shared/telemetry/spans.ts(255,57): error TS2345: Argument of type
346-
// 'TelemetrySpan<MetricBase>' is not assignable to parameter of type 'Metric<MetricShapes[U]>'.
347-
//
348-
const result = this.#context.run(frame, fn, span as any)
367+
try {
368+
//
369+
// TODO: Since updating to `@types/node@16`, typescript flags this code with error:
370+
//
371+
// Error: npm ERR! src/shared/telemetry/spans.ts(255,57): error TS2345: Argument of type
372+
// 'TelemetrySpan<MetricBase>' is not assignable to parameter of type 'Metric<MetricShapes[U]>'.
373+
//
374+
const result = this.#context.run(frame, fn, span as any)
375+
376+
if (result instanceof Promise) {
377+
return result
378+
.then((v) => (span.stop(), v))
379+
.catch((e) => {
380+
span.stop(e)
381+
throw e
382+
}) as unknown as T
383+
}
349384

350-
if (result instanceof Promise) {
385+
span.stop()
351386
return result
352-
.then((v) => (span.stop(), v))
353-
.catch((e) => {
354-
span.stop(e)
355-
throw e
356-
}) as unknown as T
387+
} catch (e) {
388+
span.stop(e)
389+
throw e
357390
}
358-
359-
span.stop()
360-
return result
361-
} catch (e) {
362-
span.stop(e)
363-
throw e
364-
}
391+
})
365392
}
366393

367394
/**
@@ -437,7 +464,7 @@ export class TelemetryTracer extends TelemetryBase {
437464
private createSpan(name: string, options?: SpanOptions): TelemetrySpan {
438465
const span = new TelemetrySpan(name, options).record(this.attributes ?? {})
439466
if (this.activeSpan && this.activeSpan.name !== rootSpanName) {
440-
return span.record({ parentMetric: this.activeSpan.name } satisfies { parentMetric: string } as any)
467+
return span.record({ parentId: this.activeSpan.getMetricId() })
441468
}
442469

443470
return span

packages/core/src/shared/telemetry/vscodeTelemetry.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,11 @@
315315
"name": "totalFiles",
316316
"type": "int",
317317
"description": "The total number of files being sent to Amazon Q"
318+
},
319+
{
320+
"name": "traceId",
321+
"type": "string",
322+
"description": "The unique identifier of a trace"
318323
}
319324
],
320325
"metrics": [
@@ -1178,6 +1183,21 @@
11781183
"required": true
11791184
}
11801185
]
1186+
},
1187+
{
1188+
"name": "trace_event",
1189+
"description": "Represents the root trace spanning multiple operations",
1190+
"passive": true,
1191+
"metadata": [
1192+
{
1193+
"type": "traceId",
1194+
"required": true
1195+
},
1196+
{
1197+
"type": "name",
1198+
"required": true
1199+
}
1200+
]
11811201
}
11821202
]
11831203
}

packages/core/src/test/shared/telemetry/spans.test.ts

Lines changed: 106 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import { withTelemetryContext } from '../../../shared/telemetry/util'
1515
import { SinonSandbox } from 'sinon'
1616
import sinon from 'sinon'
1717
import { stubPerformance } from '../../utilities/performance'
18+
import * as crypto from '../../../shared/crypto'
1819

1920
describe('TelemetrySpan', function () {
2021
let clock: ReturnType<typeof installFakeClock>
@@ -291,7 +292,17 @@ describe('TelemetryTracer', function () {
291292
})
292293

293294
describe('nested run()', function () {
295+
let uuidStub: sinon.SinonStub
294296
const nestedName = 'nested_metric' as MetricName
297+
const testId = 'foo-foo-foo-foo-foo'
298+
const flowName = 'testTraceFlow'
299+
300+
beforeEach(() => {
301+
uuidStub = sandbox.stub(crypto, 'randomUUID')
302+
303+
// in the first call we set the trace id in subsequent calls we get the span ids
304+
uuidStub.returns(testId)
305+
})
295306

296307
it('can record metadata in nested spans', function () {
297308
tracer.run(metricName, (span1) => {
@@ -310,18 +321,18 @@ describe('TelemetryTracer', function () {
310321
it('removes spans when exiting an execution context', function () {
311322
tracer.run(metricName, () => {
312323
tracer.run(nestedName, () => {
313-
assert.strictEqual(tracer.spans.length, 2)
324+
assert.strictEqual(tracer.spans.length, 3)
314325
})
315326

316-
assert.strictEqual(tracer.spans.length, 1)
327+
assert.strictEqual(tracer.spans.length, 2)
317328
})
318329
})
319330

320331
it('adds spans during a nested execution, closing them when after', function () {
321332
tracer.run(metricName, () => {
322-
tracer.run(nestedName, () => assert.strictEqual(tracer.spans.length, 2))
323-
tracer.run(nestedName, () => assert.strictEqual(tracer.spans.length, 2))
324-
assert.strictEqual(tracer.spans.length, 1)
333+
tracer.run(nestedName, () => assert.strictEqual(tracer.spans.length, 3))
334+
tracer.run(nestedName, () => assert.strictEqual(tracer.spans.length, 3))
335+
assert.strictEqual(tracer.spans.length, 2)
325336
})
326337

327338
assert.strictEqual(tracer.spans.length, 0)
@@ -330,16 +341,102 @@ describe('TelemetryTracer', function () {
330341
it('supports nesting the same event name', function () {
331342
tracer.run(metricName, () => {
332343
tracer.run(metricName, () => {
333-
assert.strictEqual(tracer.spans.length, 2)
334-
assert.ok(tracer.spans.every((s) => s.name === metricName))
344+
assert.strictEqual(tracer.spans.length, 3)
345+
assert.ok(tracer.spans.filter((m) => m.name !== 'root').every((s) => s.name === metricName))
335346
})
336347
})
337348
})
338349

339-
it('attaches the parent event name to the child span', function () {
350+
it('attaches the parent id to the child span', function () {
340351
tracer.run(metricName, () => tracer.run(nestedName, () => {}))
341352
assertTelemetry(metricName, { result: 'Succeeded' })
342-
assertTelemetry(nestedName, { result: 'Succeeded', parentMetric: metricName } as any)
353+
assertTelemetry(nestedName, { result: 'Succeeded', parentId: testId } as any)
354+
})
355+
356+
it('should set trace id', function () {
357+
telemetry.trace_event.run((span) => {
358+
span.record({ name: flowName })
359+
assert.deepStrictEqual(telemetry.activeSpan?.getMetricId(), testId)
360+
})
361+
const event = getMetrics('trace_event')
362+
assert.deepStrictEqual(event[0].traceId, testId)
363+
assert.deepStrictEqual(event[0].name, 'testTraceFlow')
364+
})
365+
366+
it('trace id is propogated to children', function () {
367+
const metricsIds = {
368+
trace_event: {
369+
metricId: 'traceEvent',
370+
traceId: testId,
371+
parentId: undefined,
372+
},
373+
amazonq_startConversation: {
374+
metricId: 'amazonq_startConversation',
375+
traceId: testId,
376+
parentId: 'traceEvent',
377+
},
378+
amazonq_addMessage: {
379+
metricId: 'amazonq_addMessage',
380+
traceId: testId,
381+
parentId: 'amazonq_startConversation',
382+
},
383+
vscode_executeCommand: {
384+
metricId: 'vscode_executeCommand',
385+
traceId: testId,
386+
parentId: 'traceEvent',
387+
},
388+
amazonq_enterFocusConversation: {
389+
metricId: 'amazonq_enterFocusConversation',
390+
traceId: testId,
391+
parentId: 'vscode_executeCommand',
392+
},
393+
amazonq_exitFocusConversation: {
394+
metricId: 'amazonq_exitFocusConversation',
395+
traceId: testId,
396+
parentId: 'amazonq_enterFocusConversation',
397+
},
398+
amazonq_closeChat: {
399+
metricId: 'amazonq_closeChat',
400+
traceId: testId,
401+
parentId: 'traceEvent',
402+
},
403+
}
404+
405+
/**
406+
* randomUUID calls:
407+
* The first is called on the root event that never gets emitted
408+
* The second is called when generating the traceId
409+
* The rest are called when generating the spanIds
410+
*/
411+
uuidStub.onCall(1).returns(testId)
412+
let index = 2
413+
for (const v of Object.values(metricsIds)) {
414+
uuidStub.onCall(index).returns(v.metricId)
415+
index++
416+
}
417+
418+
telemetry.trace_event.run(() => {
419+
telemetry.amazonq_startConversation.run(() => {
420+
telemetry.amazonq_addMessage.run(() => {})
421+
})
422+
telemetry.vscode_executeCommand.run(() => {
423+
telemetry.amazonq_enterFocusConversation.run(() => {
424+
telemetry.amazonq_exitFocusConversation.run(() => {})
425+
})
426+
})
427+
telemetry.amazonq_closeChat.emit({
428+
result: 'Succeeded',
429+
})
430+
})
431+
432+
const spanEntries = Object.entries(metricsIds)
433+
for (let x = 0; x < spanEntries.length; x++) {
434+
const [metricName, { metricId, traceId, parentId }] = spanEntries[x]
435+
const metric = getMetrics(metricName as keyof MetricShapes)[0] as any
436+
assert.deepStrictEqual(metric.traceId, traceId)
437+
assert.deepStrictEqual(metric.metricId, metricId)
438+
assert.deepStrictEqual(metric.parentId, parentId)
439+
}
343440
})
344441
})
345442

0 commit comments

Comments
 (0)