refactor(telemetry): add context to errors

nkomonen-amazon · nkomonen-amazon · commit 8b21952af7b4 · 2024-10-18T23:23:28.000-04:00
Problem:

The withTelemetryContext() decorator adds context to telemetry
but not to thrown errors.

Solution:

Adding this decorator to a method will add context to any thrown exceptions.
This is helpful in telemetry as it will provide information about the caller.

Signed-off-by: nkomonen-amazon <nkomonen@amazon.com>
diff --git a/docs/telemetry.md b/docs/telemetry.md
@@ -142,7 +142,10 @@ Finally, if `setupStep2()` was the thing that failed we would see a metric like:
 
 ## Adding a "Stack Trace" to your metric
 
-### Problem
+When errors are thrown, we do not attach the stack trace to telemetry. We only know about the error itself, but
+not the path it took to get there. We sometimes need this stack trace to debug, and telemetry is our only insight into what happened since we do not have access to logs.
+
+### Scenario
 
 Common example: _"I have a function, `thisFailsSometimes()` that is called in multiple places. The function sometimes fails, I know from telemetry, but I do not know if it is failing when it is a specific caller. If I knew the call stack/trace that it took to call my function that would help me debug."_
 
@@ -168,34 +171,67 @@ function thisFailsSometimes(num: number) {
 ### Solution
 
 Add a value to `function` in the options of a `run()`. This will result in a stack of functions identifiers that were previously called
-before `thisFailsSometimes()` was run. You can then retrieve the stack in the `run()` of your final metric using `getFunctionStack()`.
+before `failsDependingOnInput()` was run. You can then retrieve the stack in the `run()` of your final metric using `getFunctionStack()`.
 
 ```typescript
-function outerA() {
-    telemetry.my_Metric.run(() => thisFailsSometimes(1), { functionId: { name: 'outerA' }})
+function runsSuccessfully() {
+    telemetry.my_Metric.run(() => failsDependingOnInput(1), { functionId: { name: 'runsSuccessfully' }})
 }
 
-function outerB() {
-    telemetry.my_Metric.run(() => thisFailsSometimes(0), { functionId: { source: 'outerB' }})
+function thisThrows() {
+    telemetry.my_Metric.run(() => failsDependingOnInput(0), { functionId: { name: 'thisThrows' }})
 }
 
-function thisFailsSometimes(num: number) {
+function failsDependingOnInput(num: number) {
     return telemetry.my_Metric.run(() => {
         telemetry.record({ theCallStack: asStringifiedStack(telemetry.getFunctionStack())})
         if (number === 0) {
             throw Error('Cannot be 0')
         }
         ...
-    }, { functionId: { name: 'thisFailsSometimes' }})
+    }, { functionId: { name: 'failsDependingOnInput' }})
 }
 
-// Results in a metric: { theCallStack: 'outerB:thisFailsSometimes', result: 'Failed' }
-// { theCallStack: 'outerB:thisFailsSometimes' } implies 'outerB' was run first, then 'thisFailsSometimes'. See docstrings for more info.
-outerB()
+// Results in a metric: { theCallStack: 'thisThrows:failsDependingOnInput', result: 'Failed' }
+// { theCallStack: 'thisThrows:failsDependingOnInput' } implies 'thisThrows' was run first, then 'failsDependingOnInput'. See docstrings for more info.
+thisThrows()
+```
+
+Additionally, the `@withTelemetryContext()` decorator can be added to methods to do the same as above, but with a cleaner syntax.
+
+```typescript
+class MyClass {
+    @withTelemetryContext({ name: 'runsSuccessfully', class: 'MyClass' })
+    public runsSuccessfully() {
+        this.failsDependingOnInput(1)
+    }
+
+    @withTelemetryContext({ name: 'thisThrows', class: 'MyClass' })
+    public thisThrows() {
+        this.failsDependingOnInput(0)
+    }
+
+    private failsDependingOnInput(num: number) {
+        return telemetry.my_Metric.run(() => {
+            telemetry.record({ theCallStack: asStringifiedStack(telemetry.getFunctionStack())})
+            if (num === 0) {
+                throw Error('Cannot be 0')
+            }
+            ...
+        }, { functionId: { name: 'failsDependingOnInput', class: 'MyClass' }})
+    }
+
+}
+
+
+// Results in a metric: { theCallStack: 'MyClass#thisThrows,failsDependingOnInput', result: 'Failed' }
+new MyClass().thisThrows()
 ```
 
 ### Important Notes
 
+-   Using `@withTelemetryContext` wraps any error thrown by the decorated method in a new error built from the `functionId` properties, so the error itself carries the caller context
+
 -   If a nested function does not use a `run()` then it will not be part of the call stack.
 
     ```typescript
@@ -216,25 +252,6 @@ outerB()
     c() // result: 'a:c', note that 'b' is not included
     ```
 
--   If you are using `run()` with a class method, you can also add the class to the entry for more context
-
-    ```typescript
-    class A {
-        a() {
-            return telemetry.my_Metric.run(() => this.b(), { functionId: { name: 'a', class: 'A' } })
-        }
-
-        b() {
-            return telemetry.my_Metric.run(() => asStringifiedStack(telemetry.getFunctionStack()), {
-                functionId: { name: 'b', class: 'A' },
-            })
-        }
-    }
-
-    const inst = new A()
-    inst.a() // 'A#a,b'
-    ```
-
 -   If you do not want your `run()` to emit telemetry, set `emit: false` in the options
 
     ```typescript
diff --git a/packages/core/src/shared/telemetry/util.ts b/packages/core/src/shared/telemetry/util.ts
@@ -26,9 +26,10 @@ import { isValidationExemptMetric } from './exemptMetrics'
 import { isAmazonQ, isCloud9, isSageMaker } from '../../shared/extensionUtilities'
 import { randomUUID } from '../crypto'
 import { ClassToInterfaceType } from '../utilities/tsUtils'
-import { FunctionEntry, type TelemetryTracer } from './spans'
+import { FunctionEntry } from './spans'
 import { telemetry } from './telemetry'
 import { v5 as uuidV5 } from 'uuid'
+import { ToolkitError } from '../errors'
 
 const legacySettingsTelemetryValueDisable = 'Disable'
 const legacySettingsTelemetryValueEnable = 'Enable'
@@ -341,7 +342,7 @@ export function getOperatingSystem(): OperatingSystem {
 
 /**
  * Decorator that simply wraps the method with a non-emitting telemetry `run()`, automatically
- * `record()`ing the provided function id for later use by {@link TelemetryTracer.getFunctionStack()}
+ * `record()`ing the provided function id for later use by TelemetryTracer.getFunctionStack()
  *
  * This saves us from needing to wrap the entire function:
  *
@@ -376,8 +377,18 @@ export function withTelemetryContext(functionId: FunctionEntry) {
         function decoratedMethod(this: This, ...args: Args): Return {
             return telemetry.function_call.run(
                 () => {
-                    // DEVELOPERS: Set a breakpoint here and step in to it to debug the original function
-                    return originalMethod.call(this, ...args)
+                    try {
+                        // DEVELOPERS: Set a breakpoint here and step in to it to debug the original function
+                        const result = originalMethod.call(this, ...args)
+                        if (result instanceof Promise) {
+                            return result.catch((e) => {
+                                throw addContextToError(e, functionId)
+                            }) as Return
+                        }
+                        return result
+                    } catch (e) {
+                        throw addContextToError(e, functionId)
+                    }
                 },
                 {
                     emit: false,
@@ -388,4 +399,10 @@ export function withTelemetryContext(functionId: FunctionEntry) {
         return decoratedMethod
     }
     return decorator
+
+    // Chains `e` as the cause of a new ToolkitError whose message is `ctx: <function name>`.
+    // The class, when present, is conveyed through `code` only, so it is not repeated in the message.
+    function addContextToError(e: unknown, functionId: FunctionEntry) {
+        return ToolkitError.chain(e, `ctx: ${functionId.name}`, { code: functionId.class })
+    }
 }
diff --git a/packages/core/src/test/shared/telemetry/spans.test.ts b/packages/core/src/test/shared/telemetry/spans.test.ts
@@ -4,7 +4,7 @@
  */
 
 import assert from 'assert'
-import { ToolkitError } from '../../../shared/errors'
+import { getErrorId, ToolkitError } from '../../../shared/errors'
 import { asStringifiedStack, FunctionEntry, TelemetrySpan, TelemetryTracer } from '../../../shared/telemetry/spans'
 import { MetricName, MetricShapes, telemetry } from '../../../shared/telemetry/telemetry'
 import { assertTelemetry, getMetrics, installFakeClock } from '../../testUtil'
@@ -588,6 +588,64 @@ describe('TelemetryTracer', function () {
                 inst.doesNotEmit()
                 assertTelemetry('function_call', [])
             })
+
+            class TestThrows { // Fixture: every decorated method throws the shared `arbitraryError`
+                @withTelemetryContext({ name: 'throwsError', class: 'TestThrows' })
+                throwsError() { // synchronous throw; functionId includes a class
+                    throw arbitraryError
+                }
+
+                @withTelemetryContext({ name: 'throwsError' })
+                throwsErrorButNoClass() { // synchronous throw; functionId has a name only
+                    throw arbitraryError
+                }
+
+                @withTelemetryContext({ name: 'throwsError' })
+                async throwsAsyncError() { // async, so the error surfaces as a rejected promise
+                    throw arbitraryError
+                }
+            }
+            const arbitraryError = new Error('arbitrary error')
+
+            it(`withTelemetryContext wraps errors with function id context`, async function () {
+                const inst = new TestThrows()
+                assert.throws( // case 1: sync throw from a functionId that has a class
+                    () => inst.throwsError(),
+                    (e) => {
+                        if (!(e instanceof ToolkitError)) {
+                            return false
+                        }
+                        const id = getErrorId(e)
+                        const message = e.message
+                        const cause = e.cause
+                        return id === 'TestThrows' && message === 'ctx: throwsError' && cause === arbitraryError // class becomes the error id; original error is the cause
+                    }
+                )
+                assert.throws( // case 2: sync throw from a functionId without a class
+                    () => inst.throwsErrorButNoClass(),
+                    (e) => {
+                        if (!(e instanceof ToolkitError)) {
+                            return false
+                        }
+                        const id = getErrorId(e)
+                        const message = e.message
+                        const cause = e.cause
+                        return id === 'Error' && message === 'ctx: throwsError' && cause === arbitraryError // no class given, so the id is expected to be 'Error'
+                    }
+                )
+                await assert.rejects( // case 3: async method; the rejection is expected to be wrapped like case 2
+                    () => inst.throwsAsyncError(),
+                    (e) => {
+                        if (!(e instanceof ToolkitError)) {
+                            return false
+                        }
+                        const id = getErrorId(e)
+                        const message = e.message
+                        const cause = e.cause
+                        return id === 'Error' && message === 'ctx: throwsError' && cause === arbitraryError
+                    }
+                )
+            })
         })
     })