Skip to content

Commit c2b7daf

Browse files
Add OpenTelemetry error recording with Activity.AddException polyfill (#446)
* Add OpenTelemetry error recording with Activity.AddException polyfill Addresses #429 - telemetry activities were not consistently marked as errors when exceptions occurred. This adds a net8 polyfill for the .NET 10 Activity.AddException API, a SetErrorStatus extension that both sets ActivityStatusCode.Error and records exception events per OTel semantic conventions, and consistent error recording across all activity scopes (jobs, queues, work items, messaging, locks). Co-authored-by: Cursor <cursoragent@cursor.com> * Remove redundant Activity.Current error status from WorkItemJob The job runner already sets error status on the parent activity for any non-success, non-cancelled JobResult. This call was setting it on Activity.Current before the child activity was created, duplicating work the runner handles. Co-authored-by: Cursor <cursoragent@cursor.com> * PR Feedback * Apply suggestion from @niemyjski * PR feedback --------- Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent e0842d6 commit c2b7daf

File tree

10 files changed

+138
-27
lines changed

10 files changed

+138
-27
lines changed

src/Foundatio.TestHarness/Jobs/HelloWorldJob.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using System;
1+
using System;
22
using System.Threading;
33
using System.Threading.Tasks;
44
using Foundatio.Jobs;

src/Foundatio/Jobs/JobResult.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using System;
1+
using System;
22
using Microsoft.Extensions.Logging;
33

44
namespace Foundatio.Jobs;

src/Foundatio/Jobs/JobWithLockBase.cs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using System;
1+
using System;
22
using System.Threading;
33
using System.Threading.Tasks;
44
using Foundatio.Lock;
@@ -45,7 +45,16 @@ public virtual async Task<JobResult> RunAsync(CancellationToken cancellationToke
4545
{
4646
lockActivity?.AddTag("job.id", JobId);
4747

48-
lockValue = await GetLockAsync(cancellationToken).AnyContext();
48+
try
49+
{
50+
lockValue = await GetLockAsync(cancellationToken).AnyContext();
51+
}
52+
catch (Exception ex)
53+
{
54+
lockActivity?.SetErrorStatus(ex);
55+
throw;
56+
}
57+
4958
if (lockValue is null)
5059
{
5160
return JobResult.CancelledWithMessage("Unable to acquire job lock");

src/Foundatio/Jobs/QueueJobBase.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using System;
1+
using System;
22
using System.Diagnostics;
33
using System.Threading;
44
using System.Threading.Tasks;
@@ -55,9 +55,9 @@ public virtual async Task<JobResult> RunAsync(CancellationToken cancellationToke
5555
using var linkedCancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
5656
linkedCancellationTokenSource.CancelAfter(TimeSpan.FromSeconds(30));
5757

58+
using var dequeueActivity = StartDequeueActivity();
5859
try
5960
{
60-
using var dequeueActivity = StartDequeueActivity();
6161
queueEntry = await _queue.Value.DequeueAsync(linkedCancellationTokenSource.Token).AnyContext();
6262
EnrichDequeueActivity(dequeueActivity, queueEntry);
6363
}
@@ -67,6 +67,7 @@ public virtual async Task<JobResult> RunAsync(CancellationToken cancellationToke
6767
}
6868
catch (Exception ex)
6969
{
70+
dequeueActivity?.SetErrorStatus(ex, $"Error trying to dequeue message: {ex.Message}");
7071
return JobResult.FromException(ex, $"Error trying to dequeue message: {ex.Message}");
7172
}
7273

@@ -132,6 +133,7 @@ public async Task<JobResult> ProcessAsync(IQueueEntry<T> queueEntry, Cancellatio
132133
}
133134
catch (Exception ex)
134135
{
136+
activity?.SetErrorStatus(ex);
135137
_logger.LogError(ex, "Error processing {QueueName} queue entry: {QueueEntryId}", _queueName, queueEntry.Id);
136138

137139
if (!queueEntry.IsCompleted && !queueEntry.IsAbandoned)

src/Foundatio/Jobs/WorkItemJob/WorkItemJob.cs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using System;
1+
using System;
22
using System.Collections.Concurrent;
33
using System.Diagnostics;
44
using System.Threading;
@@ -93,6 +93,7 @@ public async Task<JobResult> ProcessAsync(IQueueEntry<WorkItemData> queueEntry,
9393
}
9494
catch (Exception ex)
9595
{
96+
activity?.SetErrorStatus(ex, $"Abandoning {queueEntry.Value.Type} work item: {queueEntry.Id}: Failed to parse {workItemDataType.Name} work item data");
9697
await queueEntry.AbandonAsync().AnyContext();
9798
return JobResult.FromException(ex, $"Abandoning {queueEntry.Value.Type} work item: {queueEntry.Id}: Failed to parse {workItemDataType.Name} work item data");
9899
}
@@ -101,7 +102,9 @@ public async Task<JobResult> ProcessAsync(IQueueEntry<WorkItemData> queueEntry,
101102
if (handler == null)
102103
{
103104
await queueEntry.CompleteAsync().AnyContext();
104-
return JobResult.FailedWithMessage($"Completing {queueEntry.Value.Type} work item: {queueEntry.Id}: Handler for type {workItemDataType.Name} not registered");
105+
var result = JobResult.FailedWithMessage($"Completing {queueEntry.Value.Type} work item: {queueEntry.Id}: Handler for type {workItemDataType.Name} not registered");
106+
activity?.SetErrorStatus(message: result.Message);
107+
return result;
105108
}
106109

107110
if (queueEntry.Value.SendProgressReports)
@@ -165,6 +168,8 @@ await Task.WhenAll(
165168
}
166169
catch (Exception ex)
167170
{
171+
activity?.SetErrorStatus(ex);
172+
168173
if (queueEntry.Value.SendProgressReports)
169174
await ReportProgressAsync(handler, queueEntry, -1, $"Failed: {ex.Message}").AnyContext();
170175

src/Foundatio/Lock/CacheLockProvider.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ public async Task<ILock> AcquireAsync(string resource, TimeSpan? timeUntilExpire
183183
_lockTimeoutCounter.Add(1);
184184

185185
if (cancellationToken.IsCancellationRequested)
186-
_logger.LogTrace("Cancellation requested for lock {Resource} ({LockId}) after {Duration:g}", resource, lockId, sw.Elapsed);
186+
_logger.LogDebug("Cancellation requested for lock {Resource} ({LockId}) after {Duration:g}", resource, lockId, sw.Elapsed);
187187
else
188188
_logger.LogWarning("Failed to acquire lock {Resource} ({LockId}) after {Duration:g}", resource, lockId, sw.Elapsed);
189189

src/Foundatio/Messaging/MessageBusBase.cs

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -274,26 +274,34 @@ protected async Task SendMessageToSubscribersAsync(IMessage message)
274274
_logger.LogTrace("Calling subscriber action: {SubscriberId}", subscriber.Id);
275275
using var activity = StartHandleMessageActivity(message);
276276

277-
using (_logger.BeginScope(s => s
278-
.PropertyIf("UniqueId", message.UniqueId, !String.IsNullOrEmpty(message.UniqueId))
279-
.PropertyIf("CorrelationId", message.CorrelationId, !String.IsNullOrEmpty(message.CorrelationId))))
277+
try
280278
{
281-
if (subscriber.Type == typeof(IMessage))
282-
{
283-
await subscriber.Action(message, subscriber.CancellationToken).AnyContext();
284-
}
285-
else if (subscriber.GenericType != null)
286-
{
287-
object typedMessage = Activator.CreateInstance(subscriber.GenericType, message);
288-
await subscriber.Action(typedMessage, subscriber.CancellationToken).AnyContext();
289-
}
290-
else
279+
using (_logger.BeginScope(s => s
280+
.PropertyIf("UniqueId", message.UniqueId, !String.IsNullOrEmpty(message.UniqueId))
281+
.PropertyIf("CorrelationId", message.CorrelationId, !String.IsNullOrEmpty(message.CorrelationId))))
291282
{
292-
await subscriber.Action(message.GetBody(), subscriber.CancellationToken).AnyContext();
283+
if (subscriber.Type == typeof(IMessage))
284+
{
285+
await subscriber.Action(message, subscriber.CancellationToken).AnyContext();
286+
}
287+
else if (subscriber.GenericType != null)
288+
{
289+
object typedMessage = Activator.CreateInstance(subscriber.GenericType, message);
290+
await subscriber.Action(typedMessage, subscriber.CancellationToken).AnyContext();
291+
}
292+
else
293+
{
294+
await subscriber.Action(message.GetBody(), subscriber.CancellationToken).AnyContext();
295+
}
293296
}
294-
}
295297

296-
_logger.LogTrace("Finished calling subscriber action: {SubscriberId}", subscriber.Id);
298+
_logger.LogTrace("Finished calling subscriber action: {SubscriberId}", subscriber.Id);
299+
}
300+
catch (Exception ex)
301+
{
302+
activity?.SetErrorStatus(ex);
303+
throw;
304+
}
297305
}, DisposedCancellationToken);
298306
});
299307

src/Foundatio/Queues/QueueBase.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,15 @@ protected QueueBase(TOptions options) : base(options?.TimeProvider, options?.Log
8383

8484
_nextQueueStatsUpdate = _timeProvider.GetUtcNow().Add(_options.MetricsPollingInterval);
8585
_logger.LogTrace("Getting metrics queue stats for {QueueName} ({QueueId}): Next update scheduled for {NextQueueStatsUpdate:O}", _options.Name, QueueId, _nextQueueStatsUpdate);
86+
using var activity = FoundatioDiagnostics.ActivitySource.StartActivity("Queue Stats: " + _options.Name);
8687
try
8788
{
88-
using var _ = FoundatioDiagnostics.ActivitySource.StartActivity("Queue Stats: " + _options.Name);
8989
_queueStats = GetMetricsQueueStats();
9090
return (_queueStats.Queued, _queueStats.Working, _queueStats.Deadletter);
9191
}
9292
catch (Exception ex)
9393
{
94+
activity?.SetErrorStatus(ex);
9495
_logger.LogError(ex, "Error getting queue metrics for {QueueName} ({QueueId}): {Message}", _options.Name, QueueId, ex.Message);
9596
return (0, 0, 0);
9697
}

src/Foundatio/Utility/FoundatioDiagnostics.cs

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
using System.Diagnostics;
1+
using System;
2+
using System.Diagnostics;
23
using System.Diagnostics.Metrics;
34
using System.Reflection;
45

@@ -10,4 +11,62 @@ public static class FoundatioDiagnostics
1011
internal static readonly string AssemblyVersion = typeof(FoundatioDiagnostics).Assembly.GetCustomAttribute<AssemblyInformationalVersionAttribute>()?.InformationalVersion ?? AssemblyName.Version.ToString();
1112
public static readonly ActivitySource ActivitySource = new(AssemblyName.Name, AssemblyVersion);
1213
public static readonly Meter Meter = new("Foundatio", AssemblyVersion);
14+
15+
/// <summary>
16+
/// Sets the activity status to Error and records the exception details.
17+
/// </summary>
18+
/// <param name="activity">The activity to set error status on.</param>
19+
/// <param name="exception">The exception that caused the error (optional).</param>
20+
/// <param name="message">A custom error message (optional, defaults to exception message).</param>
21+
public static void SetErrorStatus(this Activity activity, Exception exception = null, string message = null)
22+
{
23+
if (activity is null)
24+
return;
25+
26+
activity.SetStatus(ActivityStatusCode.Error, message ?? exception?.Message);
27+
28+
if (exception is not null)
29+
activity.AddException(exception);
30+
}
31+
}
32+
33+
#if !NET10_0_OR_GREATER
34+
internal static class ActivityAddExceptionPolyfill
35+
{
36+
private const string ExceptionEventName = "exception";
37+
private const string ExceptionMessageTag = "exception.message";
38+
private const string ExceptionStackTraceTag = "exception.stacktrace";
39+
private const string ExceptionTypeTag = "exception.type";
40+
41+
/// <summary>
42+
/// Polyfill for Activity.AddException available in .NET 10+.
43+
/// Add an <see cref="ActivityEvent" /> containing exception information to the <see cref="Activity.Events" /> list.
44+
/// </summary>
45+
/// <param name="activity">The activity to record the exception on.</param>
46+
/// <param name="exception">The exception to add to the attached events list.</param>
47+
/// <param name="tags">The tags to add to the exception event.</param>
48+
/// <param name="timestamp">The timestamp to add to the exception event.</param>
49+
/// <returns><see langword="this" /> for convenient chaining.</returns>
50+
public static Activity AddException(this Activity activity, Exception exception, in TagList tags = default, DateTimeOffset timestamp = default)
51+
{
52+
ArgumentNullException.ThrowIfNull(activity);
53+
ArgumentNullException.ThrowIfNull(exception);
54+
55+
var exceptionTags = new ActivityTagsCollection();
56+
57+
for (int i = 0; i < tags.Count; i++)
58+
exceptionTags.Add(tags[i]);
59+
60+
if (!exceptionTags.ContainsKey(ExceptionMessageTag))
61+
exceptionTags.Add(ExceptionMessageTag, exception.Message);
62+
63+
if (!exceptionTags.ContainsKey(ExceptionStackTraceTag))
64+
exceptionTags.Add(ExceptionStackTraceTag, exception.ToString());
65+
66+
if (!exceptionTags.ContainsKey(ExceptionTypeTag))
67+
exceptionTags.Add(ExceptionTypeTag, exception.GetType().ToString());
68+
69+
return activity.AddEvent(new ActivityEvent(ExceptionEventName, timestamp, exceptionTags));
70+
}
1371
}
72+
#endif

tests/Foundatio.Tests/Jobs/JobTests.cs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,4 +204,31 @@ public async Task JobLoopPerf()
204204
await job.RunContinuousAsync(null, iterations, TestCancellationToken);
205205
sw.Stop();
206206
}
207+
208+
[Fact]
209+
public async Task RunContinuousAsync_SuccessfulJob_DoesNotSetActivityErrorStatus()
210+
{
211+
// Arrange
212+
Activity capturedActivity = null;
213+
using var listener = new ActivityListener
214+
{
215+
ShouldListenTo = s => s.Name == "Foundatio",
216+
Sample = (ref ActivityCreationOptions<ActivityContext> _) => ActivitySamplingResult.AllDataAndRecorded,
217+
ActivityStopped = a =>
218+
{
219+
if (a.OperationName.StartsWith("Job:"))
220+
capturedActivity = a;
221+
}
222+
};
223+
ActivitySource.AddActivityListener(listener);
224+
225+
var job = new HelloWorldJob(null, Log);
226+
227+
// Act
228+
await job.RunContinuousAsync(iterationLimit: 1, cancellationToken: TestCancellationToken);
229+
230+
// Assert
231+
Assert.NotNull(capturedActivity);
232+
Assert.Equal(ActivityStatusCode.Unset, capturedActivity.Status);
233+
}
207234
}

0 commit comments

Comments
 (0)