Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
5f36cd4
WIP of PoC of tracing.
philliphoff May 15, 2025
76fdee2
Wiring tracing metadata through test server.
philliphoff May 15, 2025
4681dc0
Ensure orchestration traces are parented correctly.
philliphoff May 15, 2025
3421304
Check orchestration activities are same logical activity.
philliphoff May 15, 2025
b951b5d
Test activity traces.
philliphoff May 15, 2025
06697bb
Assert tags of orchestration activities.
philliphoff May 15, 2025
b8bd6ca
Sketch emitting client activity trace.
philliphoff May 16, 2025
47e2018
Exclude Rider files.
philliphoff May 28, 2025
588d518
Attempt to capture activity/orchestration failures.
philliphoff May 28, 2025
59d99b2
Stop orchestration trace when execution completes.
philliphoff Jun 3, 2025
e431238
Update protos to match backend.
philliphoff Jun 4, 2025
4d2c581
Add/set partent trace context to suborchestration action.
philliphoff Jun 5, 2025
5b35016
Port suborchestration tracing.
philliphoff Jun 5, 2025
2a80911
Add tracing for events sent by worker.
philliphoff Jun 6, 2025
889640e
Add tracing for client-sent events.
philliphoff Jun 6, 2025
183c4a1
Merge branch 'main' into philliphoff-orchestration-tracing
philliphoff Jun 10, 2025
bc42ee6
Add tracing for orchestration timers.
philliphoff Jun 10, 2025
5b44cef
Sketch basic tracing test.
philliphoff Jun 11, 2025
fa62a5f
Test activity types.
philliphoff Jun 12, 2025
4ca345d
Add test for sent events.
philliphoff Jun 12, 2025
11803e7
Add test for timer event.
philliphoff Jun 12, 2025
19cf60f
Add test for client raised events.
philliphoff Jun 12, 2025
a37b364
Move files to tracing namespace.
philliphoff Jun 12, 2025
7e8262c
Clean up files.
philliphoff Jun 12, 2025
6b67b9d
Fixup test for suborchestration tracing.
philliphoff Jun 12, 2025
73c8ee0
Add test for suborchestration failure.
philliphoff Jun 12, 2025
9497ad5
Revert change.
philliphoff Jun 12, 2025
99aea1b
Merge branch 'main' into philliphoff-orchestration-tracing
philliphoff Jun 12, 2025
a51a235
Eliminate the unnecessary activity ID (as it can be derived).
philliphoff Jun 13, 2025
14afe63
Fix up a number of warnings.
philliphoff Jul 18, 2025
41c44f1
Fixup doc and style warnings.
philliphoff Jul 18, 2025
f9f8ea3
Revert package props.
philliphoff Jul 18, 2025
0d65131
Updates per PR feedback.
philliphoff Jul 18, 2025
1afa837
Add orchestration status tag on completion.
philliphoff Jul 21, 2025
5a5c199
Update for move of orchestration trace context to "outer" message.
philliphoff Jul 25, 2025
6288cea
Merge branch 'main' into philliphoff-orchestration-tracing
philliphoff Jul 31, 2025
1dc3277
Fix test breaks.
philliphoff Jul 31, 2025
ae9a2fd
Updates per more PR feedback.
philliphoff Jul 31, 2025
bcf6ecf
Merge branch 'main' into philliphoff-orchestration-tracing
philliphoff Aug 7, 2025
6254ca4
Update protobuf version.
philliphoff Aug 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -348,3 +348,6 @@ MigrationBackup/

# Ionide (cross platform F# VS Code tools) working folder
.ionide/

# Rider (cross platform .NET/C# tools) working folder
.idea/
23 changes: 5 additions & 18 deletions src/Client/Grpc/GrpcDurableTaskClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Text;
using Google.Protobuf.WellKnownTypes;
using Microsoft.DurableTask.Client.Entities;
using Microsoft.DurableTask.Tracing;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
Expand Down Expand Up @@ -107,27 +108,9 @@
}
}

if (Activity.Current?.Id != null || Activity.Current?.TraceStateString != null)
{
if (request.ParentTraceContext == null)
{
request.ParentTraceContext = new P.TraceContext();
}

if (Activity.Current?.Id != null)
{
request.ParentTraceContext.TraceParent = Activity.Current?.Id;
}

if (Activity.Current?.TraceStateString != null)
{
request.ParentTraceContext.TraceState = Activity.Current?.TraceStateString;
}
}

DateTimeOffset? startAt = options?.StartAt;
this.logger.SchedulingOrchestration(
request.InstanceId,

Check warning on line 113 in src/Client/Grpc/GrpcDurableTaskClient.cs

View workflow job for this annotation

GitHub Actions / build

Dereference of a possibly null reference.
orchestratorName,
sizeInBytes: request.Input != null ? Encoding.UTF8.GetByteCount(request.Input) : 0,
startAt.GetValueOrDefault(DateTimeOffset.UtcNow));
Expand All @@ -138,6 +121,8 @@
request.ScheduledStartTimestamp = Timestamp.FromDateTimeOffset(startAt.Value.ToUniversalTime());
}

using Activity? newActivity = TraceHelper.StartActivityForNewOrchestration(request);

P.CreateInstanceResponse? result = await this.sidecarClient.StartInstanceAsync(
request, cancellationToken: cancellation);
return result.InstanceId;
Expand All @@ -159,6 +144,8 @@
Input = this.DataConverter.Serialize(eventPayload),
};

using Activity? traceActivity = TraceHelper.StartActivityForNewEventRaisedFromClient(request, instanceId);

await this.sidecarClient.RaiseEventAsync(request, cancellationToken: cancellation);
}

Expand Down
31 changes: 21 additions & 10 deletions src/Grpc/orchestrator_service.proto
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ message EntityOperationCalledEvent {
}

message EntityLockRequestedEvent {
string criticalSectionId = 1;
string criticalSectionId = 1;
repeated string lockSet = 2;
int32 position = 3;
google.protobuf.StringValue parentInstanceId = 4; // used only within messages, null in histories
Expand All @@ -218,7 +218,7 @@ message EntityUnlockSentEvent {
message EntityLockGrantedEvent {
string criticalSectionId = 1;
}

message HistoryEvent {
int32 eventId = 1;
google.protobuf.Timestamp timestamp = 2;
Expand All @@ -245,8 +245,8 @@ message HistoryEvent {
ExecutionResumedEvent executionResumed = 22;
EntityOperationSignaledEvent entityOperationSignaled = 23;
EntityOperationCalledEvent entityOperationCalled = 24;
EntityOperationCompletedEvent entityOperationCompleted = 25;
EntityOperationFailedEvent entityOperationFailed = 26;
EntityOperationCompletedEvent entityOperationCompleted = 25;
EntityOperationFailedEvent entityOperationFailed = 26;
EntityLockRequestedEvent entityLockRequested = 27;
EntityLockGrantedEvent entityLockGranted = 28;
EntityUnlockSentEvent entityUnlockSent = 29;
Expand All @@ -258,13 +258,15 @@ message ScheduleTaskAction {
google.protobuf.StringValue version = 2;
google.protobuf.StringValue input = 3;
map<string, string> tags = 4;
TraceContext parentTraceContext = 5;
}

message CreateSubOrchestrationAction {
string instanceId = 1;
string name = 2;
google.protobuf.StringValue version = 3;
google.protobuf.StringValue input = 4;
TraceContext parentTraceContext = 5;
}

message CreateTimerAction {
Expand Down Expand Up @@ -314,6 +316,11 @@ message OrchestratorAction {
}
}

message OrchestrationTraceContext {
google.protobuf.StringValue spanID = 1;
google.protobuf.Timestamp spanStartTime = 2;
}

message OrchestratorRequest {
string instanceId = 1;
google.protobuf.StringValue executionId = 2;
Expand All @@ -322,6 +329,8 @@ message OrchestratorRequest {
OrchestratorEntityParameters entityParameters = 5;
bool requiresHistoryStreaming = 6;
map<string, google.protobuf.Value> properties = 7;

OrchestrationTraceContext orchestrationTraceContext = 8;
}

message OrchestratorResponse {
Expand All @@ -333,6 +342,8 @@ message OrchestratorResponse {
// The number of work item events that were processed by the orchestrator.
// This field is optional. If not set, the service should assume that the orchestrator processed all events.
google.protobuf.Int32Value numEventsProcessed = 5;

OrchestrationTraceContext orchestrationTraceContext = 6;
}

message CreateInstanceRequest {
Expand Down Expand Up @@ -498,7 +509,7 @@ message SignalEntityRequest {
}

message SignalEntityResponse {
// no payload
// no payload
}

message GetEntityRequest {
Expand Down Expand Up @@ -673,16 +684,16 @@ service TaskHubSidecarService {

// Waits for an orchestration instance to reach a running or completion state.
rpc WaitForInstanceStart(GetInstanceRequest) returns (GetInstanceResponse);

// Waits for an orchestration instance to reach a completion state (completed, failed, terminated, etc.).
rpc WaitForInstanceCompletion(GetInstanceRequest) returns (GetInstanceResponse);

// Raises an event to a running orchestration instance.
rpc RaiseEvent(RaiseEventRequest) returns (RaiseEventResponse);

// Terminates a running orchestration instance.
rpc TerminateInstance(TerminateRequest) returns (TerminateResponse);

// Suspends a running orchestration instance.
rpc SuspendInstance(SuspendRequest) returns (SuspendResponse);

Expand Down Expand Up @@ -764,7 +775,7 @@ message CompleteTaskResponse {
}

message HealthPing {
// No payload
// No payload
}

message StreamInstanceHistoryRequest {
Expand All @@ -777,4 +788,4 @@ message StreamInstanceHistoryRequest {

message HistoryChunk {
repeated HistoryEvent events = 1;
}
}
4 changes: 2 additions & 2 deletions src/Grpc/versions.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# The following files were downloaded from branch main at 2025-06-02 21:12:34 UTC
https://raw.githubusercontent.com/microsoft/durabletask-protobuf/fd9369c6a03d6af4e95285e432b7c4e943c06970/protos/orchestrator_service.proto
# The following files were downloaded from branch main at 2025-08-08 16:46:11 UTC
https://raw.githubusercontent.com/microsoft/durabletask-protobuf/e88acbd07ae38b499dbe8c4e333e9e3feeb2a9cc/protos/orchestrator_service.proto
40 changes: 39 additions & 1 deletion src/Shared/Grpc/ProtoUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

using System.Buffers;
using System.Buffers.Text;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Text;
using DurableTask.Core;
Expand All @@ -15,6 +16,7 @@
using Google.Protobuf.WellKnownTypes;
using DTCore = DurableTask.Core;
using P = Microsoft.DurableTask.Protobuf;
using TraceHelper = Microsoft.DurableTask.Tracing.TraceHelper;

namespace Microsoft.DurableTask;

Expand Down Expand Up @@ -268,43 +270,72 @@ internal static Timestamp ToTimestamp(this DateTime dateTime)
/// Constructs a <see cref="P.OrchestratorResponse" />.
/// </summary>
/// <param name="instanceId">The orchestrator instance ID.</param>
/// <param name="executionId">The orchestrator execution ID.</param>
/// <param name="customStatus">The orchestrator customer status or <c>null</c> if no custom status.</param>
/// <param name="actions">The orchestrator actions.</param>
/// <param name="completionToken">
/// The completion token for the work item. It must be the exact same <see cref="P.WorkItem.CompletionToken" />
/// value that was provided by the corresponding <see cref="P.WorkItem"/> that triggered the orchestrator execution.
/// </param>
/// <param name="entityConversionState">The entity conversion state, or null if no conversion is required.</param>
/// <param name="orchestrationActivity">The <see cref="Activity" /> that represents orchestration execution.</param>
/// <returns>The orchestrator response.</returns>
/// <exception cref="NotSupportedException">When an orchestrator action is unknown.</exception>
internal static P.OrchestratorResponse ConstructOrchestratorResponse(
string instanceId,
string executionId,
string? customStatus,
IEnumerable<OrchestratorAction> actions,
string completionToken,
EntityConversionState? entityConversionState)
EntityConversionState? entityConversionState,
Activity? orchestrationActivity)
{
Check.NotNull(actions);
var response = new P.OrchestratorResponse
{
InstanceId = instanceId,
CustomStatus = customStatus,
CompletionToken = completionToken,
OrchestrationTraceContext =
new()
{
SpanID = orchestrationActivity?.SpanId.ToString(),
SpanStartTime = orchestrationActivity?.StartTimeUtc.ToTimestamp(),
},
};

foreach (OrchestratorAction action in actions)
{
var protoAction = new P.OrchestratorAction { Id = action.Id };

P.TraceContext? CreateTraceContext()
{
if (orchestrationActivity is null)
{
return null;
}

ActivitySpanId clientSpanId = ActivitySpanId.CreateRandom();
ActivityContext clientActivityContext = new(orchestrationActivity.TraceId, clientSpanId, orchestrationActivity.ActivityTraceFlags, orchestrationActivity.TraceStateString);

return new P.TraceContext
{
TraceParent = $"00-{clientActivityContext.TraceId}-{clientActivityContext.SpanId}-0{clientActivityContext.TraceFlags:d}",
TraceState = clientActivityContext.TraceState,
};
}

switch (action.OrchestratorActionType)
{
case OrchestratorActionType.ScheduleOrchestrator:
var scheduleTaskAction = (ScheduleTaskOrchestratorAction)action;

protoAction.ScheduleTask = new P.ScheduleTaskAction
{
Name = scheduleTaskAction.Name,
Version = scheduleTaskAction.Version,
Input = scheduleTaskAction.Input,
ParentTraceContext = CreateTraceContext(),
};

if (scheduleTaskAction.Tags != null)
Expand All @@ -324,6 +355,7 @@ internal static P.OrchestratorResponse ConstructOrchestratorResponse(
InstanceId = subOrchestrationAction.InstanceId,
Name = subOrchestrationAction.Name,
Version = subOrchestrationAction.Version,
ParentTraceContext = CreateTraceContext(),
};
break;
case OrchestratorActionType.CreateTimer:
Expand Down Expand Up @@ -378,6 +410,12 @@ internal static P.OrchestratorResponse ConstructOrchestratorResponse(
Name = sendEventAction.EventName,
Data = sendEventAction.EventData,
};

// Distributed Tracing: start a new trace activity derived from the orchestration
// for an EventRaisedEvent (external event)
using Activity? traceActivity = TraceHelper.StartTraceActivityForEventRaisedFromWorker(sendEventAction, instanceId, executionId);

traceActivity?.Stop();
}

break;
Expand Down
Loading
Loading