Skip to content

Commit 3253c78

Browse files
committed
Add distributed tracing and structured error handling for extensions
1 parent 842719d commit 3253c78

File tree

15 files changed

+426
-51
lines changed

15 files changed

+426
-51
lines changed

cli/azd/.vscode/cspell.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,9 @@ overrides:
243243
- userosscache
244244
- docstates
245245
- dylib
246+
- filename: cli/azd/pkg/azdext/trace_context.go
247+
words:
248+
- traceparent
246249
ignorePaths:
247250
- "**/*_test.go"
248251
- "**/mock*.go"

cli/azd/grpc/proto/event.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ message EventMessage {
2525
InvokeServiceHandler invoke_service_handler = 5;
2626
ServiceHandlerStatus service_handler_status = 6;
2727
}
28+
29+
// W3C traceparent format for distributed tracing propagation.
30+
// Format: "00-{trace-id}-{span-id}-{trace-flags}" (see https://www.w3.org/TR/trace-context/)
31+
string trace_parent = 98;
2832
}
2933

3034
// Client subscribes to project-related events

cli/azd/grpc/proto/framework_service.proto

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ service FrameworkService {
1717
// Envelope for all possible framework service messages (requests and responses)
1818
message FrameworkServiceMessage {
1919
string request_id = 1;
20+
// W3C Trace Context traceparent header value for distributed tracing.
21+
// Format: "00-{trace-id}-{span-id}-{trace-flags}"
22+
// See: https://www.w3.org/TR/trace-context/
23+
string trace_parent = 98;
2024
FrameworkServiceErrorMessage error = 99;
2125
oneof message_type {
2226
RegisterFrameworkServiceRequest register_framework_service_request = 2;
@@ -41,6 +45,10 @@ message FrameworkServiceMessage {
4145
// Error payload carried in the `error` field of FrameworkServiceMessage.
message FrameworkServiceErrorMessage {
4246
// Human-readable error message. The Go unwrap helper treats an empty message as "no error".
string message = 1;
4347
// Additional error details; may be empty.
string details = 2;
48+
// Structured error information for telemetry.
// The Go side only treats these as service information when status_code > 0
// and service_name is non-empty.
49+
string error_code = 3; // Error code from the service (e.g., "Conflict", "NotFound")
50+
int32 status_code = 4; // HTTP status code (e.g., 409, 404, 500)
51+
string service_name = 5; // Service name for telemetry (e.g., "ai.azure.com", "cognitiveservices")
4452
}
4553

4654
// Request to register a framework service provider

cli/azd/grpc/proto/service_target.proto

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ service ServiceTargetService {
1717
// Envelope for all possible service target messages (requests and responses)
1818
message ServiceTargetMessage {
1919
string request_id = 1;
20+
// W3C Trace Context traceparent header value for distributed tracing.
21+
// Format: "00-{trace-id}-{span-id}-{trace-flags}"
22+
// See: https://www.w3.org/TR/trace-context/
23+
string trace_parent = 98;
2024
ServiceTargetErrorMessage error = 99;
2125
oneof message_type {
2226
RegisterServiceTargetRequest register_service_target_request = 2;
@@ -83,6 +87,10 @@ message RegisterServiceTargetResponse {
8387
// Error payload carried in the `error` field of ServiceTargetMessage.
message ServiceTargetErrorMessage {
8488
// Human-readable error message. The Go unwrap helper treats an empty message as "no error".
string message = 2;
8589
// Additional error details; may be empty.
string details = 3;
90+
// Structured error information for telemetry.
// The Go side only treats these as service information when status_code > 0
// and service_name is non-empty.
91+
string error_code = 4; // Error code from the service (e.g., "Conflict", "NotFound")
92+
int32 status_code = 5; // HTTP status code (e.g., 409, 404, 500)
93+
string service_name = 6; // Service name for telemetry (e.g., "ai.azure.com", "cognitiveservices")
8694
}
8795

8896
// GetTargetResource request and response

cli/azd/internal/cmd/errors.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"github.com/azure/azure-dev/cli/azd/internal/tracing/fields"
1818
"github.com/azure/azure-dev/cli/azd/pkg/auth"
1919
"github.com/azure/azure-dev/cli/azd/pkg/azapi"
20+
"github.com/azure/azure-dev/cli/azd/pkg/azdext"
2021
"github.com/azure/azure-dev/cli/azd/pkg/exec"
2122
"github.com/azure/azure-dev/cli/azd/pkg/extensions"
2223
"go.opentelemetry.io/otel/attribute"
@@ -33,6 +34,7 @@ func MapError(err error, span tracing.Span) {
3334
var toolExecErr *exec.ExitError
3435
var authFailedErr *auth.AuthFailedError
3536
var extensionRunErr *extensions.ExtensionRunError
37+
var extRespErr *azdext.ExtensionResponseError
3638
if errors.As(err, &respErr) {
3739
serviceName := "other"
3840
statusCode := -1
@@ -84,6 +86,24 @@ func MapError(err error, span tracing.Span) {
8486
errCode = "service.arm.deployment.failed"
8587
} else if errors.As(err, &extensionRunErr) {
8688
errCode = "ext.run.failed"
89+
} else if errors.As(err, &extRespErr) {
90+
// Handle structured errors from extensions (e.g., service target providers)
91+
if extRespErr.HasServiceInfo() {
92+
// Extension provided service information - use it for telemetry
93+
serviceName, hostDomain := mapService(extRespErr.ServiceName)
94+
errDetails = append(errDetails,
95+
fields.ServiceName.String(serviceName),
96+
fields.ServiceHost.String(hostDomain),
97+
fields.ServiceStatusCode.Int(extRespErr.StatusCode),
98+
)
99+
if extRespErr.ErrorCode != "" {
100+
errDetails = append(errDetails, fields.ServiceErrorCode.String(extRespErr.ErrorCode))
101+
}
102+
errCode = fmt.Sprintf("ext.service.%s.%d", serviceName, extRespErr.StatusCode)
103+
} else {
104+
// Extension error without service info
105+
errCode = "ext.service.failed"
106+
}
87107
} else if errors.As(err, &toolExecErr) {
88108
toolName := "other"
89109
cmdName := cmdAsName(toolExecErr.Cmd)

cli/azd/internal/tracing/fields/domains.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ var Domains = []Domain{
2929
{"azurefd.net", "frontdoor"},
3030
{"scm.azurewebsites.net", "kudu"},
3131
{"azurewebsites.net", "websites"},
32+
{"services.ai.azure.com", "ai"},
33+
{"cognitiveservices.azure.com", "cognitiveservices"},
34+
{"openai.azure.com", "openai"},
3235
{"blob.core.windows.net", "blob"},
3336
{"cloudapp.azure.com", "vm"},
3437
{"cloudapp.net", "vm"},

cli/azd/pkg/azdext/event.pb.go

Lines changed: 14 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cli/azd/pkg/azdext/event_message_envelope.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,16 @@ func (ops *EventMessageEnvelope) SetError(msg *EventMessage, err error) {
9696
// No-op: EventMessage uses status strings, not Error field
9797
}
9898

99+
// GetTraceParent returns the W3C traceparent value from the message for distributed tracing.
100+
func (ops *EventMessageEnvelope) GetTraceParent(msg *EventMessage) string {
101+
return msg.GetTraceParent()
102+
}
103+
104+
// SetTraceParent sets the W3C traceparent value on the message for distributed tracing.
105+
func (ops *EventMessageEnvelope) SetTraceParent(msg *EventMessage, traceParent string) {
106+
msg.TraceParent = traceParent
107+
}
108+
99109
// GetInnerMessage returns the inner message from the oneof field
100110
func (ops *EventMessageEnvelope) GetInnerMessage(msg *EventMessage) any {
101111
// The MessageType field is a oneof wrapper. We need to extract the actual inner message.
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
package azdext
5+
6+
import (
7+
"errors"
8+
"fmt"
9+
10+
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
11+
)
12+
13+
// ExtensionResponseError represents an HTTP response error returned from an extension over gRPC.
// It mirrors azcore.ResponseError and preserves structured error information for telemetry purposes.
type ExtensionResponseError struct {
	// Message is the human-readable error message.
	Message string
	// Details contains additional error details; it may be empty.
	Details string
	// ErrorCode is the error code from the service (e.g., "Conflict", "NotFound").
	ErrorCode string
	// StatusCode is the HTTP status code (e.g., 409, 404, 500).
	StatusCode int
	// ServiceName is the service name for telemetry (e.g., "ai.azure.com").
	ServiceName string
}

// Error implements the error interface.
//
// It returns "Message: Details" when both are set, and otherwise whichever of
// the two is non-empty, so an error that only carries Details does not render
// with a dangling ": " prefix.
func (e *ExtensionResponseError) Error() string {
	switch {
	case e.Details == "":
		return e.Message
	case e.Message == "":
		return e.Details
	default:
		return fmt.Sprintf("%s: %s", e.Message, e.Details)
	}
}

// HasServiceInfo reports whether the error carries service information usable
// for telemetry: a positive StatusCode together with a non-empty ServiceName.
//
// A nil receiver reports false. errors.As can yield a nil *ExtensionResponseError
// when a typed-nil pointer sits in an error chain, and callers invoke this
// method right after errors.As.
func (e *ExtensionResponseError) HasServiceInfo() bool {
	if e == nil {
		return false
	}
	return e.StatusCode > 0 && e.ServiceName != ""
}
40+
41+
// errorMessage is the type constraint shared by the generated protobuf error
// messages, which lets one generic helper unwrap any of them.
//
// comparable is embedded so a value can be checked against its zero value
// with ==; the getters expose the fields copied into the resulting Go error.
type errorMessage interface {
	comparable

	GetMessage() string
	GetDetails() string
	GetErrorCode() string
	GetStatusCode() int32
	GetServiceName() string
}
51+
52+
// errorInfo is the intermediate, transport-agnostic form of an error: the
// fields extracted from a Go error before they are copied into a specific
// protobuf error message type.
type errorInfo struct {
	// message, details and errorCode hold the error text, its optional
	// details, and the service error code.
	message, details, errorCode string
	// statusCode is the HTTP status code; int32 matches the protobuf field.
	statusCode int32
	// service is the service name (or host) reported for telemetry.
	service string
}
61+
62+
// captureErrorInfo extracts structured error information from a Go error.
63+
// It handles nil errors, ExtensionResponseError, and azcore.ResponseError.
64+
func captureErrorInfo(err error) errorInfo {
65+
if err == nil {
66+
return errorInfo{}
67+
}
68+
69+
// Default to the error string
70+
info := errorInfo{message: err.Error()}
71+
72+
// If it's already an ExtensionResponseError, preserve all fields including Details
73+
var extErr *ExtensionResponseError
74+
if errors.As(err, &extErr) {
75+
info.message = extErr.Message
76+
info.details = extErr.Details
77+
info.errorCode = extErr.ErrorCode
78+
info.statusCode = int32(extErr.StatusCode)
79+
info.service = extErr.ServiceName
80+
return info
81+
}
82+
83+
// Try to extract structured error information from Azure SDK errors
84+
var respErr *azcore.ResponseError
85+
if errors.As(err, &respErr) {
86+
info.errorCode = respErr.ErrorCode
87+
info.statusCode = int32(respErr.StatusCode)
88+
if respErr.RawResponse != nil && respErr.RawResponse.Request != nil {
89+
info.service = respErr.RawResponse.Request.Host
90+
}
91+
}
92+
93+
return info
94+
}
95+
96+
// WrapErrorForServiceTarget wraps a Go error into a ServiceTargetErrorMessage for transmission over gRPC.
97+
func WrapErrorForServiceTarget(err error) *ServiceTargetErrorMessage {
98+
info := captureErrorInfo(err)
99+
if info.message == "" {
100+
return nil
101+
}
102+
103+
return &ServiceTargetErrorMessage{
104+
Message: info.message,
105+
Details: info.details,
106+
ErrorCode: info.errorCode,
107+
StatusCode: info.statusCode,
108+
ServiceName: info.service,
109+
}
110+
}
111+
112+
// WrapErrorForFrameworkService wraps a Go error into a FrameworkServiceErrorMessage for transmission over gRPC.
113+
func WrapErrorForFrameworkService(err error) *FrameworkServiceErrorMessage {
114+
info := captureErrorInfo(err)
115+
if info.message == "" {
116+
return nil
117+
}
118+
119+
return &FrameworkServiceErrorMessage{
120+
Message: info.message,
121+
Details: info.details,
122+
ErrorCode: info.errorCode,
123+
StatusCode: info.statusCode,
124+
ServiceName: info.service,
125+
}
126+
}
127+
128+
// unwrapError is a generic helper to convert protobuf error messages back to Go errors
129+
func unwrapError[T errorMessage](msg T) error {
130+
var zero T
131+
if msg == zero || msg.GetMessage() == "" {
132+
return nil
133+
}
134+
135+
return &ExtensionResponseError{
136+
Message: msg.GetMessage(),
137+
Details: msg.GetDetails(),
138+
ErrorCode: msg.GetErrorCode(),
139+
StatusCode: int(msg.GetStatusCode()),
140+
ServiceName: msg.GetServiceName(),
141+
}
142+
}
143+
144+
// UnwrapErrorFromServiceTarget converts a ServiceTargetErrorMessage back to a Go error.
145+
func UnwrapErrorFromServiceTarget(msg *ServiceTargetErrorMessage) error {
146+
return unwrapError(msg)
147+
}
148+
149+
// UnwrapErrorFromFrameworkService converts a FrameworkServiceErrorMessage back to a Go error.
150+
func UnwrapErrorFromFrameworkService(msg *FrameworkServiceErrorMessage) error {
151+
return unwrapError(msg)
152+
}

0 commit comments

Comments
 (0)