Skip to content

Commit 88d2ad6

Browse files
[otlp] Grpc Status check and retry (open-telemetry#6000)
Co-authored-by: Mikel Blanchard <[email protected]>
1 parent f9a0b4c commit 88d2ad6

16 files changed

+348
-79
lines changed

src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/BaseOtlpGrpcExportClient.cs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,13 @@ namespace OpenTelemetry.Exporter.OpenTelemetryProtocol.Implementation.ExportClie
1313
/// <typeparam name="TRequest">Type of export request.</typeparam>
1414
internal abstract class BaseOtlpGrpcExportClient<TRequest> : IExportClient<TRequest>
1515
{
16-
protected static readonly ExportClientGrpcResponse SuccessExportResponse = new ExportClientGrpcResponse(success: true, deadlineUtc: default, exception: null);
16+
protected static readonly ExportClientGrpcResponse SuccessExportResponse
17+
= new(
18+
success: true,
19+
deadlineUtc: default,
20+
exception: null,
21+
status: null,
22+
grpcStatusDetailsHeader: null);
1723

1824
protected BaseOtlpGrpcExportClient(OtlpExporterOptions options)
1925
{
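
src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ExportClientGrpcResponse.cs

Lines changed: 11 additions & 1 deletion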
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,25 @@
11
// Copyright The OpenTelemetry Authors
22
// SPDX-License-Identifier: Apache-2.0
33

4+
using OpenTelemetry.Exporter.OpenTelemetryProtocol.Implementation.ExportClient.Grpc;
5+
46
namespace OpenTelemetry.Exporter.OpenTelemetryProtocol.Implementation.ExportClient;
57

68
internal sealed class ExportClientGrpcResponse : ExportClientResponse
79
{
810
public ExportClientGrpcResponse(
911
bool success,
1012
DateTime deadlineUtc,
11-
Exception? exception)
13+
Exception? exception,
14+
Status? status,
15+
string? grpcStatusDetailsHeader)
1216
: base(success, deadlineUtc, exception)
1317
{
18+
this.Status = status;
19+
this.GrpcStatusDetailsHeader = grpcStatusDetailsHeader;
1420
}
21+
22+
public Status? Status { get; }
23+
24+
public string? GrpcStatusDetailsHeader { get; }
1525
}

src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/ExportClientResponse.cs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
// Copyright The OpenTelemetry Authors
22
// SPDX-License-Identifier: Apache-2.0
33

4-
using System.Diagnostics.CodeAnalysis;
5-
64
namespace OpenTelemetry.Exporter.OpenTelemetryProtocol.Implementation.ExportClient;
75

86
internal abstract class ExportClientResponse
@@ -14,7 +12,6 @@ protected ExportClientResponse(bool success, DateTime deadlineUtc, Exception? ex
1412
this.DeadlineUtc = deadlineUtc;
1513
}
1614

17-
[MemberNotNullWhen(false, nameof(Exception))]
1815
public bool Success { get; }
1916

2017
public Exception? Exception { get; }

src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/Grpc/GrpcProtocolHelpers.cs

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
// See the License for the specific language governing permissions and
1616
// limitations under the License.
1717

18-
using System.Diagnostics.CodeAnalysis;
1918
#if NET462
2019
using System.Net.Http;
2120
#endif
@@ -27,38 +26,30 @@ internal static class GrpcProtocolHelpers
2726
{
2827
internal const string StatusTrailer = "grpc-status";
2928
internal const string MessageTrailer = "grpc-message";
30-
internal const string CancelledDetail = "No grpc-status found on response.";
3129

32-
public static Status? GetResponseStatus(HttpHeaders trailingHeaders, HttpResponseMessage httpResponse)
30+
public static Status GetResponseStatus(HttpResponseMessage httpResponse, HttpHeaders trailingHeaders)
3331
{
34-
Status? status;
3532
try
3633
{
37-
var result = trailingHeaders.Any() ? TryGetStatusCore(trailingHeaders, out status) : TryGetStatusCore(httpResponse.Headers, out status);
38-
39-
if (!result)
40-
{
41-
status = new Status(StatusCode.Cancelled, CancelledDetail);
42-
}
34+
return trailingHeaders.Any()
35+
? GetStatusCore(trailingHeaders)
36+
: GetStatusCore(httpResponse.Headers);
4337
}
4438
catch (Exception ex)
4539
{
4640
// Handle error from parsing badly formed status
47-
status = new Status(StatusCode.Cancelled, ex.Message, ex);
41+
return new Status(StatusCode.Internal, ex.Message, ex);
4842
}
49-
50-
return status;
5143
}
5244

53-
public static bool TryGetStatusCore(HttpHeaders headers, [NotNullWhen(true)] out Status? status)
45+
public static Status GetStatusCore(HttpHeaders headers)
5446
{
5547
var grpcStatus = GetHeaderValue(headers, StatusTrailer);
5648

5749
// grpc-status is a required trailer
5850
if (grpcStatus == null)
5951
{
60-
status = null;
61-
return false;
52+
return Status.NoReply;
6253
}
6354

6455
int statusValue;
@@ -79,8 +70,7 @@ public static bool TryGetStatusCore(HttpHeaders headers, [NotNullWhen(true)] out
7970
grpcMessage = Uri.UnescapeDataString(grpcMessage);
8071
}
8172

82-
status = new Status((StatusCode)statusValue, grpcMessage ?? string.Empty);
83-
return true;
73+
return new Status((StatusCode)statusValue, grpcMessage ?? string.Empty);
8474
}
8575

8676
public static string? GetHeaderValue(HttpHeaders? headers, string name, bool first = false)

src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/Grpc/Status.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ namespace OpenTelemetry.Exporter.OpenTelemetryProtocol.Implementation.ExportClie
2525
[DebuggerDisplay("{DebuggerToString(),nq}")]
2626
internal struct Status
2727
{
28+
public const string NoReplyDetailMessage = "No grpc-status found on response.";
29+
2830
/// <summary>
2931
/// Default result of a successful RPC. StatusCode=OK, empty details message.
3032
/// </summary>
@@ -35,6 +37,11 @@ internal struct Status
3537
/// </summary>
3638
public static readonly Status DefaultCancelled = new Status(StatusCode.Cancelled, string.Empty);
3739

40+
/// <summary>
41+
/// Default result of an RPC whose response carried no grpc-status. StatusCode=Internal, details message is <see cref="NoReplyDetailMessage"/>.
42+
/// </summary>
43+
public static readonly Status NoReply = new Status(StatusCode.Internal, NoReplyDetailMessage);
44+
3845
/// <summary>
3946
/// Initializes a new instance of the <see cref="Status"/> struct.
4047
/// </summary>

src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpGrpcLogExportClient.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public override ExportClientResponse SendExportRequest(OtlpCollector.ExportLogsS
3939
{
4040
OpenTelemetryProtocolExporterEventSource.Log.FailedToReachCollector(this.Endpoint, ex);
4141

42-
return new ExportClientGrpcResponse(success: false, deadlineUtc: deadlineUtc, exception: ex);
42+
return new ExportClientGrpcResponse(success: false, deadlineUtc, ex, null, null);
4343
}
4444
}
4545
}

src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpGrpcMetricsExportClient.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public override ExportClientResponse SendExportRequest(OtlpCollector.ExportMetri
3939
{
4040
OpenTelemetryProtocolExporterEventSource.Log.FailedToReachCollector(this.Endpoint, ex);
4141

42-
return new ExportClientGrpcResponse(success: false, deadlineUtc: deadlineUtc, exception: ex);
42+
return new ExportClientGrpcResponse(false, deadlineUtc, ex, null, null);
4343
}
4444
}
4545
}

src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpGrpcTraceExportClient.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public override ExportClientResponse SendExportRequest(OtlpCollector.ExportTrace
3939
{
4040
OpenTelemetryProtocolExporterEventSource.Log.FailedToReachCollector(this.Endpoint, ex);
4141

42-
return new ExportClientGrpcResponse(success: false, deadlineUtc: deadlineUtc, exception: ex);
42+
return new ExportClientGrpcResponse(false, deadlineUtc, ex, null, null);
4343
}
4444
}
4545
}

src/OpenTelemetry.Exporter.OpenTelemetryProtocol/Implementation/ExportClient/OtlpRetry.cs

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,12 +83,54 @@ public static bool ShouldHandleHttpRequestException(Exception? exception)
8383

8484
public static bool TryGetGrpcRetryResult(ExportClientGrpcResponse response, int retryDelayMilliseconds, out RetryResult retryResult)
8585
{
86+
retryResult = default;
87+
8688
if (response.Exception is RpcException rpcException)
8789
{
8890
return TryGetRetryResult(rpcException.StatusCode, IsGrpcStatusCodeRetryable, response.DeadlineUtc, rpcException.Trailers, TryGetGrpcRetryDelay, retryDelayMilliseconds, out retryResult);
8991
}
92+
else if (response.Status != null)
93+
{
94+
var nextRetryDelayMilliseconds = retryDelayMilliseconds;
95+
96+
if (IsDeadlineExceeded(response.DeadlineUtc))
97+
{
98+
return false;
99+
}
100+
101+
var throttleDelay = Grpc.GrpcStatusDeserializer.TryGetGrpcRetryDelay(response.GrpcStatusDetailsHeader);
102+
var retryable = IsGrpcStatusCodeRetryable(response.Status.Value.StatusCode, throttleDelay.HasValue);
103+
104+
if (!retryable)
105+
{
106+
return false;
107+
}
108+
109+
var delayDuration = throttleDelay ?? TimeSpan.FromMilliseconds(GetRandomNumber(0, nextRetryDelayMilliseconds));
110+
111+
if (IsDeadlineExceeded(response.DeadlineUtc + delayDuration))
112+
{
113+
return false;
114+
}
115+
116+
if (throttleDelay.HasValue)
117+
{
118+
try
119+
{
120+
// TODO: Consider making nextRetryDelayMilliseconds a double to avoid the need for convert/overflow handling
121+
nextRetryDelayMilliseconds = Convert.ToInt32(throttleDelay.Value.TotalMilliseconds);
122+
}
123+
catch (OverflowException)
124+
{
125+
nextRetryDelayMilliseconds = MaxBackoffMilliseconds;
126+
}
127+
}
128+
129+
nextRetryDelayMilliseconds = CalculateNextRetryDelay(nextRetryDelayMilliseconds);
130+
retryResult = new RetryResult(throttleDelay.HasValue, delayDuration, nextRetryDelayMilliseconds);
131+
return true;
132+
}
90133

91-
retryResult = default;
92134
return false;
93135
}
94136

@@ -216,6 +258,24 @@ private static bool IsGrpcStatusCodeRetryable(StatusCode statusCode, bool hasRet
216258
}
217259
}
218260

261+
private static bool IsGrpcStatusCodeRetryable(Grpc.StatusCode statusCode, bool hasRetryDelay)
262+
{
263+
switch (statusCode)
264+
{
265+
case Grpc.StatusCode.Cancelled:
266+
case Grpc.StatusCode.DeadlineExceeded:
267+
case Grpc.StatusCode.Aborted:
268+
case Grpc.StatusCode.OutOfRange:
269+
case Grpc.StatusCode.Unavailable:
270+
case Grpc.StatusCode.DataLoss:
271+
return true;
272+
case Grpc.StatusCode.ResourceExhausted:
273+
return hasRetryDelay;
274+
default:
275+
return false;
276+
}
277+
}
278+
219279
private static bool IsHttpStatusCodeRetryable(HttpStatusCode statusCode, bool hasRetryDelay)
220280
{
221281
switch (statusCode)

0 commit comments

Comments
 (0)