Skip to content

Commit f1ef6c4

Browse files
HTTP metrics export batching does not retry a batch after a non-transient failure
1 parent dc86036 commit f1ef6c4

File tree

2 files changed

+147
-35
lines changed

2 files changed

+147
-35
lines changed

exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py

Lines changed: 45 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -237,13 +237,14 @@ def export(
237237
**kwargs,
238238
) -> MetricExportResult:
239239
serialized_data = encode_metrics(metrics_data)
240-
for delay in _create_exp_backoff_generator(
241-
max_value=self._MAX_RETRY_TIMEOUT
242-
):
243-
if delay == self._MAX_RETRY_TIMEOUT:
244-
return MetricExportResult.FAILURE
245240

246-
if self._max_export_batch_size is None:
241+
if self._max_export_batch_size is None:
242+
for delay in _create_exp_backoff_generator(
243+
max_value=self._MAX_RETRY_TIMEOUT
244+
):
245+
if delay == self._MAX_RETRY_TIMEOUT:
246+
return MetricExportResult.FAILURE
247+
247248
resp = self._export(serialized_data.SerializeToString())
248249
# pylint: disable=no-else-return
249250
if resp.ok:
@@ -264,38 +265,47 @@ def export(
264265
)
265266
return MetricExportResult.FAILURE
266267

267-
# Else, attempt export in batches for this retry
268-
else:
269-
export_result = MetricExportResult.SUCCESS
270-
for split_metrics_data in self._split_metrics_data(
271-
serialized_data
272-
):
273-
split_resp = self._export(
274-
split_metrics_data.SerializeToString()
275-
)
268+
return MetricExportResult.FAILURE
276269

277-
if split_resp.ok:
278-
export_result = MetricExportResult.SUCCESS
279-
elif self._retryable(split_resp):
280-
_logger.warning(
281-
"Transient error %s encountered while exporting metric batch, retrying in %ss.",
282-
split_resp.reason,
283-
delay,
284-
)
285-
sleep(delay)
286-
continue
287-
else:
288-
_logger.error(
289-
"Failed to export batch code: %s, reason: %s",
290-
split_resp.status_code,
291-
split_resp.text,
292-
)
293-
export_result = MetricExportResult.FAILURE
270+
# Else, attempt export in batches
271+
split_metrics_batches = list(self._split_metrics_data(serialized_data))
272+
export_result = MetricExportResult.SUCCESS
273+
274+
for split_metrics_data in split_metrics_batches:
275+
# Export current batch until success, non-transient error, or timeout reached
276+
for delay in _create_exp_backoff_generator(
277+
max_value=self._MAX_RETRY_TIMEOUT
278+
):
279+
if delay == self._MAX_RETRY_TIMEOUT:
280+
export_result = MetricExportResult.FAILURE
281+
break
294282

295-
# Return result after all batches are attempted
296-
return export_result
283+
split_resp = self._export(
284+
split_metrics_data.SerializeToString()
285+
)
286+
# pylint: disable=no-else-return
287+
if split_resp.ok:
288+
export_result = MetricExportResult.SUCCESS
289+
break
290+
elif self._retryable(split_resp):
291+
_logger.warning(
292+
"Transient error %s encountered while exporting metric batch, retrying in %ss.",
293+
split_resp.reason,
294+
delay,
295+
)
296+
sleep(delay)
297+
continue
298+
else:
299+
_logger.error(
300+
"Failed to export batch code: %s, reason: %s",
301+
split_resp.status_code,
302+
split_resp.text,
303+
)
304+
export_result = MetricExportResult.FAILURE
305+
break
297306

298-
return MetricExportResult.FAILURE
307+
# Return the result of the last batch attempted; note that an earlier batch's failure is overwritten by a later batch's outcome
308+
return export_result
299309

300310
def _split_metrics_data(
301311
self,

exporter/opentelemetry-exporter-otlp-proto-http/tests/metrics/test_otlp_metrics_exporter.py

Lines changed: 102 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -831,6 +831,108 @@ def test_exponential_backoff(self, mock_sleep):
831831
[call(1), call(2), call(4), call(8), call(16), call(32)]
832832
)
833833

834+
@patch.object(OTLPMetricExporter, "_export")
835+
@patch(
836+
"opentelemetry.exporter.otlp.proto.http.metric_exporter._create_exp_backoff_generator"
837+
)
838+
@patch("opentelemetry.exporter.otlp.proto.http.metric_exporter.sleep")
839+
@patch(
840+
"opentelemetry.exporter.otlp.proto.http.metric_exporter.encode_metrics"
841+
)
842+
def test_export_retries_with_batching(
843+
self,
844+
mock_encode_metrics,
845+
mock_sleep,
846+
mock_backoff_generator,
847+
mock_export,
848+
):
849+
mock_backoff_generator.return_value = iter([1, 2, 4])
850+
mock_export.side_effect = [
851+
# Non-retryable
852+
MagicMock(ok=False, status_code=400, reason="bad request"),
853+
# Retryable
854+
MagicMock(
855+
ok=False, status_code=500, reason="internal server error"
856+
),
857+
# Success
858+
MagicMock(ok=True),
859+
]
860+
mock_encode_metrics.return_value = pb2.MetricsData(
861+
resource_metrics=[
862+
_resource_metrics(
863+
index=1,
864+
scope_metrics=[
865+
_scope_metrics(
866+
index=1,
867+
metrics=[
868+
_gauge(
869+
index=1,
870+
data_points=[
871+
_number_data_point(11),
872+
_number_data_point(12),
873+
_number_data_point(13),
874+
],
875+
),
876+
],
877+
),
878+
],
879+
),
880+
]
881+
)
882+
batch_1 = pb2.MetricsData(
883+
resource_metrics=[
884+
_resource_metrics(
885+
index=1,
886+
scope_metrics=[
887+
_scope_metrics(
888+
index=1,
889+
metrics=[
890+
_gauge(
891+
index=1,
892+
data_points=[
893+
_number_data_point(11),
894+
_number_data_point(12),
895+
],
896+
),
897+
],
898+
),
899+
],
900+
),
901+
]
902+
)
903+
batch_2 = pb2.MetricsData(
904+
resource_metrics=[
905+
_resource_metrics(
906+
index=1,
907+
scope_metrics=[
908+
_scope_metrics(
909+
index=1,
910+
metrics=[
911+
_gauge(
912+
index=1,
913+
data_points=[
914+
_number_data_point(13),
915+
],
916+
),
917+
],
918+
),
919+
],
920+
),
921+
]
922+
)
923+
924+
exporter = OTLPMetricExporter(max_export_batch_size=2)
925+
result = exporter.export("foo")
926+
self.assertEqual(result, MetricExportResult.SUCCESS)
927+
self.assertEqual(mock_export.call_count, 3)
928+
mock_export.assert_has_calls(
929+
[
930+
call(batch_1.SerializeToString()),
931+
call(batch_2.SerializeToString()),
932+
call(batch_2.SerializeToString()),
933+
]
934+
)
935+
834936
def test_aggregation_temporality(self):
835937
otlp_metric_exporter = OTLPMetricExporter()
836938

0 commit comments

Comments
 (0)