Skip to content

Commit 0658ed5

Browse files
authored
fix(telemetry): python 2.7 memory leak (#5526)
## Description Telemetry logs: Fix an error when Telemetry is trying to copy and clear the telemetry logs list and that generates a memory leak on Python 2.7. ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/contributing.html#Release-Note-Guidelines) are followed. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] PR description includes explicit acknowledgement/acceptance of the performance implications of this PR as reported in the benchmarks PR comment. ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment.
1 parent d43beed commit 0658ed5

File tree

2 files changed

+24
-19
lines changed

2 files changed

+24
-19
lines changed

ddtrace/internal/telemetry/writer.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -297,15 +297,23 @@ def _flush_namespace_metrics(self):
297297
# type () -> List[Metric]
298298
"""Returns a list of all generated metrics and clears the namespace's list"""
299299
with self._lock:
300-
namespace_metrics = self._namespace.get()
301-
self._namespace._flush()
300+
try:
301+
namespace_metrics = self._namespace.get()
302+
except Exception:
303+
log.debug("Unexpected error in Telemetry Metrics", exc_info=True)
304+
finally:
305+
self._namespace._flush()
302306
return namespace_metrics
303307

304308
def _flush_log_metrics(self):
305309
# type () -> List[Metric]
306310
with self._lock:
307-
log_metrics = self._logs.copy()
308-
self._logs = []
311+
try:
312+
log_metrics = list(self._logs)
313+
except Exception:
314+
log.debug("Unexpected error in Logs Metrics", exc_info=True)
315+
finally:
316+
self._logs = []
309317
return log_metrics
310318

311319
def _generate_metrics_event(self, namespace_metrics):

tests/telemetry/test_telemetry_metrics.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import sys
2-
31
import pytest
42

53
from ddtrace.internal.telemetry.constants import TELEMETRY_NAMESPACE_TAG_APPSEC
@@ -33,7 +31,12 @@ def _assert_metric(
3331
}
3432
assert events[0]["request_type"] == type_paypload
3533

36-
assert events[0] == _get_request_body(payload, type_paypload, seq_id)
34+
# Python 2.7 and Python 3.5 fail with dictionaries and lists order
35+
expected_body = _get_request_body(payload, type_paypload, seq_id)
36+
expected_body_sorted = expected_body["payload"]["series"].sort(key=lambda x: x["metric"], reverse=False)
37+
result_event = events[0]["payload"]["series"].sort(key=lambda x: x["metric"], reverse=False)
38+
39+
assert result_event == expected_body_sorted
3740

3841

3942
def _assert_logs(
@@ -46,10 +49,14 @@ def _assert_logs(
4649

4750
assert len([event for event in events if event["request_type"] == TELEMETRY_TYPE_LOGS]) == seq_id
4851

49-
assert events[0] == _get_request_body(expected_payload, TELEMETRY_TYPE_LOGS, seq_id)
52+
# Python 2.7 and Python 3.5 fail with dictionaries and lists order
53+
expected_body = _get_request_body(expected_payload, TELEMETRY_TYPE_LOGS, seq_id)
54+
expected_body_sorted = expected_body["payload"].sort(key=lambda x: x["message"], reverse=False)
55+
result_event = events[0]["payload"].sort(key=lambda x: x["message"], reverse=False)
56+
57+
assert result_event == expected_body_sorted
5058

5159

52-
@pytest.mark.skipif(sys.version_info < (3, 6), reason="mock.ANY doesn't works in py3.5 or lower")
5360
def test_send_metric_flush_and_generate_metrics_series_is_restarted(test_agent_metrics_session, mock_time):
5461
"""Check the queue of metrics is empty after run periodic method of PeriodicService"""
5562
with override_global_config(dict(_telemetry_metrics_enabled=True)):
@@ -72,7 +79,6 @@ def test_send_metric_flush_and_generate_metrics_series_is_restarted(test_agent_m
7279
_assert_metric(test_agent_metrics_session, expected_series, seq_id=2)
7380

7481

75-
@pytest.mark.skipif(sys.version_info < (3, 6), reason="mock.ANY doesn't works in py3.5 or lower")
7682
def test_send_metric_datapoint_equal_type_and_tags_yields_single_series(test_agent_metrics_session, mock_time):
7783
"""Check metrics datapoints and the aggregations by datapoint ID.
7884
A datapoint ID is at least: a metric name, a metric value, and the time at which the value was collected.
@@ -97,7 +103,6 @@ def test_send_metric_datapoint_equal_type_and_tags_yields_single_series(test_age
97103
_assert_metric(test_agent_metrics_session, expected_series)
98104

99105

100-
@pytest.mark.skipif(sys.version_info < (3, 6), reason="mock.ANY doesn't works in py3.5 or lower")
101106
def test_send_metric_datapoint_equal_type_different_tags_yields_multiple_series(test_agent_metrics_session, mock_time):
102107
"""Check metrics datapoints and the aggregations by datapoint ID.
103108
A datapoint ID is at least: a metric name, a metric value, and the time at which the value was collected.
@@ -155,7 +160,6 @@ def test_send_metric_datapoint_equal_tags_different_type_throws_error(test_agent
155160
)
156161

157162

158-
@pytest.mark.skipif(sys.version_info < (3, 6), reason="mock.ANY doesn't works in py3.5 or lower")
159163
def test_send_tracers_count_metric(test_agent_metrics_session, mock_time):
160164
with override_global_config(dict(_telemetry_metrics_enabled=True)):
161165
telemetry_writer = test_agent_metrics_session.telemetry_writer
@@ -192,7 +196,6 @@ def test_send_tracers_count_metric(test_agent_metrics_session, mock_time):
192196
_assert_metric(test_agent_metrics_session, expected_series)
193197

194198

195-
@pytest.mark.skipif(sys.version_info < (3, 6), reason="mock.ANY doesn't works in py3.5 or lower")
196199
def test_send_appsec_rate_metric(test_agent_metrics_session, mock_time):
197200
with override_global_config(dict(_telemetry_metrics_enabled=True)):
198201
telemetry_writer = test_agent_metrics_session.telemetry_writer
@@ -224,7 +227,6 @@ def test_send_appsec_rate_metric(test_agent_metrics_session, mock_time):
224227
_assert_metric(test_agent_metrics_session, expected_series, namespace=TELEMETRY_NAMESPACE_TAG_APPSEC)
225228

226229

227-
@pytest.mark.skipif(sys.version_info < (3, 6), reason="mock.ANY doesn't works in py3.5 or lower")
228230
def test_send_appsec_gauge_metric(test_agent_metrics_session, mock_time):
229231
with override_global_config(dict(_telemetry_metrics_enabled=True)):
230232
telemetry_writer = test_agent_metrics_session.telemetry_writer
@@ -263,7 +265,6 @@ def test_send_appsec_gauge_metric(test_agent_metrics_session, mock_time):
263265
_assert_metric(test_agent_metrics_session, expected_series, namespace=TELEMETRY_NAMESPACE_TAG_APPSEC)
264266

265267

266-
@pytest.mark.skipif(sys.version_info < (3, 6), reason="mock.ANY doesn't works in py3.5 or lower")
267268
def test_send_appsec_distributions_metric(test_agent_metrics_session, mock_time):
268269
with override_global_config(dict(_telemetry_metrics_enabled=True)):
269270
telemetry_writer = test_agent_metrics_session.telemetry_writer
@@ -286,7 +287,6 @@ def test_send_appsec_distributions_metric(test_agent_metrics_session, mock_time)
286287
)
287288

288289

289-
@pytest.mark.skipif(sys.version_info < (3, 6), reason="mock.ANY doesn't works in py3.5 or lower")
290290
def test_send_metric_flush_and_distributions_series_is_restarted(test_agent_metrics_session, mock_time):
291291
"""Check the queue of metrics is empty after run periodic method of PeriodicService"""
292292
with override_global_config(dict(_telemetry_metrics_enabled=True)):
@@ -328,7 +328,6 @@ def test_send_metric_flush_and_distributions_series_is_restarted(test_agent_metr
328328
)
329329

330330

331-
@pytest.mark.skipif(sys.version_info < (3, 6), reason="mock.ANY doesn't works in py3.5 or lower")
332331
def test_send_log_metric_simple(test_agent_metrics_session, mock_time):
333332
"""Check the queue of metrics is empty after run periodic method of PeriodicService"""
334333
with override_global_config(dict(_telemetry_metrics_enabled=True)):
@@ -345,7 +344,6 @@ def test_send_log_metric_simple(test_agent_metrics_session, mock_time):
345344
_assert_logs(test_agent_metrics_session, expected_payload)
346345

347346

348-
@pytest.mark.skipif(sys.version_info < (3, 6), reason="mock.ANY doesn't works in py3.5 or lower")
349347
def test_send_log_metric_simple_tags(test_agent_metrics_session, mock_time):
350348
"""Check the queue of metrics is empty after run periodic method of PeriodicService"""
351349
with override_global_config(dict(_telemetry_metrics_enabled=True)):
@@ -363,7 +361,6 @@ def test_send_log_metric_simple_tags(test_agent_metrics_session, mock_time):
363361
_assert_logs(test_agent_metrics_session, expected_payload)
364362

365363

366-
@pytest.mark.skipif(sys.version_info < (3, 6), reason="mock.ANY doesn't works in py3.5 or lower")
367364
def test_send_multiple_log_metric(test_agent_metrics_session, mock_time):
368365
"""Check the queue of metrics is empty after run periodic method of PeriodicService"""
369366
with override_global_config(dict(_telemetry_metrics_enabled=True)):

0 commit comments

Comments
 (0)