From 6de4f9b1da071c35fce8ca8d51c541bcde67d11e Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 9 Sep 2024 10:21:17 -0400 Subject: [PATCH 01/89] WIP --- datadog/dogstatsd/aggregator.py | 24 +++++++++ datadog/dogstatsd/buffered_metrics.py | 70 +++++++++++++++++++++++++++ datadog/dogstatsd/metric_types.py | 3 ++ 3 files changed, 97 insertions(+) create mode 100644 datadog/dogstatsd/buffered_metrics.py diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index 4a805b75e..e2896af3d 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -4,6 +4,11 @@ GaugeMetric, SetMetric, ) +from datadog.dogstatsd.buffered_metrics import ( + HistogramMetric, + DistributionMetric, + TimingMetric +) from datadog.dogstatsd.metric_types import MetricType @@ -14,10 +19,18 @@ def __init__(self): MetricType.GAUGE: {}, MetricType.SET: {}, } + self.buffered_metrics_map = { + MetricType.HISTOGRAM: {}, + MetricType.DISTRIBUTION: {}, + MetricType.TIMING: {} + } self._locks = { MetricType.COUNT: threading.RLock(), MetricType.GAUGE: threading.RLock(), MetricType.SET: threading.RLock(), + MetricType.HISTOGRAM: threading.RLock(), + MetricType.DISTRIBUTION: threading.RLock(), + MetricType.TIMING: threading.RLock() } def flush_aggregated_metrics(self): @@ -30,6 +43,16 @@ def flush_aggregated_metrics(self): metrics.extend(metric.get_data() if isinstance(metric, SetMetric) else [metric]) return metrics + def flush_aggregated_buffered_metrics(self): + metrics = [] + for metric_type in self.buffered_metrics_map.keys(): + with self._locks[metric_type]: + current_metrics = self.buffered_metrics_map[metric_type] + self.buffered_metrics_map[metric_type] = {} + for metric in current_metrics.values(): + metrics.append(metric) + return metrics + def get_context(self, name, tags): tags_str = ",".join(tags) if tags is not None else "" return "{}:{}".format(name, tags_str) @@ -60,3 +83,4 @@ def add_metric( self.metrics_map[metric_type][context] = metric_class( name, value, tags, rate, timestamp ) + diff --git a/datadog/dogstatsd/buffered_metrics.py b/datadog/dogstatsd/buffered_metrics.py new file mode 100644 index 000000000..d94c00e51 --- /dev/null +++ b/datadog/dogstatsd/buffered_metrics.py @@ -0,0 +1,70 @@ +import random +from datadog.dogstatsd.metric_types import MetricType + + +class BufferedMetric(object): + def __init__(self, name, tags, metric_type, max_metrics=0, specified_rate=1.0): + self.name = name + self.tags = tags + self.metric_type = metric_type + self.max_metrics = max_metrics + self.specified_rate = specified_rate + self.data = [] + self.stored_metrics = 0 + self.total_metrics = 0 + + def aggregate(self, value): + self.data.append(value) + self.stored_metrics += 1 + self.total_metrics += 1 + + def maybe_add_metric(self, value): + if self.max_metrics > 0: + if self.stored_metrics >= self.max_metrics: + i = random.randint(0, self.total_metrics - 1) + if i < self.max_metrics: + self.data[i] = value + else: + self.data.append(value) + self.stored_metrics += 1 + self.total_metrics += 1 + else: + self.aggregate(value) + + def skip_metric(self): + self.total_metrics += 1 + + def flush(self): + total_metrics = self.total_metrics + if self.specified_rate != 1.0: + rate = self.specified_rate + else: + if total_metrics != 0: + rate = self.stored_metrics / total_metrics + else: + rate = 1.0 + return { + 'name': self.name, + 'tags': self.tags, + 'metric_type': self.metric_type, + 'rate': rate, + 'values': self.data[:] + } + + +class HistogramMetric(BufferedMetric): 
+ def __init__(self, name, value, tags, max_metrics=0, rate=1.0): + super(HistogramMetric, self).__init__(name, tags, MetricType.HISTOGRAM, max_metrics, rate) + self.aggregate(value) + + +class DistributionMetric(BufferedMetric): + def __init__(self, name, value, tags, max_metrics=0, rate=1.0): + super(DistributionMetric, self).__init__(name, tags, MetricType.DISTRIBUTION, max_metrics, rate) + self.aggregate(value) + + +class TimingMetric(BufferedMetric): + def __init__(self, name, value, tags, max_metrics=0, rate=1.0): + super(TimingMetric, self).__init__(name, tags, MetricType.TIMING, max_metrics, rate) + self.aggregate(value) diff --git a/datadog/dogstatsd/metric_types.py b/datadog/dogstatsd/metric_types.py index 8eee29849..584b1a3ca 100644 --- a/datadog/dogstatsd/metric_types.py +++ b/datadog/dogstatsd/metric_types.py @@ -2,3 +2,6 @@ class MetricType: COUNT = "c" GAUGE = "g" SET = "s" + HISTOGRAM = "h" + DISTRIBUTION = "d" + TIMING = "ms" From c171911f42e1a548c5cb34dc688d573a17a4491b Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 10 Sep 2024 14:10:24 -0400 Subject: [PATCH 02/89] add buffered_metrics object type (#853) * add buffered_metrics object type * update metric_types to include histogram, distribution, timing * Run tests on any branch --- .github/workflows/test.yml | 2 +- datadog/dogstatsd/buffered_metrics.py | 65 +++++++++++ datadog/dogstatsd/metric_types.py | 3 + tests/unit/dogstatsd/test_buffered_metrics.py | 104 ++++++++++++++++++ 4 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 datadog/dogstatsd/buffered_metrics.py create mode 100644 tests/unit/dogstatsd/test_buffered_metrics.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 36572d945..a81b1ed51 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,7 +6,7 @@ on: - master pull_request: branches: - - master + - '*' # TODO: Revert when merged to master concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} diff --git a/datadog/dogstatsd/buffered_metrics.py b/datadog/dogstatsd/buffered_metrics.py new file mode 100644 index 000000000..fa092e998 --- /dev/null +++ b/datadog/dogstatsd/buffered_metrics.py @@ -0,0 +1,65 @@ +import random +from datadog.dogstatsd.metric_types import MetricType + + +class BufferedMetric(object): + def __init__(self, name, value, tags, metric_type, max_metric_samples=0, specified_rate=1.0): + self.name = name + self.tags = tags + self.metric_type = metric_type + self.max_metric_samples = max_metric_samples + self.specified_rate = specified_rate + self.data = [value] + self.stored_metric_samples = 1 + self.total_metric_samples = 1 + + def aggregate(self, value): + self.data.append(value) + self.stored_metric_samples += 1 + self.total_metric_samples += 1 + + def maybe_add_metric(self, value): + if self.max_metric_samples > 0: + if self.stored_metric_samples >= self.max_metric_samples: + i = random.randint(0, self.total_metric_samples - 1) + if i < self.max_metric_samples: + self.data[i] = value + else: + self.data.append(value) + self.stored_metric_samples += 1 + self.total_metric_samples += 1 + else: + self.aggregate(value) + + def skip_metric(self): + self.total_metric_samples += 1 + + def flush(self): + total_metric_samples = self.total_metric_samples + if self.specified_rate != 1.0: + rate = self.specified_rate + else: + rate = self.stored_metric_samples / total_metric_samples + + return { + 'name': self.name, + 'tags': self.tags, + 'metric_type': self.metric_type, + 
'rate': rate, + 'values': self.data[:] + } + + +class HistogramMetric(BufferedMetric): + def __init__(self, name, value, tags, max_metric_samples=0, rate=1.0): + super(HistogramMetric, self).__init__(name, value, tags, MetricType.HISTOGRAM, max_metric_samples, rate) + + +class DistributionMetric(BufferedMetric): + def __init__(self, name, value, tags, max_metric_samples=0, rate=1.0): + super(DistributionMetric, self).__init__(name, value, tags, MetricType.DISTRIBUTION, max_metric_samples, rate) + + +class TimingMetric(BufferedMetric): + def __init__(self, name, value, tags, max_metric_samples=0, rate=1.0): + super(TimingMetric, self).__init__(name, value, tags, MetricType.TIMING, max_metric_samples, rate) diff --git a/datadog/dogstatsd/metric_types.py b/datadog/dogstatsd/metric_types.py index 8eee29849..584b1a3ca 100644 --- a/datadog/dogstatsd/metric_types.py +++ b/datadog/dogstatsd/metric_types.py @@ -2,3 +2,6 @@ class MetricType: COUNT = "c" GAUGE = "g" SET = "s" + HISTOGRAM = "h" + DISTRIBUTION = "d" + TIMING = "ms" diff --git a/tests/unit/dogstatsd/test_buffered_metrics.py b/tests/unit/dogstatsd/test_buffered_metrics.py new file mode 100644 index 000000000..b4ac90ab7 --- /dev/null +++ b/tests/unit/dogstatsd/test_buffered_metrics.py @@ -0,0 +1,104 @@ +import unittest +from datadog.dogstatsd.buffered_metrics import HistogramMetric, DistributionMetric, TimingMetric +from datadog.dogstatsd.metric_types import MetricType + +class TestBufferedMetric(unittest.TestCase): + + def test_new_histogram_metric(self): + s = HistogramMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) + self.assertEqual(s.data, [1.0]) + self.assertEqual(s.name, "test") + self.assertEqual(s.tags, "tag1,tag2") + self.assertEqual(s.specified_rate, 1.0) + self.assertEqual(s.metric_type, MetricType.HISTOGRAM) + + def test_histogram_metric_aggregate(self): + s = HistogramMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) + s.aggregate(123.45) + self.assertEqual(s.data, [1.0, 123.45]) + self.assertEqual(s.name, "test") + self.assertEqual(s.tags, "tag1,tag2") + self.assertEqual(s.specified_rate, 1.0) + self.assertEqual(s.metric_type, MetricType.HISTOGRAM) + + def test_flush_histogram_metric_aggregate(self): + s = HistogramMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) + m = s.flush() + self.assertEqual(m['metric_type'], MetricType.HISTOGRAM) + self.assertEqual(m['values'], [1.0]) + self.assertEqual(m['name'], "test") + self.assertEqual(m['tags'], "tag1,tag2") + + s.aggregate(21) + s.aggregate(123.45) + m = s.flush() + self.assertEqual(m['metric_type'], MetricType.HISTOGRAM) + self.assertEqual(m['values'], [1.0, 21.0, 123.45]) + self.assertEqual(m['name'], "test") + self.assertEqual(m['rate'], 1.0) + self.assertEqual(m['tags'], "tag1,tag2") + + def test_new_distribution_metric(self): + s = DistributionMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) + self.assertEqual(s.data, [1.0]) + self.assertEqual(s.name, "test") + self.assertEqual(s.tags, "tag1,tag2") + self.assertEqual(s.metric_type, MetricType.DISTRIBUTION) + + def test_distribution_metric_aggregate(self): + s = DistributionMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) + s.aggregate(123.45) + self.assertEqual(s.data, [1.0, 123.45]) + self.assertEqual(s.name, "test") + self.assertEqual(s.tags, "tag1,tag2") + self.assertEqual(s.metric_type, MetricType.DISTRIBUTION) + + def 
test_flush_distribution_metric_aggregate(self): + s = DistributionMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) + m = s.flush() + self.assertEqual(m['metric_type'], MetricType.DISTRIBUTION) + self.assertEqual(m['values'], [1.0]) + self.assertEqual(m['name'], "test") + self.assertEqual(m['tags'], "tag1,tag2") + + s.aggregate(21) + s.aggregate(123.45) + m = s.flush() + self.assertEqual(m['metric_type'], MetricType.DISTRIBUTION) + self.assertEqual(m['values'], [1.0, 21.0, 123.45]) + self.assertEqual(m['name'], "test") + self.assertEqual(m['tags'], "tag1,tag2") + + def test_new_timing_metric(self): + s = TimingMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) + self.assertEqual(s.data, [1.0]) + self.assertEqual(s.name, "test") + self.assertEqual(s.tags, "tag1,tag2") + self.assertEqual(s.metric_type, MetricType.TIMING) + + def test_timing_metric_aggregate(self): + s = TimingMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) + s.aggregate(123.45) + self.assertEqual(s.data, [1.0, 123.45]) + self.assertEqual(s.name, "test") + self.assertEqual(s.tags, "tag1,tag2") + self.assertEqual(s.metric_type, MetricType.TIMING) + + def test_flush_timing_metric_aggregate(self): + s = TimingMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) + m = s.flush() + self.assertEqual(m['metric_type'], MetricType.TIMING) + self.assertEqual(m['values'], [1.0]) + self.assertEqual(m['name'], "test") + self.assertEqual(m['tags'], "tag1,tag2") + + s.aggregate(21) + s.aggregate(123.45) + m = s.flush() + self.assertEqual(m['metric_type'], MetricType.TIMING) + self.assertEqual(m['values'], [1.0, 21.0, 123.45]) + self.assertEqual(m['name'], "test") + self.assertEqual(m['tags'], "tag1,tag2") + +if __name__ == '__main__': + unittest.main() From c112d5bed1c0c9596208a91ca672711a78f873d7 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 28 Oct 2024 13:28:45 -0400 Subject: [PATCH 03/89] revert test config change --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0880f7f7f..f86f06175 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,7 +9,7 @@ on: - master pull_request: branches: - - '*' # TODO: Revert when merged to master + - master concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} From 890c65735a25cf6563f2b3b28a57790957799be5 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 29 Oct 2024 16:23:48 -0400 Subject: [PATCH 04/89] add buffered_metric_context WIP --- datadog/dogstatsd/aggregator.py | 35 ++++++++++--- datadog/dogstatsd/buffered_metrics_context.py | 52 +++++++++++++++++++ 2 files changed, 79 insertions(+), 8 deletions(-) create mode 100644 datadog/dogstatsd/buffered_metrics_context.py diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index e2896af3d..733516311 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -10,6 +10,7 @@ TimingMetric ) from datadog.dogstatsd.metric_types import MetricType +from datadog.dogstatsd.buffered_metrics_context import BufferedMetricContexts class Aggregator(object): @@ -18,19 +19,14 @@ def __init__(self): MetricType.COUNT: {}, MetricType.GAUGE: {}, MetricType.SET: {}, - } - self.buffered_metrics_map = { - MetricType.HISTOGRAM: {}, - MetricType.DISTRIBUTION: {}, - MetricType.TIMING: {} + 
MetricType.HISTOGRAM: BufferedMetricContexts(HistogramMetric), + MetricType.DISTRIBUTION: BufferedMetricContexts(DistributionMetric), + MetricType.TIMING: BufferedMetricContexts(TimingMetric) } self._locks = { MetricType.COUNT: threading.RLock(), MetricType.GAUGE: threading.RLock(), MetricType.SET: threading.RLock(), - MetricType.HISTOGRAM: threading.RLock(), - MetricType.DISTRIBUTION: threading.RLock(), - MetricType.TIMING: threading.RLock() } def flush_aggregated_metrics(self): @@ -83,4 +79,27 @@ def add_metric( self.metrics_map[metric_type][context] = metric_class( name, value, tags, rate, timestamp ) + + def histogram(self, name, value, tags, rate): + return self.add_buffered_metric( + MetricType.HISTOGRAM, name, value, tags, rate + ) + + def distribution(self, name, value, tags, rate): + return self.add_buffered_metric( + MetricType.DISTRIBUTION, name, value, tags, rate + ) + + def timing(self, name, value, tags, rate): + return self.add_buffered_metric( + MetricType.TIMING, name, value, tags, rate + ) + + def add_buffered_metric( + self, metric_type, name, value, tags, rate + ): + context_key = self.get_context(name, tags) + metric_context = self.metrics_map[metric_type] + return metric_context.sample(name, value, tags, rate, context_key) + diff --git a/datadog/dogstatsd/buffered_metrics_context.py b/datadog/dogstatsd/buffered_metrics_context.py new file mode 100644 index 000000000..1dd90f9ad --- /dev/null +++ b/datadog/dogstatsd/buffered_metrics_context.py @@ -0,0 +1,52 @@ +from threading import Lock +from random import random + +from datadog.dogstatsd.buffered_metrics import BufferedMetric + +class BufferedMetricContexts: + def __init__(self, buffered_metric_type: BufferedMetric): + self.nb_context = 0 + self.lock = Lock() + self.values = {} + self.buffered_metric_type = buffered_metric_type + self.random = random.Random() + self.random_lock = Lock() + + def flush(self, metrics): + """Flush the metrics and reset the stored values.""" + with self.lock: + values = self.values.copy() + self.values.clear() + + for _, metric in values.items(): + with metric.lock: + metrics.append(metric.flush()) + + self.nb_context += len(values) + return metrics + + def sample(self, name, value, tags, rate, context_key): + """Sample a metric and store it if it meets the criteria.""" + keeping_sample = self.should_sample(rate) + + with self.lock: + if context_key not in self.values: + # Create a new metric if it doesn't exist + self.values[context_key] = self.buffered_metric_type(name, value, tags, 0, rate) + + metric = self.values[context_key] + + if keeping_sample: + with self.random_lock: + metric.maybe_keep_sample(value, self.random, self.random_lock) + else: + metric.skip_sample() + + def should_sample(self, rate): + """Determine if a sample should be kept based on the specified rate.""" + with self.random_lock: + return self.random.random() < rate + + def get_nb_context(self): + """Return the number of contexts.""" + return self.nb_context \ No newline at end of file From 4c2b238dea0e825811634861075a623d77293aa0 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 9 Dec 2024 14:03:38 -0500 Subject: [PATCH 05/89] change naming to sample --- datadog/dogstatsd/buffered_metrics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datadog/dogstatsd/buffered_metrics.py b/datadog/dogstatsd/buffered_metrics.py index fa092e998..b813f0520 100644 --- a/datadog/dogstatsd/buffered_metrics.py +++ b/datadog/dogstatsd/buffered_metrics.py @@ -13,12 +13,12 @@ def __init__(self, 
name, value, tags, metric_type, max_metric_samples=0, specifi self.stored_metric_samples = 1 self.total_metric_samples = 1 - def aggregate(self, value): + def sample(self, value): self.data.append(value) self.stored_metric_samples += 1 self.total_metric_samples += 1 - def maybe_add_metric(self, value): + def maybe_keep_sample(self, value): if self.max_metric_samples > 0: if self.stored_metric_samples >= self.max_metric_samples: i = random.randint(0, self.total_metric_samples - 1) @@ -29,9 +29,9 @@ def maybe_add_metric(self, value): self.stored_metric_samples += 1 self.total_metric_samples += 1 else: - self.aggregate(value) + self.sample(value) - def skip_metric(self): + def skip_sample(self): self.total_metric_samples += 1 def flush(self): From a84af9dc0edc9958c0dd0c38f0bcd7102986a0ff Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 9 Dec 2024 14:20:35 -0500 Subject: [PATCH 06/89] update tests --- tests/unit/dogstatsd/test_buffered_metrics.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/unit/dogstatsd/test_buffered_metrics.py b/tests/unit/dogstatsd/test_buffered_metrics.py index b4ac90ab7..8cc6ca7fc 100644 --- a/tests/unit/dogstatsd/test_buffered_metrics.py +++ b/tests/unit/dogstatsd/test_buffered_metrics.py @@ -12,16 +12,16 @@ def test_new_histogram_metric(self): self.assertEqual(s.specified_rate, 1.0) self.assertEqual(s.metric_type, MetricType.HISTOGRAM) - def test_histogram_metric_aggregate(self): + def test_histogram_metric_sample(self): s = HistogramMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) - s.aggregate(123.45) + s.sample(123.45) self.assertEqual(s.data, [1.0, 123.45]) self.assertEqual(s.name, "test") self.assertEqual(s.tags, "tag1,tag2") self.assertEqual(s.specified_rate, 1.0) self.assertEqual(s.metric_type, MetricType.HISTOGRAM) - def test_flush_histogram_metric_aggregate(self): + def test_flush_histogram_metric_sample(self): s = HistogramMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) m = s.flush() self.assertEqual(m['metric_type'], MetricType.HISTOGRAM) @@ -29,8 +29,8 @@ def test_flush_histogram_metric_aggregate(self): self.assertEqual(m['name'], "test") self.assertEqual(m['tags'], "tag1,tag2") - s.aggregate(21) - s.aggregate(123.45) + s.sample(21) + s.sample(123.45) m = s.flush() self.assertEqual(m['metric_type'], MetricType.HISTOGRAM) self.assertEqual(m['values'], [1.0, 21.0, 123.45]) @@ -45,15 +45,15 @@ def test_new_distribution_metric(self): self.assertEqual(s.tags, "tag1,tag2") self.assertEqual(s.metric_type, MetricType.DISTRIBUTION) - def test_distribution_metric_aggregate(self): + def test_distribution_metric_sample(self): s = DistributionMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) - s.aggregate(123.45) + s.sample(123.45) self.assertEqual(s.data, [1.0, 123.45]) self.assertEqual(s.name, "test") self.assertEqual(s.tags, "tag1,tag2") self.assertEqual(s.metric_type, MetricType.DISTRIBUTION) - def test_flush_distribution_metric_aggregate(self): + def test_flush_distribution_metric_sample(self): s = DistributionMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) m = s.flush() self.assertEqual(m['metric_type'], MetricType.DISTRIBUTION) @@ -61,8 +61,8 @@ def test_flush_distribution_metric_aggregate(self): self.assertEqual(m['name'], "test") self.assertEqual(m['tags'], "tag1,tag2") - s.aggregate(21) - s.aggregate(123.45) + s.sample(21) + s.sample(123.45) m = s.flush() 
self.assertEqual(m['metric_type'], MetricType.DISTRIBUTION) self.assertEqual(m['values'], [1.0, 21.0, 123.45]) @@ -76,15 +76,15 @@ def test_new_timing_metric(self): self.assertEqual(s.tags, "tag1,tag2") self.assertEqual(s.metric_type, MetricType.TIMING) - def test_timing_metric_aggregate(self): + def test_timing_metric_sample(self): s = TimingMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) - s.aggregate(123.45) + s.sample(123.45) self.assertEqual(s.data, [1.0, 123.45]) self.assertEqual(s.name, "test") self.assertEqual(s.tags, "tag1,tag2") self.assertEqual(s.metric_type, MetricType.TIMING) - def test_flush_timing_metric_aggregate(self): + def test_flush_timing_metric_sample(self): s = TimingMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) m = s.flush() self.assertEqual(m['metric_type'], MetricType.TIMING) @@ -92,8 +92,8 @@ def test_flush_timing_metric_aggregate(self): self.assertEqual(m['name'], "test") self.assertEqual(m['tags'], "tag1,tag2") - s.aggregate(21) - s.aggregate(123.45) + s.sample(21) + s.sample(123.45) m = s.flush() self.assertEqual(m['metric_type'], MetricType.TIMING) self.assertEqual(m['values'], [1.0, 21.0, 123.45]) From 583e287dce18114801ff646cd1e1e3f909726f9d Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 10 Dec 2024 14:39:40 -0500 Subject: [PATCH 07/89] fix buffered_metric_context and aggregator, update tests --- datadog/dogstatsd/aggregator.py | 22 ++++-- datadog/dogstatsd/buffered_metrics.py | 33 +++++---- datadog/dogstatsd/buffered_metrics_context.py | 30 ++++---- tests/unit/dogstatsd/test_aggregator.py | 68 +++++++++++++------ 4 files changed, 96 insertions(+), 57 deletions(-) diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index 733516311..4a00cee3f 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -14,19 +14,24 @@ class Aggregator(object): - def __init__(self): + def __init__(self, maxSamplesPerContext=0): self.metrics_map = { MetricType.COUNT: {}, MetricType.GAUGE: {}, MetricType.SET: {}, - MetricType.HISTOGRAM: BufferedMetricContexts(HistogramMetric), - MetricType.DISTRIBUTION: BufferedMetricContexts(DistributionMetric), - MetricType.TIMING: BufferedMetricContexts(TimingMetric) + } + self.buffered_metrics_map = { + MetricType.HISTOGRAM: BufferedMetricContexts(HistogramMetric, maxSamplesPerContext), + MetricType.DISTRIBUTION: BufferedMetricContexts(DistributionMetric, maxSamplesPerContext), + MetricType.TIMING: BufferedMetricContexts(TimingMetric, maxSamplesPerContext) } self._locks = { MetricType.COUNT: threading.RLock(), MetricType.GAUGE: threading.RLock(), MetricType.SET: threading.RLock(), + MetricType.HISTOGRAM: threading.RLock(), + MetricType.DISTRIBUTION: threading.RLock(), + MetricType.TIMING: threading.RLock(), } def flush_aggregated_metrics(self): @@ -37,6 +42,13 @@ def flush_aggregated_metrics(self): self.metrics_map[metric_type] = {} for metric in current_metrics.values(): metrics.extend(metric.get_data() if isinstance(metric, SetMetric) else [metric]) + + for metric_type in self.buffered_metrics_map.keys(): + with self._locks[metric_type]: + metric_context = self.buffered_metrics_map[metric_type] + self.buffered_metrics_map[metric_type] = {} + for metricList in metric_context.flush(): + metrics.extend(metricList) return metrics def flush_aggregated_buffered_metrics(self): @@ -99,7 +111,7 @@ def add_buffered_metric( self, metric_type, name, value, tags, rate ): context_key = self.get_context(name, tags) - 
metric_context = self.metrics_map[metric_type] + metric_context = self.buffered_metrics_map[metric_type] return metric_context.sample(name, value, tags, rate, context_key) diff --git a/datadog/dogstatsd/buffered_metrics.py b/datadog/dogstatsd/buffered_metrics.py index b813f0520..5b1ee242f 100644 --- a/datadog/dogstatsd/buffered_metrics.py +++ b/datadog/dogstatsd/buffered_metrics.py @@ -1,15 +1,16 @@ import random from datadog.dogstatsd.metric_types import MetricType +from datadog.dogstatsd.metrics import MetricAggregator class BufferedMetric(object): - def __init__(self, name, value, tags, metric_type, max_metric_samples=0, specified_rate=1.0): + def __init__(self, name, tags, metric_type, max_metric_samples=0, specified_rate=1.0): self.name = name self.tags = tags self.metric_type = metric_type self.max_metric_samples = max_metric_samples self.specified_rate = specified_rate - self.data = [value] + self.data = [] self.stored_metric_samples = 1 self.total_metric_samples = 1 @@ -19,16 +20,21 @@ def sample(self, value): self.total_metric_samples += 1 def maybe_keep_sample(self, value): + print("max metric samples is ", self.max_metric_samples) + print("stored metric samples is ", self.stored_metric_samples) if self.max_metric_samples > 0: if self.stored_metric_samples >= self.max_metric_samples: i = random.randint(0, self.total_metric_samples - 1) if i < self.max_metric_samples: + print("REPLACE") self.data[i] = value else: + print("APPEND") self.data.append(value) self.stored_metric_samples += 1 self.total_metric_samples += 1 else: + print("APPEND2") self.sample(value) def skip_sample(self): @@ -41,25 +47,22 @@ def flush(self): else: rate = self.stored_metric_samples / total_metric_samples - return { - 'name': self.name, - 'tags': self.tags, - 'metric_type': self.metric_type, - 'rate': rate, - 'values': self.data[:] - } + return [ + MetricAggregator(self.name, self.tags, rate, self.metric_type, value) + for value in self.data + ] class HistogramMetric(BufferedMetric): - def __init__(self, name, value, tags, max_metric_samples=0, rate=1.0): - super(HistogramMetric, self).__init__(name, value, tags, MetricType.HISTOGRAM, max_metric_samples, rate) + def __init__(self, name, tags, max_metric_samples=0, rate=1.0): + super(HistogramMetric, self).__init__(name, tags, MetricType.HISTOGRAM, max_metric_samples, rate) class DistributionMetric(BufferedMetric): - def __init__(self, name, value, tags, max_metric_samples=0, rate=1.0): - super(DistributionMetric, self).__init__(name, value, tags, MetricType.DISTRIBUTION, max_metric_samples, rate) + def __init__(self, name, tags, max_metric_samples=0, rate=1.0): + super(DistributionMetric, self).__init__(name, tags, MetricType.DISTRIBUTION, max_metric_samples, rate) class TimingMetric(BufferedMetric): - def __init__(self, name, value, tags, max_metric_samples=0, rate=1.0): - super(TimingMetric, self).__init__(name, value, tags, MetricType.TIMING, max_metric_samples, rate) + def __init__(self, name, tags, max_metric_samples=0, rate=1.0): + super(TimingMetric, self).__init__(name, tags, MetricType.TIMING, max_metric_samples, rate) diff --git a/datadog/dogstatsd/buffered_metrics_context.py b/datadog/dogstatsd/buffered_metrics_context.py index 1dd90f9ad..e4a54b82a 100644 --- a/datadog/dogstatsd/buffered_metrics_context.py +++ b/datadog/dogstatsd/buffered_metrics_context.py @@ -1,26 +1,26 @@ from threading import Lock -from random import random +import secrets from datadog.dogstatsd.buffered_metrics import BufferedMetric class BufferedMetricContexts: - def 
__init__(self, buffered_metric_type: BufferedMetric): + def __init__(self, buffered_metric_type: BufferedMetric, maxSamplesPerContext): self.nb_context = 0 self.lock = Lock() self.values = {} + self.maxSamplesPerContext = maxSamplesPerContext self.buffered_metric_type = buffered_metric_type - self.random = random.Random() - self.random_lock = Lock() + self.random = secrets - def flush(self, metrics): + def flush(self): + metrics = [] """Flush the metrics and reset the stored values.""" with self.lock: values = self.values.copy() self.values.clear() for _, metric in values.items(): - with metric.lock: - metrics.append(metric.flush()) + metrics.append(metric.flush()) self.nb_context += len(values) return metrics @@ -28,24 +28,24 @@ def flush(self, metrics): def sample(self, name, value, tags, rate, context_key): """Sample a metric and store it if it meets the criteria.""" keeping_sample = self.should_sample(rate) - + print("keeping sample is ", keeping_sample) + print("context_key is ", context_key) with self.lock: if context_key not in self.values: # Create a new metric if it doesn't exist - self.values[context_key] = self.buffered_metric_type(name, value, tags, 0, rate) - + self.values[context_key] = self.buffered_metric_type(name, tags, self.maxSamplesPerContext, rate) metric = self.values[context_key] - + print("values are :", self.values.keys()) if keeping_sample: - with self.random_lock: - metric.maybe_keep_sample(value, self.random, self.random_lock) + metric.maybe_keep_sample(value) else: metric.skip_sample() def should_sample(self, rate): """Determine if a sample should be kept based on the specified rate.""" - with self.random_lock: - return self.random.random() < rate + if rate >= 1: + return True + return secrets.SystemRandom().random() < rate def get_nb_context(self): """Return the number of contexts.""" diff --git a/tests/unit/dogstatsd/test_aggregator.py b/tests/unit/dogstatsd/test_aggregator.py index be46e1ed6..2d2113269 100644 --- a/tests/unit/dogstatsd/test_aggregator.py +++ b/tests/unit/dogstatsd/test_aggregator.py @@ -9,30 +9,30 @@ def setUp(self): def test_aggregator_sample(self): tags = ["tag1", "tag2"] + for _ in range(2): + self.aggregator.gauge("gaugeTest", 21, tags, 1) + self.assertEqual(len(self.aggregator.metrics_map[MetricType.GAUGE]), 1) + self.assertIn("gaugeTest:tag1,tag2", self.aggregator.metrics_map[MetricType.GAUGE]) - self.aggregator.gauge("gaugeTest", 21, tags, 1) - self.assertEqual(len(self.aggregator.metrics_map[MetricType.GAUGE]), 1) - self.assertIn("gaugeTest:tag1,tag2", self.aggregator.metrics_map[MetricType.GAUGE]) + self.aggregator.count("countTest", 21, tags, 1) + self.assertEqual(len(self.aggregator.metrics_map[MetricType.COUNT]), 1) + self.assertIn("countTest:tag1,tag2", self.aggregator.metrics_map[MetricType.COUNT]) - self.aggregator.count("countTest", 21, tags, 1) - self.assertEqual(len(self.aggregator.metrics_map[MetricType.COUNT]), 1) - self.assertIn("countTest:tag1,tag2", self.aggregator.metrics_map[MetricType.COUNT]) + self.aggregator.set("setTest", "value1", tags, 1) + self.assertEqual(len(self.aggregator.metrics_map[MetricType.SET]), 1) + self.assertIn("setTest:tag1,tag2", self.aggregator.metrics_map[MetricType.SET]) - self.aggregator.set("setTest", "value1", tags, 1) - self.assertEqual(len(self.aggregator.metrics_map[MetricType.SET]), 1) - self.assertIn("setTest:tag1,tag2", self.aggregator.metrics_map[MetricType.SET]) + self.aggregator.histogram("histogramTest", 21, tags, 1) + 
self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.HISTOGRAM].values), 1) + self.assertIn("histogramTest:tag1,tag2", self.aggregator.buffered_metrics_map[MetricType.HISTOGRAM].values) - self.aggregator.gauge("gaugeTest", 123, tags, 1) - self.assertEqual(len(self.aggregator.metrics_map[MetricType.GAUGE]), 1) - self.assertIn("gaugeTest:tag1,tag2", self.aggregator.metrics_map[MetricType.GAUGE]) + self.aggregator.distribution("distributionTest", 21, tags, 1) + self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.DISTRIBUTION].values), 1) + self.assertIn("distributionTest:tag1,tag2", self.aggregator.buffered_metrics_map[MetricType.DISTRIBUTION].values) - self.aggregator.count("countTest", 10, tags, 1) - self.assertEqual(len(self.aggregator.metrics_map[MetricType.COUNT]), 1) - self.assertIn("countTest:tag1,tag2", self.aggregator.metrics_map[MetricType.COUNT]) - - self.aggregator.set("setTest", "value1", tags, 1) - self.assertEqual(len(self.aggregator.metrics_map[MetricType.SET]), 1) - self.assertIn("setTest:tag1,tag2", self.aggregator.metrics_map[MetricType.SET]) + self.aggregator.timing("timingTest", 21, tags, 1) + self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.TIMING].values), 1) + self.assertIn("timingTest:tag1,tag2", self.aggregator.buffered_metrics_map[MetricType.TIMING].values) def test_aggregator_flush(self): tags = ["tag1", "tag2"] @@ -50,23 +50,47 @@ def test_aggregator_flush(self): self.aggregator.set("setTest1", "value2", tags, 1) self.aggregator.set("setTest2", "value1", tags, 1) + self.aggregator.histogram("histogramTest1", 21, tags, 1) + self.aggregator.histogram("histogramTest1", 22, tags, 1) + self.aggregator.histogram("histogramTest2", 23, tags, 1) + + self.aggregator.distribution("distributionTest1", 21, tags, 1) + self.aggregator.distribution("distributionTest1", 22, tags, 1) + self.aggregator.distribution("distributionTest2", 23, tags, 1) + + self.aggregator.timing("timingTest1", 21, tags, 1) + self.aggregator.timing("timingTest1", 22, tags, 1) + self.aggregator.timing("timingTest2", 23, tags, 1) + metrics = self.aggregator.flush_aggregated_metrics() self.assertEqual(len(self.aggregator.metrics_map[MetricType.GAUGE]), 0) self.assertEqual(len(self.aggregator.metrics_map[MetricType.COUNT]), 0) self.assertEqual(len(self.aggregator.metrics_map[MetricType.SET]), 0) - - self.assertEqual(len(metrics), 7) + self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.HISTOGRAM]), 0) + self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.DISTRIBUTION]), 0) + self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.TIMING]), 0) + self.assertEqual(len(metrics), 16) metrics.sort(key=lambda m: (m.metric_type, m.name, m.value)) + expected_metrics = [ {"metric_type": MetricType.COUNT, "name": "countTest1", "tags": tags, "rate": 1, "value": 31, "timestamp": 0}, {"metric_type": MetricType.COUNT, "name": "countTest2", "tags": tags, "rate": 1, "value": 1, "timestamp": 0}, + {"metric_type": MetricType.DISTRIBUTION, "name": "distributionTest1", "tags": tags, "rate": 1, "value": 21}, + {"metric_type": MetricType.DISTRIBUTION, "name": "distributionTest1", "tags": tags, "rate": 1, "value": 22}, + {"metric_type": MetricType.DISTRIBUTION, "name": "distributionTest2", "tags": tags, "rate": 1, "value": 23}, {"metric_type": MetricType.GAUGE, "name": "gaugeTest1", "tags": tags, "rate": 1, "value": 10, "timestamp": 0}, {"metric_type": MetricType.GAUGE, "name": "gaugeTest2", "tags": tags, "rate": 1, "value": 15, 
"timestamp": 0}, + {"metric_type": MetricType.HISTOGRAM, "name": "histogramTest1", "tags": tags, "rate": 1, "value": 21}, + {"metric_type": MetricType.HISTOGRAM, "name": "histogramTest1", "tags": tags, "rate": 1, "value": 22}, + {"metric_type": MetricType.HISTOGRAM, "name": "histogramTest2", "tags": tags, "rate": 1, "value": 23}, + {"metric_type": MetricType.TIMING, "name": "timingTest1", "tags": tags, "rate": 1, "value": 21}, + {"metric_type": MetricType.TIMING, "name": "timingTest1", "tags": tags, "rate": 1, "value": 22}, + {"metric_type": MetricType.TIMING, "name": "timingTest2", "tags": tags, "rate": 1, "value": 23}, {"metric_type": MetricType.SET, "name": "setTest1", "tags": tags, "rate": 1, "value": "value1", "timestamp": 0}, {"metric_type": MetricType.SET, "name": "setTest1", "tags": tags, "rate": 1, "value": "value2", "timestamp": 0}, {"metric_type": MetricType.SET, "name": "setTest2", "tags": tags, "rate": 1, "value": "value1", "timestamp": 0}, ] - + for metric, expected in zip(metrics, expected_metrics): self.assertEqual(metric.name, expected["name"]) self.assertEqual(metric.tags, expected["tags"]) From b01ed1d4275d6e9677d03c12c30d385ddc597f84 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 10 Dec 2024 14:42:29 -0500 Subject: [PATCH 08/89] use snake case --- datadog/dogstatsd/aggregator.py | 8 ++++---- datadog/dogstatsd/buffered_metrics_context.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index 4a00cee3f..48d209244 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -14,16 +14,16 @@ class Aggregator(object): - def __init__(self, maxSamplesPerContext=0): + def __init__(self, max_samples_per_context=0): self.metrics_map = { MetricType.COUNT: {}, MetricType.GAUGE: {}, MetricType.SET: {}, } self.buffered_metrics_map = { - MetricType.HISTOGRAM: BufferedMetricContexts(HistogramMetric, maxSamplesPerContext), - MetricType.DISTRIBUTION: BufferedMetricContexts(DistributionMetric, maxSamplesPerContext), - MetricType.TIMING: BufferedMetricContexts(TimingMetric, maxSamplesPerContext) + MetricType.HISTOGRAM: BufferedMetricContexts(HistogramMetric, max_samples_per_context), + MetricType.DISTRIBUTION: BufferedMetricContexts(DistributionMetric, max_samples_per_context), + MetricType.TIMING: BufferedMetricContexts(TimingMetric, max_samples_per_context) } self._locks = { MetricType.COUNT: threading.RLock(), diff --git a/datadog/dogstatsd/buffered_metrics_context.py b/datadog/dogstatsd/buffered_metrics_context.py index e4a54b82a..47f0a9ac4 100644 --- a/datadog/dogstatsd/buffered_metrics_context.py +++ b/datadog/dogstatsd/buffered_metrics_context.py @@ -4,11 +4,11 @@ from datadog.dogstatsd.buffered_metrics import BufferedMetric class BufferedMetricContexts: - def __init__(self, buffered_metric_type: BufferedMetric, maxSamplesPerContext): + def __init__(self, buffered_metric_type: BufferedMetric, max_samples_per_context): self.nb_context = 0 self.lock = Lock() self.values = {} - self.maxSamplesPerContext = maxSamplesPerContext + self.max_samples_per_context = max_samples_per_context self.buffered_metric_type = buffered_metric_type self.random = secrets @@ -33,7 +33,7 @@ def sample(self, name, value, tags, rate, context_key): with self.lock: if context_key not in self.values: # Create a new metric if it doesn't exist - self.values[context_key] = self.buffered_metric_type(name, tags, self.maxSamplesPerContext, rate) + self.values[context_key] = 
self.buffered_metric_type(name, tags, self.max_samples_per_context, rate) metric = self.values[context_key] print("values are :", self.values.keys()) if keeping_sample: From bb863c51a4a7a41de8620f1de82f178951aa5a73 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 12 Dec 2024 13:48:08 -0500 Subject: [PATCH 09/89] histograms, distribution and timing metrics are not aggregated, they are just buffered --- datadog/dogstatsd/base.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index da9ece563..b2d60e5af 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -561,6 +561,8 @@ def _flush_thread_loop(self, flush_interval): time.sleep(flush_interval) if not self._disable_aggregation: self.flush_aggregated_metrics() + # Histograms, Distribution and Timing metrics are not aggregated + self.flush_buffered_metrics() if not self._disable_buffering: self.flush_buffered_metrics() self._flush_thread = threading.Thread( @@ -1127,7 +1129,10 @@ def _report(self, metric, metric_type, value, tags, sample_rate, timestamp=0): ) # Send it - self._send(payload) + if metric_type == MetricType.DISTRIBUTION or metric_type == MetricType.HISTOGRAM or metric_type == MetricType.TIMING: + self._send_to_buffer(payload) + else: + self._send(payload) def _reset_telemetry(self): self.metrics_count = 0 From ca4981d445e0b8f2bc981ae074776093d1ace62f Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 12 Dec 2024 13:55:58 -0500 Subject: [PATCH 10/89] remove max_metric_per_context, not in scope? --- datadog/dogstatsd/aggregator.py | 8 ++++---- datadog/dogstatsd/buffered_metrics.py | 14 +++++++------- datadog/dogstatsd/buffered_metrics_context.py | 5 ++--- tests/unit/dogstatsd/test_aggregator.py | 5 ++--- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index 48d209244..87ea9218d 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -14,16 +14,16 @@ class Aggregator(object): - def __init__(self, max_samples_per_context=0): + def __init__(self): self.metrics_map = { MetricType.COUNT: {}, MetricType.GAUGE: {}, MetricType.SET: {}, } self.buffered_metrics_map = { - MetricType.HISTOGRAM: BufferedMetricContexts(HistogramMetric, max_samples_per_context), - MetricType.DISTRIBUTION: BufferedMetricContexts(DistributionMetric, max_samples_per_context), - MetricType.TIMING: BufferedMetricContexts(TimingMetric, max_samples_per_context) + MetricType.HISTOGRAM: BufferedMetricContexts(HistogramMetric), + MetricType.DISTRIBUTION: BufferedMetricContexts(DistributionMetric), + MetricType.TIMING: BufferedMetricContexts(TimingMetric) } self._locks = { MetricType.COUNT: threading.RLock(), diff --git a/datadog/dogstatsd/buffered_metrics.py b/datadog/dogstatsd/buffered_metrics.py index 5b1ee242f..a92277b31 100644 --- a/datadog/dogstatsd/buffered_metrics.py +++ b/datadog/dogstatsd/buffered_metrics.py @@ -4,7 +4,7 @@ class BufferedMetric(object): - def __init__(self, name, tags, metric_type, max_metric_samples=0, specified_rate=1.0): + def __init__(self, name, tags, metric_type, specified_rate=1.0, max_metric_samples=0): self.name = name self.tags = tags self.metric_type = metric_type @@ -54,15 +54,15 @@ def flush(self): class HistogramMetric(BufferedMetric): - def __init__(self, name, tags, max_metric_samples=0, rate=1.0): - super(HistogramMetric, self).__init__(name, tags, MetricType.HISTOGRAM, max_metric_samples, rate) + 
def __init__(self, name, tags, rate=1.0, max_metric_samples=0): + super(HistogramMetric, self).__init__(name, tags, MetricType.HISTOGRAM, rate, max_metric_samples) class DistributionMetric(BufferedMetric): - def __init__(self, name, tags, max_metric_samples=0, rate=1.0): - super(DistributionMetric, self).__init__(name, tags, MetricType.DISTRIBUTION, max_metric_samples, rate) + def __init__(self, name, tags, rate=1.0, max_metric_samples=0): + super(DistributionMetric, self).__init__(name, tags, MetricType.DISTRIBUTION, rate, max_metric_samples) class TimingMetric(BufferedMetric): - def __init__(self, name, tags, max_metric_samples=0, rate=1.0): - super(TimingMetric, self).__init__(name, tags, MetricType.TIMING, max_metric_samples, rate) + def __init__(self, name, tags, rate=1.0, max_metric_samples=0): + super(TimingMetric, self).__init__(name, tags, MetricType.TIMING, rate, max_metric_samples) diff --git a/datadog/dogstatsd/buffered_metrics_context.py b/datadog/dogstatsd/buffered_metrics_context.py index 47f0a9ac4..65889a90f 100644 --- a/datadog/dogstatsd/buffered_metrics_context.py +++ b/datadog/dogstatsd/buffered_metrics_context.py @@ -4,11 +4,10 @@ from datadog.dogstatsd.buffered_metrics import BufferedMetric class BufferedMetricContexts: - def __init__(self, buffered_metric_type: BufferedMetric, max_samples_per_context): + def __init__(self, buffered_metric_type: BufferedMetric): self.nb_context = 0 self.lock = Lock() self.values = {} - self.max_samples_per_context = max_samples_per_context self.buffered_metric_type = buffered_metric_type self.random = secrets @@ -33,7 +32,7 @@ def sample(self, name, value, tags, rate, context_key): with self.lock: if context_key not in self.values: # Create a new metric if it doesn't exist - self.values[context_key] = self.buffered_metric_type(name, tags, self.max_samples_per_context, rate) + self.values[context_key] = self.buffered_metric_type(name, tags, rate) metric = self.values[context_key] print("values are :", self.values.keys()) if keeping_sample: diff --git a/tests/unit/dogstatsd/test_aggregator.py b/tests/unit/dogstatsd/test_aggregator.py index 2d2113269..b5220564e 100644 --- a/tests/unit/dogstatsd/test_aggregator.py +++ b/tests/unit/dogstatsd/test_aggregator.py @@ -90,13 +90,12 @@ def test_aggregator_flush(self): {"metric_type": MetricType.SET, "name": "setTest1", "tags": tags, "rate": 1, "value": "value2", "timestamp": 0}, {"metric_type": MetricType.SET, "name": "setTest2", "tags": tags, "rate": 1, "value": "value1", "timestamp": 0}, ] - + for metric, expected in zip(metrics, expected_metrics): self.assertEqual(metric.name, expected["name"]) self.assertEqual(metric.tags, expected["tags"]) self.assertEqual(metric.rate, expected["rate"]) self.assertEqual(metric.value, expected["value"]) - - + if __name__ == '__main__': unittest.main() From 109156999d9169639959b37584c0af7a7ca9be78 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 12 Dec 2024 14:41:21 -0500 Subject: [PATCH 11/89] lint --- datadog/dogstatsd/buffered_metrics_context.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/datadog/dogstatsd/buffered_metrics_context.py b/datadog/dogstatsd/buffered_metrics_context.py index 65889a90f..7cb500d62 100644 --- a/datadog/dogstatsd/buffered_metrics_context.py +++ b/datadog/dogstatsd/buffered_metrics_context.py @@ -1,13 +1,14 @@ from threading import Lock import secrets - +import Dict from datadog.dogstatsd.buffered_metrics import BufferedMetric + class BufferedMetricContexts: def __init__(self, 
buffered_metric_type: BufferedMetric): self.nb_context = 0 self.lock = Lock() - self.values = {} + self.values = Dict[str, any] = {} self.buffered_metric_type = buffered_metric_type self.random = secrets @@ -48,4 +49,4 @@ def should_sample(self, rate): def get_nb_context(self): """Return the number of contexts.""" - return self.nb_context \ No newline at end of file + return self.nb_context From 606b271c91772f793fcc0f1d9e9788f7779b5266 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 12 Dec 2024 14:44:02 -0500 Subject: [PATCH 12/89] fix lint --- datadog/dogstatsd/buffered_metrics_context.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datadog/dogstatsd/buffered_metrics_context.py b/datadog/dogstatsd/buffered_metrics_context.py index 7cb500d62..d35a5387c 100644 --- a/datadog/dogstatsd/buffered_metrics_context.py +++ b/datadog/dogstatsd/buffered_metrics_context.py @@ -1,6 +1,5 @@ from threading import Lock import secrets -import Dict from datadog.dogstatsd.buffered_metrics import BufferedMetric @@ -8,7 +7,7 @@ class BufferedMetricContexts: def __init__(self, buffered_metric_type: BufferedMetric): self.nb_context = 0 self.lock = Lock() - self.values = Dict[str, any] = {} + self.values = {} self.buffered_metric_type = buffered_metric_type self.random = secrets @@ -50,3 +49,4 @@ def should_sample(self, rate): def get_nb_context(self): """Return the number of contexts.""" return self.nb_context + \ No newline at end of file From a5d4b15c171591ad9b16425f7d069759fbb357fa Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 12 Dec 2024 14:46:43 -0500 Subject: [PATCH 13/89] fix syntax --- datadog/dogstatsd/buffered_metrics_context.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datadog/dogstatsd/buffered_metrics_context.py b/datadog/dogstatsd/buffered_metrics_context.py index d35a5387c..61d14ec93 100644 --- a/datadog/dogstatsd/buffered_metrics_context.py +++ b/datadog/dogstatsd/buffered_metrics_context.py @@ -1,10 +1,9 @@ from threading import Lock import secrets -from datadog.dogstatsd.buffered_metrics import BufferedMetric class BufferedMetricContexts: - def __init__(self, buffered_metric_type: BufferedMetric): + def __init__(self, buffered_metric_type): self.nb_context = 0 self.lock = Lock() self.values = {} From 7cf00b14aa45a7367ef1b96ef4ba7afbd91bc71a Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 12 Dec 2024 14:57:39 -0500 Subject: [PATCH 14/89] replace secrets with random --- datadog/dogstatsd/buffered_metrics_context.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/datadog/dogstatsd/buffered_metrics_context.py b/datadog/dogstatsd/buffered_metrics_context.py index 61d14ec93..7e435b9e6 100644 --- a/datadog/dogstatsd/buffered_metrics_context.py +++ b/datadog/dogstatsd/buffered_metrics_context.py @@ -1,5 +1,5 @@ from threading import Lock -import secrets +import random class BufferedMetricContexts: @@ -8,7 +8,6 @@ def __init__(self, buffered_metric_type): self.lock = Lock() self.values = {} self.buffered_metric_type = buffered_metric_type - self.random = secrets def flush(self): metrics = [] @@ -26,14 +25,11 @@ def flush(self): def sample(self, name, value, tags, rate, context_key): """Sample a metric and store it if it meets the criteria.""" keeping_sample = self.should_sample(rate) - print("keeping sample is ", keeping_sample) - print("context_key is ", context_key) with self.lock: if context_key not in self.values: # Create a new metric if it doesn't exist 
self.values[context_key] = self.buffered_metric_type(name, tags, rate) metric = self.values[context_key] - print("values are :", self.values.keys()) if keeping_sample: metric.maybe_keep_sample(value) else: @@ -43,9 +39,8 @@ def should_sample(self, rate): """Determine if a sample should be kept based on the specified rate.""" if rate >= 1: return True - return secrets.SystemRandom().random() < rate + return random.random() < rate # Replace `secrets` with `random` def get_nb_context(self): """Return the number of contexts.""" return self.nb_context - \ No newline at end of file From 92481b94d2f61e0f7bb799d106828d235ad8cbfc Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 12 Dec 2024 15:06:46 -0500 Subject: [PATCH 15/89] lint --- datadog/dogstatsd/buffered_metrics_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datadog/dogstatsd/buffered_metrics_context.py b/datadog/dogstatsd/buffered_metrics_context.py index 7e435b9e6..ef33e7195 100644 --- a/datadog/dogstatsd/buffered_metrics_context.py +++ b/datadog/dogstatsd/buffered_metrics_context.py @@ -39,7 +39,7 @@ def should_sample(self, rate): """Determine if a sample should be kept based on the specified rate.""" if rate >= 1: return True - return random.random() < rate # Replace `secrets` with `random` + return random.random() < rate def get_nb_context(self): """Return the number of contexts.""" From 6aa02433b752c91786d92a2e82ae0056b45b764e Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 12 Dec 2024 15:48:11 -0500 Subject: [PATCH 16/89] update tests --- tests/unit/dogstatsd/test_buffered_metrics.py | 77 +++++++------------ 1 file changed, 28 insertions(+), 49 deletions(-) diff --git a/tests/unit/dogstatsd/test_buffered_metrics.py b/tests/unit/dogstatsd/test_buffered_metrics.py index 8cc6ca7fc..75f064c24 100644 --- a/tests/unit/dogstatsd/test_buffered_metrics.py +++ b/tests/unit/dogstatsd/test_buffered_metrics.py @@ -5,69 +5,55 @@ class TestBufferedMetric(unittest.TestCase): def test_new_histogram_metric(self): - s = HistogramMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) - self.assertEqual(s.data, [1.0]) + s = HistogramMetric(name="test", tags="tag1,tag2", max_metric_samples=0, rate=1.0) self.assertEqual(s.name, "test") self.assertEqual(s.tags, "tag1,tag2") self.assertEqual(s.specified_rate, 1.0) self.assertEqual(s.metric_type, MetricType.HISTOGRAM) def test_histogram_metric_sample(self): - s = HistogramMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) + s = HistogramMetric(name="test", tags="tag1,tag2", rate=1.0, max_metric_samples=0) s.sample(123.45) - self.assertEqual(s.data, [1.0, 123.45]) + self.assertEqual(s.data, [123.45]) self.assertEqual(s.name, "test") self.assertEqual(s.tags, "tag1,tag2") self.assertEqual(s.specified_rate, 1.0) self.assertEqual(s.metric_type, MetricType.HISTOGRAM) def test_flush_histogram_metric_sample(self): - s = HistogramMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) - m = s.flush() - self.assertEqual(m['metric_type'], MetricType.HISTOGRAM) - self.assertEqual(m['values'], [1.0]) - self.assertEqual(m['name'], "test") - self.assertEqual(m['tags'], "tag1,tag2") + s = HistogramMetric(name="test", tags="tag1,tag2", rate=1.0, max_metric_samples=0) s.sample(21) - s.sample(123.45) - m = s.flush() - self.assertEqual(m['metric_type'], MetricType.HISTOGRAM) - self.assertEqual(m['values'], [1.0, 21.0, 123.45]) - self.assertEqual(m['name'], "test") - 
self.assertEqual(m['rate'], 1.0) - self.assertEqual(m['tags'], "tag1,tag2") + m = s.flush()[0] + self.assertEqual(m.metric_type, MetricType.HISTOGRAM) + self.assertEqual(m.value, 21.0) + self.assertEqual(m.name, "test") + self.assertEqual(m.rate, 1.0) + self.assertEqual(m.tags, "tag1,tag2") def test_new_distribution_metric(self): - s = DistributionMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) - self.assertEqual(s.data, [1.0]) + s = DistributionMetric(name="test", tags="tag1,tag2", max_metric_samples=0, rate=1.0) self.assertEqual(s.name, "test") self.assertEqual(s.tags, "tag1,tag2") + self.assertEqual(s.specified_rate, 1.0) self.assertEqual(s.metric_type, MetricType.DISTRIBUTION) def test_distribution_metric_sample(self): - s = DistributionMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) + s = DistributionMetric(name="test", tags="tag1,tag2", max_metric_samples=0, rate=1.0) s.sample(123.45) - self.assertEqual(s.data, [1.0, 123.45]) + self.assertEqual(s.data, [123.45]) self.assertEqual(s.name, "test") self.assertEqual(s.tags, "tag1,tag2") self.assertEqual(s.metric_type, MetricType.DISTRIBUTION) def test_flush_distribution_metric_sample(self): - s = DistributionMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) - m = s.flush() - self.assertEqual(m['metric_type'], MetricType.DISTRIBUTION) - self.assertEqual(m['values'], [1.0]) - self.assertEqual(m['name'], "test") - self.assertEqual(m['tags'], "tag1,tag2") - - s.sample(21) + s = DistributionMetric(name="test", tags="tag1,tag2", max_metric_samples=0, rate=1.0) s.sample(123.45) - m = s.flush() - self.assertEqual(m['metric_type'], MetricType.DISTRIBUTION) - self.assertEqual(m['values'], [1.0, 21.0, 123.45]) - self.assertEqual(m['name'], "test") - self.assertEqual(m['tags'], "tag1,tag2") + m = s.flush()[0] + self.assertEqual(m.metric_type, MetricType.DISTRIBUTION) + self.assertEqual(m.value, 123.45) + self.assertEqual(m.name, "test") + self.assertEqual(m.tags, "tag1,tag2") def test_new_timing_metric(self): s = TimingMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) @@ -77,28 +63,21 @@ def test_new_timing_metric(self): self.assertEqual(s.metric_type, MetricType.TIMING) def test_timing_metric_sample(self): - s = TimingMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) + s = TimingMetric(name="test", tags="tag1,tag2", max_metric_samples=0, rate=1.0) s.sample(123.45) - self.assertEqual(s.data, [1.0, 123.45]) + self.assertEqual(s.data, [123.45]) self.assertEqual(s.name, "test") self.assertEqual(s.tags, "tag1,tag2") self.assertEqual(s.metric_type, MetricType.TIMING) def test_flush_timing_metric_sample(self): - s = TimingMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) - m = s.flush() - self.assertEqual(m['metric_type'], MetricType.TIMING) - self.assertEqual(m['values'], [1.0]) - self.assertEqual(m['name'], "test") - self.assertEqual(m['tags'], "tag1,tag2") - - s.sample(21) + s = TimingMetric(name="test", tags="tag1,tag2", max_metric_samples=0, rate=1.0) s.sample(123.45) - m = s.flush() - self.assertEqual(m['metric_type'], MetricType.TIMING) - self.assertEqual(m['values'], [1.0, 21.0, 123.45]) - self.assertEqual(m['name'], "test") - self.assertEqual(m['tags'], "tag1,tag2") + m = s.flush()[0] + self.assertEqual(m.metric_type, MetricType.TIMING) + self.assertEqual(m.value, 123.45) + self.assertEqual(m.name, "test") + self.assertEqual(m.tags, "tag1,tag2") if __name__ == 
'__main__': unittest.main() From 58af7c0cc91c4566b164a2f325199630c4f7ac04 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 12 Dec 2024 15:49:59 -0500 Subject: [PATCH 17/89] lint --- datadog/dogstatsd/aggregator.py | 2 -- datadog/dogstatsd/buffered_metrics_context.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index 87ea9218d..60484dacd 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -113,5 +113,3 @@ def add_buffered_metric( context_key = self.get_context(name, tags) metric_context = self.buffered_metrics_map[metric_type] return metric_context.sample(name, value, tags, rate, context_key) - - diff --git a/datadog/dogstatsd/buffered_metrics_context.py b/datadog/dogstatsd/buffered_metrics_context.py index ef33e7195..12a9674d4 100644 --- a/datadog/dogstatsd/buffered_metrics_context.py +++ b/datadog/dogstatsd/buffered_metrics_context.py @@ -39,7 +39,7 @@ def should_sample(self, rate): """Determine if a sample should be kept based on the specified rate.""" if rate >= 1: return True - return random.random() < rate + return random.random() < rate def get_nb_context(self): """Return the number of contexts.""" From f5a4cd11b4e9535de974a74d9e4612787227466d Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 12 Dec 2024 15:54:18 -0500 Subject: [PATCH 18/89] lint --- datadog/dogstatsd/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index b2d60e5af..ff0aa45bd 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -1129,7 +1129,7 @@ def _report(self, metric, metric_type, value, tags, sample_rate, timestamp=0): ) # Send it - if metric_type == MetricType.DISTRIBUTION or metric_type == MetricType.HISTOGRAM or metric_type == MetricType.TIMING: + if metric_type in {MetricType.DISTRIBUTION, MetricType.HISTOGRAM, MetricType.TIMING}: self._send_to_buffer(payload) else: self._send(payload) From 3725fca1fe610bad33c5d54d58edaf3cb0440cef Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 12 Dec 2024 15:57:42 -0500 Subject: [PATCH 19/89] update test --- tests/unit/dogstatsd/test_buffered_metrics.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/unit/dogstatsd/test_buffered_metrics.py b/tests/unit/dogstatsd/test_buffered_metrics.py index 75f064c24..bbf028ca4 100644 --- a/tests/unit/dogstatsd/test_buffered_metrics.py +++ b/tests/unit/dogstatsd/test_buffered_metrics.py @@ -56,8 +56,7 @@ def test_flush_distribution_metric_sample(self): self.assertEqual(m.tags, "tag1,tag2") def test_new_timing_metric(self): - s = TimingMetric(name="test", value=1.0, tags="tag1,tag2", max_metric_samples=0, rate=1.0) - self.assertEqual(s.data, [1.0]) + s = TimingMetric(name="test", tags="tag1,tag2", max_metric_samples=0, rate=1.0) self.assertEqual(s.name, "test") self.assertEqual(s.tags, "tag1,tag2") self.assertEqual(s.metric_type, MetricType.TIMING) From 2f8c3fe9f4c97f9e2c356128de5c6169747df65b Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 12 Dec 2024 16:15:09 -0500 Subject: [PATCH 20/89] test --- datadog/dogstatsd/base.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index ff0aa45bd..87003dca2 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -562,7 +562,7 @@ def _flush_thread_loop(self, flush_interval): if not 
self._disable_aggregation: self.flush_aggregated_metrics() # Histograms, Distribution and Timing metrics are not aggregated - self.flush_buffered_metrics() + # self.flush_buffered_metrics() if not self._disable_buffering: self.flush_buffered_metrics() self._flush_thread = threading.Thread( @@ -1129,10 +1129,12 @@ def _report(self, metric, metric_type, value, tags, sample_rate, timestamp=0): ) # Send it - if metric_type in {MetricType.DISTRIBUTION, MetricType.HISTOGRAM, MetricType.TIMING}: - self._send_to_buffer(payload) - else: - self._send(payload) + # if metric_type in {MetricType.DISTRIBUTION, MetricType.HISTOGRAM, MetricType.TIMING}: + # self._send_to_buffer(payload) + # else: + # self._send(payload) + + self._send(payload) def _reset_telemetry(self): self.metrics_count = 0 From 26255de37adb887b888c9acbcbf681a13a4b92a1 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 13 Dec 2024 10:31:12 -0500 Subject: [PATCH 21/89] test --- datadog/dogstatsd/aggregator.py | 13 +++---------- datadog/dogstatsd/base.py | 7 +++++++ tests/unit/dogstatsd/test_aggregator.py | 1 + 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index 60484dacd..ead684209 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -42,23 +42,16 @@ def flush_aggregated_metrics(self): self.metrics_map[metric_type] = {} for metric in current_metrics.values(): metrics.extend(metric.get_data() if isinstance(metric, SetMetric) else [metric]) - - for metric_type in self.buffered_metrics_map.keys(): - with self._locks[metric_type]: - metric_context = self.buffered_metrics_map[metric_type] - self.buffered_metrics_map[metric_type] = {} - for metricList in metric_context.flush(): - metrics.extend(metricList) return metrics def flush_aggregated_buffered_metrics(self): metrics = [] for metric_type in self.buffered_metrics_map.keys(): with self._locks[metric_type]: - current_metrics = self.buffered_metrics_map[metric_type] + metric_context = self.buffered_metrics_map[metric_type] self.buffered_metrics_map[metric_type] = {} - for metric in current_metrics.values(): - metrics.append(metric) + for metricList in metric_context.flush(): + metrics.extend(metricList) return metrics def get_context(self, name, tags): diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 87003dca2..c3c6ef742 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -828,6 +828,13 @@ def flush_aggregated_metrics(self): for m in metrics: self._report(m.name, m.metric_type, m.value, m.tags, m.rate, m.timestamp) + buffered_metrics = self.aggregator.flush_aggregated_buffered_metrics() + send_method = self._send + self._send = self._send_to_buffer + for m in buffered_metrics: + self._report(m.name, m.metric_type, m.value, m.tags, m.rate, m.timestamp) + self._send = send_method + def gauge( self, metric, # type: Text diff --git a/tests/unit/dogstatsd/test_aggregator.py b/tests/unit/dogstatsd/test_aggregator.py index b5220564e..235b13c3d 100644 --- a/tests/unit/dogstatsd/test_aggregator.py +++ b/tests/unit/dogstatsd/test_aggregator.py @@ -63,6 +63,7 @@ def test_aggregator_flush(self): self.aggregator.timing("timingTest2", 23, tags, 1) metrics = self.aggregator.flush_aggregated_metrics() + metrics.extend(self.aggregator.flush_aggregated_buffered_metrics()) self.assertEqual(len(self.aggregator.metrics_map[MetricType.GAUGE]), 0) self.assertEqual(len(self.aggregator.metrics_map[MetricType.COUNT]), 0) 
self.assertEqual(len(self.aggregator.metrics_map[MetricType.SET]), 0) From 44352ace0e0d5e07cb1025d22e50e762a88901b8 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 13 Dec 2024 10:34:31 -0500 Subject: [PATCH 22/89] test2 --- datadog/dogstatsd/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index c3c6ef742..08878287f 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -562,7 +562,7 @@ def _flush_thread_loop(self, flush_interval): if not self._disable_aggregation: self.flush_aggregated_metrics() # Histograms, Distribution and Timing metrics are not aggregated - # self.flush_buffered_metrics() + self.flush_buffered_metrics() if not self._disable_buffering: self.flush_buffered_metrics() self._flush_thread = threading.Thread( From 02e3b23b6501a21311e24a6d4a895e4a142151ab Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 13 Dec 2024 10:40:21 -0500 Subject: [PATCH 23/89] remove comment --- datadog/dogstatsd/base.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 08878287f..71894c351 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -1136,11 +1136,6 @@ def _report(self, metric, metric_type, value, tags, sample_rate, timestamp=0): ) # Send it - # if metric_type in {MetricType.DISTRIBUTION, MetricType.HISTOGRAM, MetricType.TIMING}: - # self._send_to_buffer(payload) - # else: - # self._send(payload) - self._send(payload) def _reset_telemetry(self): From 148ef17b3932bcf630b7139d2ffd0cf0dad386f1 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 13 Dec 2024 12:57:50 -0500 Subject: [PATCH 24/89] base.py uses aggregator --- datadog/dogstatsd/base.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 71894c351..c5da91d9a 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -969,7 +969,11 @@ def histogram( >>> statsd.histogram("uploaded.file.size", 1445) >>> statsd.histogram("album.photo.count", 26, tags=["gender:female"]) """ - self._report(metric, "h", value, tags, sample_rate) + if self._disable_aggregation: + self._report(metric, "h", value, tags, sample_rate) + else: + self.aggregator.histogram(metric, value, tags, sample_rate) + def distribution( self, @@ -984,7 +988,11 @@ def distribution( >>> statsd.distribution("uploaded.file.size", 1445) >>> statsd.distribution("album.photo.count", 26, tags=["gender:female"]) """ - self._report(metric, "d", value, tags, sample_rate) + if self._disable_aggregation: + self._report(metric, "d", value, tags, sample_rate) + else: + self.aggregator.distribution(metric, value, tags, sample_rate) + def timing( self, From c5d956361ab3c717477dccf6c2ce9a967490970d Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 13 Dec 2024 12:59:20 -0500 Subject: [PATCH 25/89] lint --- datadog/dogstatsd/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index c5da91d9a..5a29c1718 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -992,7 +992,6 @@ def distribution( self._report(metric, "d", value, tags, sample_rate) else: self.aggregator.distribution(metric, value, tags, sample_rate) - def timing( self, From 08091d52e4ce0ee865b4c03e0222771b7ed3f2ec Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 13 Dec 2024 13:23:22 -0500 Subject: [PATCH 
26/89] lint --- datadog/dogstatsd/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 5a29c1718..532f5eef2 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -973,7 +973,6 @@ def histogram( self._report(metric, "h", value, tags, sample_rate) else: self.aggregator.histogram(metric, value, tags, sample_rate) - def distribution( self, From b482e72e8447ef12de8cb927602dd1ddfbccb562 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 13 Dec 2024 13:27:48 -0500 Subject: [PATCH 27/89] timing metric can be aggregated --- datadog/dogstatsd/base.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 532f5eef2..64a287f77 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -1004,7 +1004,12 @@ def timing( >>> statsd.timing("query.response.time", 1234) """ - self._report(metric, "ms", value, tags, sample_rate) + + if self._disable_aggregation: + self._report(metric, "ms", value, tags, sample_rate) + else: + self.aggregator.timing(metric, value, tags, sample_rate) + def timed(self, metric=None, tags=None, sample_rate=None, use_ms=None): """ From 3f9168a2b8b07e85e97ab5a4f17855387ddca165 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 13 Dec 2024 13:28:04 -0500 Subject: [PATCH 28/89] lint --- datadog/dogstatsd/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 64a287f77..0c055f480 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -1008,8 +1008,7 @@ def timing( if self._disable_aggregation: self._report(metric, "ms", value, tags, sample_rate) else: - self.aggregator.timing(metric, value, tags, sample_rate) - + self.aggregator.timing(metric, value, tags, sample_rate) def timed(self, metric=None, tags=None, sample_rate=None, use_ms=None): """ From a46b5d2381e2a3e9b46439487f2d04c9f4bb4ee8 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 13 Dec 2024 13:36:50 -0500 Subject: [PATCH 29/89] lint --- datadog/dogstatsd/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 0c055f480..59e31987d 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -1008,7 +1008,7 @@ def timing( if self._disable_aggregation: self._report(metric, "ms", value, tags, sample_rate) else: - self.aggregator.timing(metric, value, tags, sample_rate) + self.aggregator.timing(metric, value, tags, sample_rate) def timed(self, metric=None, tags=None, sample_rate=None, use_ms=None): """ From 1e6d2132fbac3e1192399f1e69ffb54b98ac9a9b Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Sun, 15 Dec 2024 15:16:17 -0500 Subject: [PATCH 30/89] remove prints --- datadog/dogstatsd/buffered_metrics.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/datadog/dogstatsd/buffered_metrics.py b/datadog/dogstatsd/buffered_metrics.py index a92277b31..d59a32206 100644 --- a/datadog/dogstatsd/buffered_metrics.py +++ b/datadog/dogstatsd/buffered_metrics.py @@ -20,21 +20,16 @@ def sample(self, value): self.total_metric_samples += 1 def maybe_keep_sample(self, value): - print("max metric samples is ", self.max_metric_samples) - print("stored metric samples is ", self.stored_metric_samples) if self.max_metric_samples > 0: if self.stored_metric_samples >= self.max_metric_samples: i = random.randint(0, self.total_metric_samples - 
1) if i < self.max_metric_samples: - print("REPLACE") self.data[i] = value else: - print("APPEND") self.data.append(value) self.stored_metric_samples += 1 self.total_metric_samples += 1 else: - print("APPEND2") self.sample(value) def skip_sample(self): From 5bbf6c50943f7f0012e51a076fc167f75997afd8 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Sun, 15 Dec 2024 15:26:29 -0500 Subject: [PATCH 31/89] add test for testing maybe_keep_sample --- datadog/dogstatsd/buffered_metrics.py | 4 ++-- tests/unit/dogstatsd/test_buffered_metrics.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/datadog/dogstatsd/buffered_metrics.py b/datadog/dogstatsd/buffered_metrics.py index d59a32206..5e6da675c 100644 --- a/datadog/dogstatsd/buffered_metrics.py +++ b/datadog/dogstatsd/buffered_metrics.py @@ -11,8 +11,8 @@ def __init__(self, name, tags, metric_type, specified_rate=1.0, max_metric_sampl self.max_metric_samples = max_metric_samples self.specified_rate = specified_rate self.data = [] - self.stored_metric_samples = 1 - self.total_metric_samples = 1 + self.stored_metric_samples = 0 + self.total_metric_samples = 0 def sample(self, value): self.data.append(value) diff --git a/tests/unit/dogstatsd/test_buffered_metrics.py b/tests/unit/dogstatsd/test_buffered_metrics.py index bbf028ca4..ecf4114b5 100644 --- a/tests/unit/dogstatsd/test_buffered_metrics.py +++ b/tests/unit/dogstatsd/test_buffered_metrics.py @@ -78,5 +78,17 @@ def test_flush_timing_metric_sample(self): self.assertEqual(m.name, "test") self.assertEqual(m.tags, "tag1,tag2") + def test_maybe_keep_sample(self): + s = HistogramMetric(name="test", tags="tag1,tag2", rate=1.0, max_metric_samples=2) + s.maybe_keep_sample(123) + s.maybe_keep_sample(456) + s.maybe_keep_sample(789) + self.assertEqual(len(s.data), 2) + self.assertFalse(123 in s.data and 456 in s.data) + self.assertEqual(s.name, "test") + self.assertEqual(s.tags, "tag1,tag2") + self.assertEqual(s.specified_rate, 1.0) + self.assertEqual(s.metric_type, MetricType.HISTOGRAM) + if __name__ == '__main__': unittest.main() From 6809466a023bf54d32ee73dbe903843f009d6274 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Sun, 15 Dec 2024 15:50:49 -0500 Subject: [PATCH 32/89] fix flushing logic --- datadog/dogstatsd/aggregator.py | 1 - datadog/dogstatsd/buffered_metrics_context.py | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index ead684209..c8c782f2b 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -49,7 +49,6 @@ def flush_aggregated_buffered_metrics(self): for metric_type in self.buffered_metrics_map.keys(): with self._locks[metric_type]: metric_context = self.buffered_metrics_map[metric_type] - self.buffered_metrics_map[metric_type] = {} for metricList in metric_context.flush(): metrics.extend(metricList) return metrics diff --git a/datadog/dogstatsd/buffered_metrics_context.py b/datadog/dogstatsd/buffered_metrics_context.py index 12a9674d4..c4f900e39 100644 --- a/datadog/dogstatsd/buffered_metrics_context.py +++ b/datadog/dogstatsd/buffered_metrics_context.py @@ -13,13 +13,13 @@ def flush(self): metrics = [] """Flush the metrics and reset the stored values.""" with self.lock: - values = self.values.copy() + copiedValues = self.values.copy() self.values.clear() - - for _, metric in values.items(): + self.values = {} + for _, metric in copiedValues.items(): metrics.append(metric.flush()) - self.nb_context += len(values) + 
self.nb_context += len(copiedValues) return metrics def sample(self, name, value, tags, rate, context_key): From 4039b6b0bdb17e0015cc2e9e135c8a963f75c6d4 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Sun, 15 Dec 2024 15:56:39 -0500 Subject: [PATCH 33/89] fix tests --- tests/unit/dogstatsd/test_aggregator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/dogstatsd/test_aggregator.py b/tests/unit/dogstatsd/test_aggregator.py index 235b13c3d..4562b1340 100644 --- a/tests/unit/dogstatsd/test_aggregator.py +++ b/tests/unit/dogstatsd/test_aggregator.py @@ -67,9 +67,9 @@ def test_aggregator_flush(self): self.assertEqual(len(self.aggregator.metrics_map[MetricType.GAUGE]), 0) self.assertEqual(len(self.aggregator.metrics_map[MetricType.COUNT]), 0) self.assertEqual(len(self.aggregator.metrics_map[MetricType.SET]), 0) - self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.HISTOGRAM]), 0) - self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.DISTRIBUTION]), 0) - self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.TIMING]), 0) + self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.HISTOGRAM].values), 0) + self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.DISTRIBUTION].values), 0) + self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.TIMING].values), 0) self.assertEqual(len(metrics), 16) metrics.sort(key=lambda m: (m.metric_type, m.name, m.value)) From 21b5e089c4b7c01e1b0db15b5ed25675dfd61bb5 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Sun, 15 Dec 2024 16:11:44 -0500 Subject: [PATCH 34/89] explictly check if rate is none --- datadog/dogstatsd/aggregator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index c8c782f2b..89ae8eccf 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -102,6 +102,8 @@ def timing(self, name, value, tags, rate): def add_buffered_metric( self, metric_type, name, value, tags, rate ): + if rate is None: + rate = 1 context_key = self.get_context(name, tags) metric_context = self.buffered_metrics_map[metric_type] return metric_context.sample(name, value, tags, rate, context_key) From 50337ba3fa7afff89fc400f59405ccbeae0a6e47 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 16 Dec 2024 23:28:01 -0500 Subject: [PATCH 35/89] add test for buffered metrics --- tests/unit/dogstatsd/test_statsd.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/unit/dogstatsd/test_statsd.py b/tests/unit/dogstatsd/test_statsd.py index b9a24cfc1..916108215 100644 --- a/tests/unit/dogstatsd/test_statsd.py +++ b/tests/unit/dogstatsd/test_statsd.py @@ -1116,6 +1116,24 @@ def test_aggregation_buffering_simultaneously_with_interval(self): fake_socket.recv(2, no_wait=True) ) + def test_buffered_metric_aggregation(self): + dogstatsd = DogStatsd(disable_buffering=True, disable_aggregation=False, telemetry_min_flush_interval=0) + fake_socket = FakeSocket() + dogstatsd.socket = fake_socket + dogstatsd.histogram("test.histogram_aggregation", 1) + dogstatsd.distribution("test.distribution_aggregation", 2) + dogstatsd.timing("test.timing_aggregation", 3) + dogstatsd._start_flush_thread() + time.sleep(dogstatsd._flush_interval / 2) + # Ensure that packets didn't arrive immediately for buffered_metrics + self.assertIsNone(dogstatsd.socket.recv(2, no_wait=True)) + time.sleep(dogstatsd._flush_interval) + packet = 
dogstatsd.socket.recv(2, no_wait=True) + h_metric = "test.histogram_aggregation:1|h" + d_metric = "test.distribution_aggregation:2|d" + t_metric = "test.timing_aggregation:3|ms" + self.assertTrue(h_metric in packet and d_metric in packet and t_metric in packet) + def test_disable_buffering(self): dogstatsd = DogStatsd(disable_buffering=True, telemetry_min_flush_interval=0) fake_socket = FakeSocket() From fe0c521f8415cb41f8bcba145104d1e171c4afe0 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 17 Dec 2024 00:09:03 -0500 Subject: [PATCH 36/89] rerun test --- tests/unit/dogstatsd/test_statsd.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/dogstatsd/test_statsd.py b/tests/unit/dogstatsd/test_statsd.py index 916108215..865ccd2c3 100644 --- a/tests/unit/dogstatsd/test_statsd.py +++ b/tests/unit/dogstatsd/test_statsd.py @@ -1127,6 +1127,7 @@ def test_buffered_metric_aggregation(self): time.sleep(dogstatsd._flush_interval / 2) # Ensure that packets didn't arrive immediately for buffered_metrics self.assertIsNone(dogstatsd.socket.recv(2, no_wait=True)) + time.sleep(dogstatsd._flush_interval) packet = dogstatsd.socket.recv(2, no_wait=True) h_metric = "test.histogram_aggregation:1|h" From 4dcee39949f5e716853ff36a9036eb772eb1e2c3 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 17 Dec 2024 00:15:13 -0500 Subject: [PATCH 37/89] rerun tests 3x --- tests/unit/dogstatsd/test_statsd.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/unit/dogstatsd/test_statsd.py b/tests/unit/dogstatsd/test_statsd.py index 865ccd2c3..1348e4552 100644 --- a/tests/unit/dogstatsd/test_statsd.py +++ b/tests/unit/dogstatsd/test_statsd.py @@ -1127,13 +1127,13 @@ def test_buffered_metric_aggregation(self): time.sleep(dogstatsd._flush_interval / 2) # Ensure that packets didn't arrive immediately for buffered_metrics self.assertIsNone(dogstatsd.socket.recv(2, no_wait=True)) - - time.sleep(dogstatsd._flush_interval) - packet = dogstatsd.socket.recv(2, no_wait=True) - h_metric = "test.histogram_aggregation:1|h" - d_metric = "test.distribution_aggregation:2|d" - t_metric = "test.timing_aggregation:3|ms" - self.assertTrue(h_metric in packet and d_metric in packet and t_metric in packet) + + # time.sleep(dogstatsd._flush_interval) + # packet = dogstatsd.socket.recv(2, no_wait=True) + # h_metric = "test.histogram_aggregation:1|h" + # d_metric = "test.distribution_aggregation:2|d" + # t_metric = "test.timing_aggregation:3|ms" + # self.assertTrue(h_metric in packet and d_metric in packet and t_metric in packet) def test_disable_buffering(self): dogstatsd = DogStatsd(disable_buffering=True, telemetry_min_flush_interval=0) From 0444e99ae56d389a714b6b8b9ab75c728acf2ff9 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 17 Dec 2024 00:15:57 -0500 Subject: [PATCH 38/89] rerun tests x4 --- tests/unit/dogstatsd/test_statsd.py | 36 ++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/unit/dogstatsd/test_statsd.py b/tests/unit/dogstatsd/test_statsd.py index 1348e4552..9eadfc2a8 100644 --- a/tests/unit/dogstatsd/test_statsd.py +++ b/tests/unit/dogstatsd/test_statsd.py @@ -1116,24 +1116,24 @@ def test_aggregation_buffering_simultaneously_with_interval(self): fake_socket.recv(2, no_wait=True) ) - def test_buffered_metric_aggregation(self): - dogstatsd = DogStatsd(disable_buffering=True, disable_aggregation=False, telemetry_min_flush_interval=0) - fake_socket = FakeSocket() - dogstatsd.socket = fake_socket - 
dogstatsd.histogram("test.histogram_aggregation", 1) - dogstatsd.distribution("test.distribution_aggregation", 2) - dogstatsd.timing("test.timing_aggregation", 3) - dogstatsd._start_flush_thread() - time.sleep(dogstatsd._flush_interval / 2) - # Ensure that packets didn't arrive immediately for buffered_metrics - self.assertIsNone(dogstatsd.socket.recv(2, no_wait=True)) - - # time.sleep(dogstatsd._flush_interval) - # packet = dogstatsd.socket.recv(2, no_wait=True) - # h_metric = "test.histogram_aggregation:1|h" - # d_metric = "test.distribution_aggregation:2|d" - # t_metric = "test.timing_aggregation:3|ms" - # self.assertTrue(h_metric in packet and d_metric in packet and t_metric in packet) + # def test_buffered_metric_aggregation(self): + # dogstatsd = DogStatsd(disable_buffering=True, disable_aggregation=False, telemetry_min_flush_interval=0) + # fake_socket = FakeSocket() + # dogstatsd.socket = fake_socket + # dogstatsd.histogram("test.histogram_aggregation", 1) + # dogstatsd.distribution("test.distribution_aggregation", 2) + # dogstatsd.timing("test.timing_aggregation", 3) + # dogstatsd._start_flush_thread() + # time.sleep(dogstatsd._flush_interval / 2) + # # Ensure that packets didn't arrive immediately for buffered_metrics + # self.assertIsNone(dogstatsd.socket.recv(2, no_wait=True)) + + # time.sleep(dogstatsd._flush_interval) + # packet = dogstatsd.socket.recv(2, no_wait=True) + # h_metric = "test.histogram_aggregation:1|h" + # d_metric = "test.distribution_aggregation:2|d" + # t_metric = "test.timing_aggregation:3|ms" + # self.assertTrue(h_metric in packet and d_metric in packet and t_metric in packet) def test_disable_buffering(self): dogstatsd = DogStatsd(disable_buffering=True, telemetry_min_flush_interval=0) From fc87fa5a87dee4ea6ea7b70326de16559a29db49 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 17 Dec 2024 00:23:18 -0500 Subject: [PATCH 39/89] what --- tests/unit/dogstatsd/test_statsd.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tests/unit/dogstatsd/test_statsd.py b/tests/unit/dogstatsd/test_statsd.py index 9eadfc2a8..b9a24cfc1 100644 --- a/tests/unit/dogstatsd/test_statsd.py +++ b/tests/unit/dogstatsd/test_statsd.py @@ -1116,25 +1116,6 @@ def test_aggregation_buffering_simultaneously_with_interval(self): fake_socket.recv(2, no_wait=True) ) - # def test_buffered_metric_aggregation(self): - # dogstatsd = DogStatsd(disable_buffering=True, disable_aggregation=False, telemetry_min_flush_interval=0) - # fake_socket = FakeSocket() - # dogstatsd.socket = fake_socket - # dogstatsd.histogram("test.histogram_aggregation", 1) - # dogstatsd.distribution("test.distribution_aggregation", 2) - # dogstatsd.timing("test.timing_aggregation", 3) - # dogstatsd._start_flush_thread() - # time.sleep(dogstatsd._flush_interval / 2) - # # Ensure that packets didn't arrive immediately for buffered_metrics - # self.assertIsNone(dogstatsd.socket.recv(2, no_wait=True)) - - # time.sleep(dogstatsd._flush_interval) - # packet = dogstatsd.socket.recv(2, no_wait=True) - # h_metric = "test.histogram_aggregation:1|h" - # d_metric = "test.distribution_aggregation:2|d" - # t_metric = "test.timing_aggregation:3|ms" - # self.assertTrue(h_metric in packet and d_metric in packet and t_metric in packet) - def test_disable_buffering(self): dogstatsd = DogStatsd(disable_buffering=True, telemetry_min_flush_interval=0) fake_socket = FakeSocket() From f0c4db05927bc8b6074738e1fb44ad98edfb206b Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 17 Dec 2024 00:26:08 
-0500 Subject: [PATCH 40/89] ??? --- tests/unit/dogstatsd/test_statsd.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/unit/dogstatsd/test_statsd.py b/tests/unit/dogstatsd/test_statsd.py index b9a24cfc1..865ccd2c3 100644 --- a/tests/unit/dogstatsd/test_statsd.py +++ b/tests/unit/dogstatsd/test_statsd.py @@ -1116,6 +1116,25 @@ def test_aggregation_buffering_simultaneously_with_interval(self): fake_socket.recv(2, no_wait=True) ) + def test_buffered_metric_aggregation(self): + dogstatsd = DogStatsd(disable_buffering=True, disable_aggregation=False, telemetry_min_flush_interval=0) + fake_socket = FakeSocket() + dogstatsd.socket = fake_socket + dogstatsd.histogram("test.histogram_aggregation", 1) + dogstatsd.distribution("test.distribution_aggregation", 2) + dogstatsd.timing("test.timing_aggregation", 3) + dogstatsd._start_flush_thread() + time.sleep(dogstatsd._flush_interval / 2) + # Ensure that packets didn't arrive immediately for buffered_metrics + self.assertIsNone(dogstatsd.socket.recv(2, no_wait=True)) + + time.sleep(dogstatsd._flush_interval) + packet = dogstatsd.socket.recv(2, no_wait=True) + h_metric = "test.histogram_aggregation:1|h" + d_metric = "test.distribution_aggregation:2|d" + t_metric = "test.timing_aggregation:3|ms" + self.assertTrue(h_metric in packet and d_metric in packet and t_metric in packet) + def test_disable_buffering(self): dogstatsd = DogStatsd(disable_buffering=True, telemetry_min_flush_interval=0) fake_socket = FakeSocket() From e7b62d26a43b727f70377bc931e1aa73d9b2a7f3 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 17 Dec 2024 13:05:32 -0500 Subject: [PATCH 41/89] rerun tests --- tests/unit/dogstatsd/test_statsd.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/dogstatsd/test_statsd.py b/tests/unit/dogstatsd/test_statsd.py index 865ccd2c3..916108215 100644 --- a/tests/unit/dogstatsd/test_statsd.py +++ b/tests/unit/dogstatsd/test_statsd.py @@ -1127,7 +1127,6 @@ def test_buffered_metric_aggregation(self): time.sleep(dogstatsd._flush_interval / 2) # Ensure that packets didn't arrive immediately for buffered_metrics self.assertIsNone(dogstatsd.socket.recv(2, no_wait=True)) - time.sleep(dogstatsd._flush_interval) packet = dogstatsd.socket.recv(2, no_wait=True) h_metric = "test.histogram_aggregation:1|h" From 9d4f24a27e7ba810d25f0436cf907f29b1e4dd69 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 18 Dec 2024 09:18:40 -0500 Subject: [PATCH 42/89] rerun tests --- tests/unit/dogstatsd/test_statsd.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/dogstatsd/test_statsd.py b/tests/unit/dogstatsd/test_statsd.py index 916108215..865ccd2c3 100644 --- a/tests/unit/dogstatsd/test_statsd.py +++ b/tests/unit/dogstatsd/test_statsd.py @@ -1127,6 +1127,7 @@ def test_buffered_metric_aggregation(self): time.sleep(dogstatsd._flush_interval / 2) # Ensure that packets didn't arrive immediately for buffered_metrics self.assertIsNone(dogstatsd.socket.recv(2, no_wait=True)) + time.sleep(dogstatsd._flush_interval) packet = dogstatsd.socket.recv(2, no_wait=True) h_metric = "test.histogram_aggregation:1|h" From 620ea1e74bb2b93c8b8dbddb0b3782cdbf8aba79 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 18 Dec 2024 14:12:50 -0500 Subject: [PATCH 43/89] remove unused function --- datadog/dogstatsd/buffered_metrics_context.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/datadog/dogstatsd/buffered_metrics_context.py b/datadog/dogstatsd/buffered_metrics_context.py index c4f900e39..dfab7ecb2 
100644 --- a/datadog/dogstatsd/buffered_metrics_context.py +++ b/datadog/dogstatsd/buffered_metrics_context.py @@ -40,7 +40,3 @@ def should_sample(self, rate): if rate >= 1: return True return random.random() < rate - - def get_nb_context(self): - """Return the number of contexts.""" - return self.nb_context From 7d78065aba0027479c0f52336180125d7742cab2 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Sun, 5 Jan 2025 13:23:23 -0500 Subject: [PATCH 44/89] add flag for enabling/disabling extended aggregation --- datadog/__init__.py | 9 ++++++++ datadog/dogstatsd/base.py | 44 ++++++++++++++++++++++++++++++--------- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/datadog/__init__.py b/datadog/__init__.py index dec93813a..a2f27021c 100644 --- a/datadog/__init__.py +++ b/datadog/__init__.py @@ -38,6 +38,7 @@ def initialize( statsd_host=None, # type: Optional[str] statsd_port=None, # type: Optional[int] statsd_disable_aggregation=True, # type: bool + statsd_disable_extended_aggregation=True, # type: bool statsd_disable_buffering=True, # type: bool statsd_aggregation_flush_interval=0.3, # type: float statsd_use_default_route=False, # type: bool @@ -82,6 +83,10 @@ def initialize( (default: True). :type statsd_disable_aggregation: boolean + :param statsd_disable_extended_aggregation: Enable/disable statsd client aggregation support for histograms, distributions and timing metrics + (default: True). + :type statsd_disable_extended_aggregation: boolean + :param statsd_aggregation_flush_interval: If aggregation is enabled, set the flush interval for aggregation/buffering (default: 0.3 seconds) @@ -143,6 +148,10 @@ def initialize( statsd.disable_aggregation() else: statsd.enable_aggregation(statsd_aggregation_flush_interval) + if statsd_disable_extended_aggregation: + statsd.disable_extended_aggregation() + else: + statsd.enable_extended_aggregation(statsd_aggregation_flush_interval) statsd.disable_buffering = statsd_disable_buffering api._return_raw_response = return_raw_response diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 59e31987d..a5dd890d9 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -147,6 +147,7 @@ def __init__( max_buffer_size=None, # type: None flush_interval=DEFAULT_BUFFERING_FLUSH_INTERVAL, # type: float disable_aggregation=True, # type: bool + disable_extended_aggregation=True, # type: bool disable_buffering=True, # type: bool namespace=None, # type: Optional[Text] constant_tags=None, # type: Optional[List[str]] @@ -236,7 +237,10 @@ def __init__( it overrides the default value. 
:type flush_interval: float - :disable_aggregation: If true, metrics (Count, Gauge, Set) are no longered aggregated by the client + :disable_aggregation: If true, metrics (Count, Gauge, Set) are no longer aggregated by the client + :type disable_aggregation: bool + + :disable_extended_aggregation: If true, metrics (Histogram, Distribution, Timing) are no longer aggregated by the client :type disable_aggregation: bool :disable_buffering: If set, metrics are no longered buffered by the client and @@ -446,6 +450,7 @@ def __init__( self._disable_buffering = disable_buffering self._disable_aggregation = disable_aggregation + self._disable_extended_aggregation = disable_extended_aggregation self._flush_interval = flush_interval self._flush_thread = None @@ -459,7 +464,7 @@ def __init__( else: self._send = self._send_to_server - if not self._disable_aggregation or not self._disable_buffering: + if not self._disable_aggregation or not self._disable_buffering or not self._disable_extended_aggregation: self._start_flush_thread() else: log.debug("Statsd buffering and aggregation is disabled") @@ -559,10 +564,8 @@ def _start_flush_thread(self): def _flush_thread_loop(self, flush_interval): while not self._flush_thread_stop.is_set(): time.sleep(flush_interval) - if not self._disable_aggregation: + if not self._disable_aggregation or not self._disable_extended_aggregation: self.flush_aggregated_metrics() - # Histograms, Distribution and Timing metrics are not aggregated - self.flush_buffered_metrics() if not self._disable_buffering: self.flush_buffered_metrics() self._flush_thread = threading.Thread( @@ -582,7 +585,7 @@ def _stop_flush_thread(self): if not self._flush_thread: return try: - if not self._disable_aggregation: + if not self._disable_aggregation or not self._disable_extended_aggregation: self.flush_aggregated_metrics() if not self.disable_buffering: self.flush_buffered_metrics() @@ -641,7 +644,7 @@ def disable_aggregation(self): # If aggregation and buffering has been disabled, flush and kill the background thread # otherwise start up the flushing thread and enable aggregation. - if self._disable_aggregation and self.disable_buffering: + if self._disable_aggregation and self._disable_extended_aggregation and self.disable_buffering: self._stop_flush_thread() log.debug("Statsd aggregation is disabled") @@ -655,6 +658,30 @@ def enable_aggregation(self, flush_interval=DEFAULT_BUFFERING_FLUSH_INTERVAL): self._send = self._send_to_server self._start_flush_thread() + def disable_extended_aggregation(self): + with self._config_lock: + # If the toggle didn't change anything, this method is a noop + if self._disable_extended_aggregation: + return + + self._disable_extended_aggregation = True + + # If aggregation and buffering has been disabled, flush and kill the background thread + # otherwise start up the flushing thread and enable aggregation. 
+ if self._disable_aggregation and self._disable_extended_aggregation and self.disable_buffering: + self._stop_flush_thread() + log.debug("Statsd aggregation is disabled") + + def enable_extended_aggregation(self, flush_interval=DEFAULT_BUFFERING_FLUSH_INTERVAL): + with self._config_lock: + if not self._disable_aggregation: + return + self._disable_aggregation = False + self._flush_interval = flush_interval + if self._disable_buffering: + self._send = self._send_to_server + self._start_flush_thread() + @staticmethod def resolve_host(host, use_default_route): """ @@ -829,11 +856,8 @@ def flush_aggregated_metrics(self): self._report(m.name, m.metric_type, m.value, m.tags, m.rate, m.timestamp) buffered_metrics = self.aggregator.flush_aggregated_buffered_metrics() - send_method = self._send - self._send = self._send_to_buffer for m in buffered_metrics: self._report(m.name, m.metric_type, m.value, m.tags, m.rate, m.timestamp) - self._send = send_method def gauge( self, From 393201b375fdd7fa0dac544e10970d624102efcb Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Sun, 5 Jan 2025 14:31:17 -0500 Subject: [PATCH 45/89] make max_metric_sampels configurable --- datadog/dogstatsd/aggregator.py | 8 ++++---- datadog/dogstatsd/base.py | 6 +++++- datadog/dogstatsd/buffered_metrics_context.py | 5 +++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index 89ae8eccf..3ee7cadbc 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -14,16 +14,16 @@ class Aggregator(object): - def __init__(self): + def __init__(self, max_metric_samples=0): self.metrics_map = { MetricType.COUNT: {}, MetricType.GAUGE: {}, MetricType.SET: {}, } self.buffered_metrics_map = { - MetricType.HISTOGRAM: BufferedMetricContexts(HistogramMetric), - MetricType.DISTRIBUTION: BufferedMetricContexts(DistributionMetric), - MetricType.TIMING: BufferedMetricContexts(TimingMetric) + MetricType.HISTOGRAM: BufferedMetricContexts(HistogramMetric, max_metric_samples), + MetricType.DISTRIBUTION: BufferedMetricContexts(DistributionMetric, max_metric_samples), + MetricType.TIMING: BufferedMetricContexts(TimingMetric, max_metric_samples) } self._locks = { MetricType.COUNT: threading.RLock(), diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index a5dd890d9..68b19e592 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -161,6 +161,7 @@ def __init__( telemetry_port=None, # type: Union[str, int] telemetry_socket_path=None, # type: Text max_buffer_len=0, # type: int + max_metric_samples=0, # type: int container_id=None, # type: Optional[Text] origin_detection_enabled=True, # type: bool socket_timeout=0, # type: Optional[float] @@ -272,6 +273,9 @@ def __init__( depending on the connection type. :type max_buffer_len: integer + :param max_metric_samples: Maximum number of metric samples for buffered metrics (Histogram, Distribution, Timing) + :type max_metric_samples: integer + :param disable_telemetry: Should client telemetry be disabled :type disable_telemetry: boolean @@ -455,7 +459,7 @@ def __init__( self._flush_interval = flush_interval self._flush_thread = None self._flush_thread_stop = threading.Event() - self.aggregator = Aggregator() + self.aggregator = Aggregator(max_metric_samples) # Indicates if the process is about to fork, so we shouldn't start any new threads yet. 
self._forking = False diff --git a/datadog/dogstatsd/buffered_metrics_context.py b/datadog/dogstatsd/buffered_metrics_context.py index dfab7ecb2..b2be0068b 100644 --- a/datadog/dogstatsd/buffered_metrics_context.py +++ b/datadog/dogstatsd/buffered_metrics_context.py @@ -3,10 +3,11 @@ class BufferedMetricContexts: - def __init__(self, buffered_metric_type): + def __init__(self, buffered_metric_type, max_metric_samples=0): self.nb_context = 0 self.lock = Lock() self.values = {} + self.max_metric_samples = max_metric_samples self.buffered_metric_type = buffered_metric_type def flush(self): @@ -28,7 +29,7 @@ def sample(self, name, value, tags, rate, context_key): with self.lock: if context_key not in self.values: # Create a new metric if it doesn't exist - self.values[context_key] = self.buffered_metric_type(name, tags, rate) + self.values[context_key] = self.buffered_metric_type(name, tags, rate, self.max_metric_samples) metric = self.values[context_key] if keeping_sample: metric.maybe_keep_sample(value) From dd7e743a60d2e312fef4df1d6cc2559da3a89821 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Sun, 5 Jan 2025 14:32:40 -0500 Subject: [PATCH 46/89] remove unecessary lock --- datadog/dogstatsd/aggregator.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index 3ee7cadbc..f9ee56813 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -29,9 +29,6 @@ def __init__(self, max_metric_samples=0): MetricType.COUNT: threading.RLock(), MetricType.GAUGE: threading.RLock(), MetricType.SET: threading.RLock(), - MetricType.HISTOGRAM: threading.RLock(), - MetricType.DISTRIBUTION: threading.RLock(), - MetricType.TIMING: threading.RLock(), } def flush_aggregated_metrics(self): @@ -47,8 +44,7 @@ def flush_aggregated_metrics(self): def flush_aggregated_buffered_metrics(self): metrics = [] for metric_type in self.buffered_metrics_map.keys(): - with self._locks[metric_type]: - metric_context = self.buffered_metrics_map[metric_type] + metric_context = self.buffered_metrics_map[metric_type] for metricList in metric_context.flush(): metrics.extend(metricList) return metrics From a442722bd4d5baa815e93e63af8d83c09dd9013d Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 6 Jan 2025 15:52:40 -0500 Subject: [PATCH 47/89] remove extended aggregation --- datadog/dogstatsd/base.py | 37 ++++--------------------------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 68b19e592..6dd4bd97a 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -147,7 +147,6 @@ def __init__( max_buffer_size=None, # type: None flush_interval=DEFAULT_BUFFERING_FLUSH_INTERVAL, # type: float disable_aggregation=True, # type: bool - disable_extended_aggregation=True, # type: bool disable_buffering=True, # type: bool namespace=None, # type: Optional[Text] constant_tags=None, # type: Optional[List[str]] @@ -241,9 +240,6 @@ def __init__( :disable_aggregation: If true, metrics (Count, Gauge, Set) are no longer aggregated by the client :type disable_aggregation: bool - :disable_extended_aggregation: If true, metrics (Histogram, Distribution, Timing) are no longer aggregated by the client - :type disable_aggregation: bool - :disable_buffering: If set, metrics are no longered buffered by the client and all data is sent synchronously to the server :type disable_buffering: bool @@ -454,7 +450,6 @@ def __init__( 
self._disable_buffering = disable_buffering self._disable_aggregation = disable_aggregation - self._disable_extended_aggregation = disable_extended_aggregation self._flush_interval = flush_interval self._flush_thread = None @@ -468,7 +463,7 @@ def __init__( else: self._send = self._send_to_server - if not self._disable_aggregation or not self._disable_buffering or not self._disable_extended_aggregation: + if not self._disable_aggregation or not self._disable_buffering: self._start_flush_thread() else: log.debug("Statsd buffering and aggregation is disabled") @@ -568,7 +563,7 @@ def _start_flush_thread(self): def _flush_thread_loop(self, flush_interval): while not self._flush_thread_stop.is_set(): time.sleep(flush_interval) - if not self._disable_aggregation or not self._disable_extended_aggregation: + if not self._disable_aggregation: self.flush_aggregated_metrics() if not self._disable_buffering: self.flush_buffered_metrics() @@ -589,7 +584,7 @@ def _stop_flush_thread(self): if not self._flush_thread: return try: - if not self._disable_aggregation or not self._disable_extended_aggregation: + if not self._disable_aggregation: self.flush_aggregated_metrics() if not self.disable_buffering: self.flush_buffered_metrics() @@ -648,7 +643,7 @@ def disable_aggregation(self): # If aggregation and buffering has been disabled, flush and kill the background thread # otherwise start up the flushing thread and enable aggregation. - if self._disable_aggregation and self._disable_extended_aggregation and self.disable_buffering: + if self._disable_aggregation and self.disable_buffering: self._stop_flush_thread() log.debug("Statsd aggregation is disabled") @@ -662,30 +657,6 @@ def enable_aggregation(self, flush_interval=DEFAULT_BUFFERING_FLUSH_INTERVAL): self._send = self._send_to_server self._start_flush_thread() - def disable_extended_aggregation(self): - with self._config_lock: - # If the toggle didn't change anything, this method is a noop - if self._disable_extended_aggregation: - return - - self._disable_extended_aggregation = True - - # If aggregation and buffering has been disabled, flush and kill the background thread - # otherwise start up the flushing thread and enable aggregation. 
- if self._disable_aggregation and self._disable_extended_aggregation and self.disable_buffering: - self._stop_flush_thread() - log.debug("Statsd aggregation is disabled") - - def enable_extended_aggregation(self, flush_interval=DEFAULT_BUFFERING_FLUSH_INTERVAL): - with self._config_lock: - if not self._disable_aggregation: - return - self._disable_aggregation = False - self._flush_interval = flush_interval - if self._disable_buffering: - self._send = self._send_to_server - self._start_flush_thread() - @staticmethod def resolve_host(host, use_default_route): """ From 5d872a33247dd8edd762f3e338ca8eee0778bc88 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 6 Jan 2025 15:54:00 -0500 Subject: [PATCH 48/89] remove extended aggregation --- datadog/__init__.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/datadog/__init__.py b/datadog/__init__.py index a2f27021c..dec93813a 100644 --- a/datadog/__init__.py +++ b/datadog/__init__.py @@ -38,7 +38,6 @@ def initialize( statsd_host=None, # type: Optional[str] statsd_port=None, # type: Optional[int] statsd_disable_aggregation=True, # type: bool - statsd_disable_extended_aggregation=True, # type: bool statsd_disable_buffering=True, # type: bool statsd_aggregation_flush_interval=0.3, # type: float statsd_use_default_route=False, # type: bool @@ -83,10 +82,6 @@ def initialize( (default: True). :type statsd_disable_aggregation: boolean - :param statsd_disable_extended_aggregation: Enable/disable statsd client aggregation support for histograms, distributions and timing metrics - (default: True). - :type statsd_disable_extended_aggregation: boolean - :param statsd_aggregation_flush_interval: If aggregation is enabled, set the flush interval for aggregation/buffering (default: 0.3 seconds) @@ -148,10 +143,6 @@ def initialize( statsd.disable_aggregation() else: statsd.enable_aggregation(statsd_aggregation_flush_interval) - if statsd_disable_extended_aggregation: - statsd.disable_extended_aggregation() - else: - statsd.enable_extended_aggregation(statsd_aggregation_flush_interval) statsd.disable_buffering = statsd_disable_buffering api._return_raw_response = return_raw_response From 0e0650105e3d685d1f1e47c1fa479d68e03d98e1 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 6 Jan 2025 19:11:18 -0500 Subject: [PATCH 49/89] remove test, not in scope --- tests/unit/dogstatsd/test_statsd.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tests/unit/dogstatsd/test_statsd.py b/tests/unit/dogstatsd/test_statsd.py index 865ccd2c3..b9a24cfc1 100644 --- a/tests/unit/dogstatsd/test_statsd.py +++ b/tests/unit/dogstatsd/test_statsd.py @@ -1116,25 +1116,6 @@ def test_aggregation_buffering_simultaneously_with_interval(self): fake_socket.recv(2, no_wait=True) ) - def test_buffered_metric_aggregation(self): - dogstatsd = DogStatsd(disable_buffering=True, disable_aggregation=False, telemetry_min_flush_interval=0) - fake_socket = FakeSocket() - dogstatsd.socket = fake_socket - dogstatsd.histogram("test.histogram_aggregation", 1) - dogstatsd.distribution("test.distribution_aggregation", 2) - dogstatsd.timing("test.timing_aggregation", 3) - dogstatsd._start_flush_thread() - time.sleep(dogstatsd._flush_interval / 2) - # Ensure that packets didn't arrive immediately for buffered_metrics - self.assertIsNone(dogstatsd.socket.recv(2, no_wait=True)) - - time.sleep(dogstatsd._flush_interval) - packet = dogstatsd.socket.recv(2, no_wait=True) - h_metric = "test.histogram_aggregation:1|h" - d_metric = 
"test.distribution_aggregation:2|d" - t_metric = "test.timing_aggregation:3|ms" - self.assertTrue(h_metric in packet and d_metric in packet and t_metric in packet) - def test_disable_buffering(self): dogstatsd = DogStatsd(disable_buffering=True, telemetry_min_flush_interval=0) fake_socket = FakeSocket() From 10e9ebeb5202986565a86ab7a971fe4168f369fa Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 6 Jan 2025 19:13:01 -0500 Subject: [PATCH 50/89] lint --- datadog/dogstatsd/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 6dd4bd97a..9a6146494 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -269,7 +269,8 @@ def __init__( depending on the connection type. :type max_buffer_len: integer - :param max_metric_samples: Maximum number of metric samples for buffered metrics (Histogram, Distribution, Timing) + :param max_metric_samples: Maximum number of metric samples for buffered + metrics (Histogram, Distribution, Timing) :type max_metric_samples: integer :param disable_telemetry: Should client telemetry be disabled From 1e2eed8917f511332871a245c108bde3ccc9ac77 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 6 Jan 2025 19:34:54 -0500 Subject: [PATCH 51/89] rename max_metric_samples to max_metric_samples_per_context --- datadog/dogstatsd/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 9a6146494..dff604451 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -160,7 +160,7 @@ def __init__( telemetry_port=None, # type: Union[str, int] telemetry_socket_path=None, # type: Text max_buffer_len=0, # type: int - max_metric_samples=0, # type: int + max_metric_samples_per_context=0, # type: int container_id=None, # type: Optional[Text] origin_detection_enabled=True, # type: bool socket_timeout=0, # type: Optional[float] @@ -455,7 +455,7 @@ def __init__( self._flush_interval = flush_interval self._flush_thread = None self._flush_thread_stop = threading.Event() - self.aggregator = Aggregator(max_metric_samples) + self.aggregator = Aggregator(max_metric_samples_per_context) # Indicates if the process is about to fork, so we shouldn't start any new threads yet. 
self._forking = False From bad35baa3aaac1efe27ea087f16d68b35042109b Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 13 Jan 2025 13:56:12 -0500 Subject: [PATCH 52/89] rename buffered_metrics to max_sample_metric, change base.py so that max_sample_metrics are not sampled twice --- datadog/dogstatsd/aggregator.py | 28 +++++++++---------- datadog/dogstatsd/base.py | 17 +++++++++-- ...ffered_metrics.py => max_sample_metric.py} | 8 +++--- ...ontext.py => max_sample_metric_context.py} | 2 +- 4 files changed, 33 insertions(+), 22 deletions(-) rename datadog/dogstatsd/{buffered_metrics.py => max_sample_metric.py} (93%) rename datadog/dogstatsd/{buffered_metrics_context.py => max_sample_metric_context.py} (98%) diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index f9ee56813..fa6df290f 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -4,13 +4,13 @@ GaugeMetric, SetMetric, ) -from datadog.dogstatsd.buffered_metrics import ( +from datadog.dogstatsd.max_sample_metric import ( HistogramMetric, DistributionMetric, TimingMetric ) from datadog.dogstatsd.metric_types import MetricType -from datadog.dogstatsd.buffered_metrics_context import BufferedMetricContexts +from datadog.dogstatsd.max_sample_metric_context import MaxSampleMetricContexts class Aggregator(object): @@ -20,10 +20,10 @@ def __init__(self, max_metric_samples=0): MetricType.GAUGE: {}, MetricType.SET: {}, } - self.buffered_metrics_map = { - MetricType.HISTOGRAM: BufferedMetricContexts(HistogramMetric, max_metric_samples), - MetricType.DISTRIBUTION: BufferedMetricContexts(DistributionMetric, max_metric_samples), - MetricType.TIMING: BufferedMetricContexts(TimingMetric, max_metric_samples) + self.max_sample_metric_map = { + MetricType.HISTOGRAM: MaxSampleMetricContexts(HistogramMetric, max_metric_samples), + MetricType.DISTRIBUTION: MaxSampleMetricContexts(DistributionMetric, max_metric_samples), + MetricType.TIMING: MaxSampleMetricContexts(TimingMetric, max_metric_samples) } self._locks = { MetricType.COUNT: threading.RLock(), @@ -41,10 +41,10 @@ def flush_aggregated_metrics(self): metrics.extend(metric.get_data() if isinstance(metric, SetMetric) else [metric]) return metrics - def flush_aggregated_buffered_metrics(self): + def flush_aggregated_sampled_metrics(self): metrics = [] - for metric_type in self.buffered_metrics_map.keys(): - metric_context = self.buffered_metrics_map[metric_type] + for metric_type in self.max_sample_metric_map.keys(): + metric_context = self.max_sample_metric_map[metric_type] for metricList in metric_context.flush(): metrics.extend(metricList) return metrics @@ -81,25 +81,25 @@ def add_metric( ) def histogram(self, name, value, tags, rate): - return self.add_buffered_metric( + return self.add_max_sample_metric( MetricType.HISTOGRAM, name, value, tags, rate ) def distribution(self, name, value, tags, rate): - return self.add_buffered_metric( + return self.add_max_sample_metric( MetricType.DISTRIBUTION, name, value, tags, rate ) def timing(self, name, value, tags, rate): - return self.add_buffered_metric( + return self.add_max_sample_metric( MetricType.TIMING, name, value, tags, rate ) - def add_buffered_metric( + def add_max_sample_metric( self, metric_type, name, value, tags, rate ): if rate is None: rate = 1 context_key = self.get_context(name, tags) - metric_context = self.buffered_metrics_map[metric_type] + metric_context = self.max_sample_metric_map[metric_type] return metric_context.sample(name, value, tags, rate, context_key) 
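A note on the sampling behaviour the aggregator changes above rely on: each context keeps at most max_metric_samples values, and once that cap is reached MaxSampleMetric.maybe_keep_sample overwrites a randomly chosen stored value (or drops the new one) while total_metric_samples keeps counting every observation. The standalone Python sketch below restates that logic outside the patch; the SampleReservoir name and the usage at the end are illustrative only, and just the replacement rule itself is taken from the maybe_keep_sample diff and test_maybe_keep_sample earlier in this series.

import random

class SampleReservoir(object):
    # Illustrative restatement of MaxSampleMetric.maybe_keep_sample; not part of the patch.
    def __init__(self, max_samples):
        self.max_samples = max_samples   # 0 means "keep everything"
        self.data = []
        self.stored = 0                  # samples currently kept
        self.total = 0                   # samples observed overall

    def add(self, value):
        if self.max_samples > 0 and self.stored >= self.max_samples:
            # Reservoir is full: pick an index among the samples seen so far;
            # if it lands inside the reservoir, overwrite that slot, otherwise
            # discard the new value.
            i = random.randint(0, self.total - 1)
            if i < self.max_samples:
                self.data[i] = value
        else:
            self.data.append(value)
            self.stored += 1
        self.total += 1

# Mirrors test_maybe_keep_sample: with a cap of 2, three observations leave two
# stored values, and the newest value always displaces one of the first two.
r = SampleReservoir(max_samples=2)
for v in (123, 456, 789):
    r.add(v)
assert len(r.data) == 2 and r.total == 3
assert not (123 in r.data and 456 in r.data)
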
diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index dff604451..44b38e838 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -831,9 +831,20 @@ def flush_aggregated_metrics(self): for m in metrics: self._report(m.name, m.metric_type, m.value, m.tags, m.rate, m.timestamp) - buffered_metrics = self.aggregator.flush_aggregated_buffered_metrics() - for m in buffered_metrics: - self._report(m.name, m.metric_type, m.value, m.tags, m.rate, m.timestamp) + sampled_metrics = self.aggregator.flush_aggregated_sampled_metrics() + if self._enabled is not True: + return + for m in sampled_metrics: + if self._telemetry: + self.metrics_count += 1 + + timestamp = 0 + tags = self._add_constant_tags(m.tags) + payload = self._serialize_metric( + m.name, m.metric_type, m.value, tags, m.rate, timestamp + ) + self._send(payload) + def gauge( self, diff --git a/datadog/dogstatsd/buffered_metrics.py b/datadog/dogstatsd/max_sample_metric.py similarity index 93% rename from datadog/dogstatsd/buffered_metrics.py rename to datadog/dogstatsd/max_sample_metric.py index 5e6da675c..69b1d9ad2 100644 --- a/datadog/dogstatsd/buffered_metrics.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -3,7 +3,7 @@ from datadog.dogstatsd.metrics import MetricAggregator -class BufferedMetric(object): +class MaxSampleMetric(object): def __init__(self, name, tags, metric_type, specified_rate=1.0, max_metric_samples=0): self.name = name self.tags = tags @@ -48,16 +48,16 @@ def flush(self): ] -class HistogramMetric(BufferedMetric): +class HistogramMetric(MaxSampleMetric): def __init__(self, name, tags, rate=1.0, max_metric_samples=0): super(HistogramMetric, self).__init__(name, tags, MetricType.HISTOGRAM, rate, max_metric_samples) -class DistributionMetric(BufferedMetric): +class DistributionMetric(MaxSampleMetric): def __init__(self, name, tags, rate=1.0, max_metric_samples=0): super(DistributionMetric, self).__init__(name, tags, MetricType.DISTRIBUTION, rate, max_metric_samples) -class TimingMetric(BufferedMetric): +class TimingMetric(MaxSampleMetric): def __init__(self, name, tags, rate=1.0, max_metric_samples=0): super(TimingMetric, self).__init__(name, tags, MetricType.TIMING, rate, max_metric_samples) diff --git a/datadog/dogstatsd/buffered_metrics_context.py b/datadog/dogstatsd/max_sample_metric_context.py similarity index 98% rename from datadog/dogstatsd/buffered_metrics_context.py rename to datadog/dogstatsd/max_sample_metric_context.py index b2be0068b..892b245d8 100644 --- a/datadog/dogstatsd/buffered_metrics_context.py +++ b/datadog/dogstatsd/max_sample_metric_context.py @@ -2,7 +2,7 @@ import random -class BufferedMetricContexts: +class MetricContexts: def __init__(self, buffered_metric_type, max_metric_samples=0): self.nb_context = 0 self.lock = Lock() From bca74482dc0efee12e15a054978bd2d1d446391f Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 13 Jan 2025 13:58:31 -0500 Subject: [PATCH 53/89] more renaming --- datadog/dogstatsd/max_sample_metric_context.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datadog/dogstatsd/max_sample_metric_context.py b/datadog/dogstatsd/max_sample_metric_context.py index 892b245d8..8f49b5124 100644 --- a/datadog/dogstatsd/max_sample_metric_context.py +++ b/datadog/dogstatsd/max_sample_metric_context.py @@ -2,13 +2,13 @@ import random -class MetricContexts: - def __init__(self, buffered_metric_type, max_metric_samples=0): +class MaxSampleMetricContexts: + def __init__(self, max_sample_metric_type, 
max_metric_samples=0): self.nb_context = 0 self.lock = Lock() self.values = {} self.max_metric_samples = max_metric_samples - self.buffered_metric_type = buffered_metric_type + self.max_sample_metric_type = max_sample_metric_type def flush(self): metrics = [] @@ -29,7 +29,7 @@ def sample(self, name, value, tags, rate, context_key): with self.lock: if context_key not in self.values: # Create a new metric if it doesn't exist - self.values[context_key] = self.buffered_metric_type(name, tags, rate, self.max_metric_samples) + self.values[context_key] = self.max_sample_metric_type(name, tags, rate, self.max_metric_samples) metric = self.values[context_key] if keeping_sample: metric.maybe_keep_sample(value) From 90b083d042afcef8a0b574ca5250faa0535355e5 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 13 Jan 2025 15:01:21 -0500 Subject: [PATCH 54/89] lint --- datadog/dogstatsd/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 44b38e838..0f9b53699 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -844,7 +844,6 @@ def flush_aggregated_metrics(self): m.name, m.metric_type, m.value, tags, m.rate, timestamp ) self._send(payload) - def gauge( self, From 22c2ac979921d17832ca711ee78249606956e7fc Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 13 Jan 2025 15:04:20 -0500 Subject: [PATCH 55/89] more renaming --- tests/unit/dogstatsd/test_aggregator.py | 20 +++++++++---------- ..._metrics.py => test_max_sample_metrics.py} | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) rename tests/unit/dogstatsd/{test_buffered_metrics.py => test_max_sample_metrics.py} (96%) diff --git a/tests/unit/dogstatsd/test_aggregator.py b/tests/unit/dogstatsd/test_aggregator.py index 4562b1340..6632801c0 100644 --- a/tests/unit/dogstatsd/test_aggregator.py +++ b/tests/unit/dogstatsd/test_aggregator.py @@ -23,16 +23,16 @@ def test_aggregator_sample(self): self.assertIn("setTest:tag1,tag2", self.aggregator.metrics_map[MetricType.SET]) self.aggregator.histogram("histogramTest", 21, tags, 1) - self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.HISTOGRAM].values), 1) - self.assertIn("histogramTest:tag1,tag2", self.aggregator.buffered_metrics_map[MetricType.HISTOGRAM].values) + self.assertEqual(len(self.aggregator.max_sample_metric_map[MetricType.HISTOGRAM].values), 1) + self.assertIn("histogramTest:tag1,tag2", self.aggregator.max_sample_metric_map[MetricType.HISTOGRAM].values) self.aggregator.distribution("distributionTest", 21, tags, 1) - self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.DISTRIBUTION].values), 1) - self.assertIn("distributionTest:tag1,tag2", self.aggregator.buffered_metrics_map[MetricType.DISTRIBUTION].values) + self.assertEqual(len(self.aggregator.max_sample_metric_map[MetricType.DISTRIBUTION].values), 1) + self.assertIn("distributionTest:tag1,tag2", self.aggregator.max_sample_metric_map[MetricType.DISTRIBUTION].values) self.aggregator.timing("timingTest", 21, tags, 1) - self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.TIMING].values), 1) - self.assertIn("timingTest:tag1,tag2", self.aggregator.buffered_metrics_map[MetricType.TIMING].values) + self.assertEqual(len(self.aggregator.max_sample_metric_map[MetricType.TIMING].values), 1) + self.assertIn("timingTest:tag1,tag2", self.aggregator.max_sample_metric_map[MetricType.TIMING].values) def test_aggregator_flush(self): tags = ["tag1", "tag2"] @@ -63,13 +63,13 @@ def 
test_aggregator_flush(self): self.aggregator.timing("timingTest2", 23, tags, 1) metrics = self.aggregator.flush_aggregated_metrics() - metrics.extend(self.aggregator.flush_aggregated_buffered_metrics()) + metrics.extend(self.aggregator.flush_aggregated_sampled_metrics()) self.assertEqual(len(self.aggregator.metrics_map[MetricType.GAUGE]), 0) self.assertEqual(len(self.aggregator.metrics_map[MetricType.COUNT]), 0) self.assertEqual(len(self.aggregator.metrics_map[MetricType.SET]), 0) - self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.HISTOGRAM].values), 0) - self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.DISTRIBUTION].values), 0) - self.assertEqual(len(self.aggregator.buffered_metrics_map[MetricType.TIMING].values), 0) + self.assertEqual(len(self.aggregator.max_sample_metric_map[MetricType.HISTOGRAM].values), 0) + self.assertEqual(len(self.aggregator.max_sample_metric_map[MetricType.DISTRIBUTION].values), 0) + self.assertEqual(len(self.aggregator.max_sample_metric_map[MetricType.TIMING].values), 0) self.assertEqual(len(metrics), 16) metrics.sort(key=lambda m: (m.metric_type, m.name, m.value)) diff --git a/tests/unit/dogstatsd/test_buffered_metrics.py b/tests/unit/dogstatsd/test_max_sample_metrics.py similarity index 96% rename from tests/unit/dogstatsd/test_buffered_metrics.py rename to tests/unit/dogstatsd/test_max_sample_metrics.py index ecf4114b5..e5b10c96b 100644 --- a/tests/unit/dogstatsd/test_buffered_metrics.py +++ b/tests/unit/dogstatsd/test_max_sample_metrics.py @@ -1,8 +1,8 @@ import unittest -from datadog.dogstatsd.buffered_metrics import HistogramMetric, DistributionMetric, TimingMetric +from datadog.dogstatsd.max_sample_metric import HistogramMetric, DistributionMetric, TimingMetric from datadog.dogstatsd.metric_types import MetricType -class TestBufferedMetric(unittest.TestCase): +class TestMaxSampleMetric(unittest.TestCase): def test_new_histogram_metric(self): s = HistogramMetric(name="test", tags="tag1,tag2", max_metric_samples=0, rate=1.0) From 08d150aefac8113037556d2901c1ef665c962600 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 13 Jan 2025 15:06:07 -0500 Subject: [PATCH 56/89] lint --- datadog/dogstatsd/base.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 0f9b53699..316276b23 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -837,12 +837,9 @@ def flush_aggregated_metrics(self): for m in sampled_metrics: if self._telemetry: self.metrics_count += 1 - timestamp = 0 tags = self._add_constant_tags(m.tags) - payload = self._serialize_metric( - m.name, m.metric_type, m.value, tags, m.rate, timestamp - ) + payload = self._serialize_metric(m.name, m.metric_type, m.value, tags, m.rate, timestamp) self._send(payload) def gauge( From dfd1a293ea5c8ddb969e0d48c8948dc212e2d193 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 13 Jan 2025 15:07:36 -0500 Subject: [PATCH 57/89] lint --- datadog/dogstatsd/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 316276b23..74c71368d 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -839,7 +839,7 @@ def flush_aggregated_metrics(self): self.metrics_count += 1 timestamp = 0 tags = self._add_constant_tags(m.tags) - payload = self._serialize_metric(m.name, m.metric_type, m.value, tags, m.rate, timestamp) + payload = self._serialize_metric(m.name, m.metric_type, 
m.value, tags, m.rate, timestamp) self._send(payload) def gauge( From 5c86590097b45893c6445636387b9fd703b20cc6 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 13 Jan 2025 15:39:39 -0500 Subject: [PATCH 58/89] use statsd_max_samples_per_context to set the max samples per context for Histogram, Distribution and Timing metrics --- datadog/__init__.py | 8 +++++++- datadog/dogstatsd/aggregator.py | 14 +++++++++----- datadog/dogstatsd/base.py | 7 ++++--- datadog/dogstatsd/max_sample_metric_context.py | 7 +++---- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/datadog/__init__.py b/datadog/__init__.py index dec93813a..55595b6ff 100644 --- a/datadog/__init__.py +++ b/datadog/__init__.py @@ -43,6 +43,7 @@ def initialize( statsd_use_default_route=False, # type: bool statsd_socket_path=None, # type: Optional[str] statsd_namespace=None, # type: Optional[str] + statsd_max_samples_per_context=0, # type: Optional[int] statsd_constant_tags=None, # type: Optional[List[str]] return_raw_response=False, # type: bool hostname_from_config=True, # type: bool @@ -82,6 +83,10 @@ def initialize( (default: True). :type statsd_disable_aggregation: boolean + :param statsd_max_samples_per_context: Set the max samples per context for Histogram, + Distribution and Timing metrics. Use with the statsd_disable_aggregation set to False. + :type statsd_max_samples_per_context: int + :param statsd_aggregation_flush_interval: If aggregation is enabled, set the flush interval for aggregation/buffering (default: 0.3 seconds) @@ -139,10 +144,11 @@ def initialize( if statsd_constant_tags: statsd.constant_tags += statsd_constant_tags + print("inside __init__", statsd_max_samples_per_context) if statsd_disable_aggregation: statsd.disable_aggregation() else: - statsd.enable_aggregation(statsd_aggregation_flush_interval) + statsd.enable_aggregation(statsd_aggregation_flush_interval, statsd_max_samples_per_context) statsd.disable_buffering = statsd_disable_buffering api._return_raw_response = return_raw_response diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index fa6df290f..6bfdc9abc 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -14,16 +14,16 @@ class Aggregator(object): - def __init__(self, max_metric_samples=0): + def __init__(self): self.metrics_map = { MetricType.COUNT: {}, MetricType.GAUGE: {}, MetricType.SET: {}, } self.max_sample_metric_map = { - MetricType.HISTOGRAM: MaxSampleMetricContexts(HistogramMetric, max_metric_samples), - MetricType.DISTRIBUTION: MaxSampleMetricContexts(DistributionMetric, max_metric_samples), - MetricType.TIMING: MaxSampleMetricContexts(TimingMetric, max_metric_samples) + MetricType.HISTOGRAM: MaxSampleMetricContexts(HistogramMetric), + MetricType.DISTRIBUTION: MaxSampleMetricContexts(DistributionMetric), + MetricType.TIMING: MaxSampleMetricContexts(TimingMetric) } self._locks = { MetricType.COUNT: threading.RLock(), @@ -40,6 +40,9 @@ def flush_aggregated_metrics(self): for metric in current_metrics.values(): metrics.extend(metric.get_data() if isinstance(metric, SetMetric) else [metric]) return metrics + + def set_max_samples_per_context(self, max_samples_per_context=0): + self.max_samples_per_context = max_samples_per_context def flush_aggregated_sampled_metrics(self): metrics = [] @@ -100,6 +103,7 @@ def add_max_sample_metric( ): if rate is None: rate = 1 + print("inside the add sample function", self.max_samples_per_context) context_key = self.get_context(name, tags) metric_context = 
self.max_sample_metric_map[metric_type] - return metric_context.sample(name, value, tags, rate, context_key) + return metric_context.sample(name, value, tags, rate, context_key, self.max_samples_per_context) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 74c71368d..5b2c17c30 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -160,7 +160,7 @@ def __init__( telemetry_port=None, # type: Union[str, int] telemetry_socket_path=None, # type: Text max_buffer_len=0, # type: int - max_metric_samples_per_context=0, # type: int + max_metric_samples_per_context=0, # type: int container_id=None, # type: Optional[Text] origin_detection_enabled=True, # type: bool socket_timeout=0, # type: Optional[float] @@ -455,7 +455,7 @@ def __init__( self._flush_interval = flush_interval self._flush_thread = None self._flush_thread_stop = threading.Event() - self.aggregator = Aggregator(max_metric_samples_per_context) + self.aggregator = Aggregator() # Indicates if the process is about to fork, so we shouldn't start any new threads yet. self._forking = False @@ -648,10 +648,11 @@ def disable_aggregation(self): self._stop_flush_thread() log.debug("Statsd aggregation is disabled") - def enable_aggregation(self, flush_interval=DEFAULT_BUFFERING_FLUSH_INTERVAL): + def enable_aggregation(self, flush_interval=DEFAULT_BUFFERING_FLUSH_INTERVAL, max_samples_per_context=0): with self._config_lock: if not self._disable_aggregation: return + self.aggregator.set_max_samples_per_context(max_samples_per_context) self._disable_aggregation = False self._flush_interval = flush_interval if self._disable_buffering: diff --git a/datadog/dogstatsd/max_sample_metric_context.py b/datadog/dogstatsd/max_sample_metric_context.py index 8f49b5124..1f0d12cb4 100644 --- a/datadog/dogstatsd/max_sample_metric_context.py +++ b/datadog/dogstatsd/max_sample_metric_context.py @@ -3,11 +3,10 @@ class MaxSampleMetricContexts: - def __init__(self, max_sample_metric_type, max_metric_samples=0): + def __init__(self, max_sample_metric_type): self.nb_context = 0 self.lock = Lock() self.values = {} - self.max_metric_samples = max_metric_samples self.max_sample_metric_type = max_sample_metric_type def flush(self): @@ -23,13 +22,13 @@ def flush(self): self.nb_context += len(copiedValues) return metrics - def sample(self, name, value, tags, rate, context_key): + def sample(self, name, value, tags, rate, context_key, max_samples_per_context): """Sample a metric and store it if it meets the criteria.""" keeping_sample = self.should_sample(rate) with self.lock: if context_key not in self.values: # Create a new metric if it doesn't exist - self.values[context_key] = self.max_sample_metric_type(name, tags, rate, self.max_metric_samples) + self.values[context_key] = self.max_sample_metric_type(name, tags, rate, max_samples_per_context) metric = self.values[context_key] if keeping_sample: metric.maybe_keep_sample(value) From 0276bfa7631c6cf0ec1a87aa954aaa5f835cb083 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Mon, 13 Jan 2025 15:42:03 -0500 Subject: [PATCH 59/89] lint --- datadog/__init__.py | 2 +- datadog/dogstatsd/aggregator.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datadog/__init__.py b/datadog/__init__.py index 55595b6ff..c1149c454 100644 --- a/datadog/__init__.py +++ b/datadog/__init__.py @@ -43,7 +43,7 @@ def initialize( statsd_use_default_route=False, # type: bool statsd_socket_path=None, # type: Optional[str] statsd_namespace=None, # type: Optional[str] - 
statsd_max_samples_per_context=0, # type: Optional[int] + statsd_max_samples_per_context=0, # type: Optional[int] statsd_constant_tags=None, # type: Optional[List[str]] return_raw_response=False, # type: bool hostname_from_config=True, # type: bool diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index 6bfdc9abc..469c844cb 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -40,7 +40,7 @@ def flush_aggregated_metrics(self): for metric in current_metrics.values(): metrics.extend(metric.get_data() if isinstance(metric, SetMetric) else [metric]) return metrics - + def set_max_samples_per_context(self, max_samples_per_context=0): self.max_samples_per_context = max_samples_per_context From e50a85fffad1e375d2a4f7da7d60f6570aafa18c Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 14 Jan 2025 11:20:38 -0500 Subject: [PATCH 60/89] set max_samples_per_context through Aggregator constructor --- datadog/__init__.py | 1 - datadog/dogstatsd/aggregator.py | 4 ++-- datadog/dogstatsd/base.py | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/datadog/__init__.py b/datadog/__init__.py index c1149c454..fee5eae7e 100644 --- a/datadog/__init__.py +++ b/datadog/__init__.py @@ -144,7 +144,6 @@ def initialize( if statsd_constant_tags: statsd.constant_tags += statsd_constant_tags - print("inside __init__", statsd_max_samples_per_context) if statsd_disable_aggregation: statsd.disable_aggregation() else: diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index 469c844cb..6ad38334d 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -14,7 +14,8 @@ class Aggregator(object): - def __init__(self): + def __init__(self, max_samples_per_context=0): + self.max_samples_per_context = max_samples_per_context self.metrics_map = { MetricType.COUNT: {}, MetricType.GAUGE: {}, @@ -103,7 +104,6 @@ def add_max_sample_metric( ): if rate is None: rate = 1 - print("inside the add sample function", self.max_samples_per_context) context_key = self.get_context(name, tags) metric_context = self.max_sample_metric_map[metric_type] return metric_context.sample(name, value, tags, rate, context_key, self.max_samples_per_context) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 5b2c17c30..ad69f0c54 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -455,7 +455,7 @@ def __init__( self._flush_interval = flush_interval self._flush_thread = None self._flush_thread_stop = threading.Event() - self.aggregator = Aggregator() + self.aggregator = Aggregator(max_metric_samples_per_context) # Indicates if the process is about to fork, so we shouldn't start any new threads yet. 
self._forking = False @@ -833,7 +833,7 @@ def flush_aggregated_metrics(self): self._report(m.name, m.metric_type, m.value, m.tags, m.rate, m.timestamp) sampled_metrics = self.aggregator.flush_aggregated_sampled_metrics() - if self._enabled is not True: + if not self._enabled: return for m in sampled_metrics: if self._telemetry: From f6d963deb9deb20c5882c7dfced80a33f9a5e006 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 14 Jan 2025 14:17:43 -0500 Subject: [PATCH 61/89] add comments --- datadog/__init__.py | 2 +- datadog/dogstatsd/base.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/datadog/__init__.py b/datadog/__init__.py index fee5eae7e..2e2633dd6 100644 --- a/datadog/__init__.py +++ b/datadog/__init__.py @@ -88,7 +88,7 @@ def initialize( :type statsd_max_samples_per_context: int :param statsd_aggregation_flush_interval: If aggregation is enabled, set the flush interval for - aggregation/buffering + aggregation/buffering (This feature is experimental) (default: 0.3 seconds) :type statsd_aggregation_flush_interval: float diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index ad69f0c54..b532051d5 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -240,6 +240,11 @@ def __init__( :disable_aggregation: If true, metrics (Count, Gauge, Set) are no longer aggregated by the client :type disable_aggregation: bool + :max_metric_samples_per_context: Sets the maximum amount of samples for Histogram, Distribution + and Timings metrics (default 0). This feature should be used alongside aggregation. This feature + is experimental. + :type max_metric_samples_per_context: int + :disable_buffering: If set, metrics are no longered buffered by the client and all data is sent synchronously to the server :type disable_buffering: bool From 4dbaba51c0adb4c537b465f369e6c9df1955ca9a Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 14 Jan 2025 14:32:38 -0500 Subject: [PATCH 62/89] lint --- datadog/dogstatsd/base.py | 2 +- datadog/dogstatsd/max_sample_metric.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index b532051d5..ac0cf21ba 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -240,7 +240,7 @@ def __init__( :disable_aggregation: If true, metrics (Count, Gauge, Set) are no longer aggregated by the client :type disable_aggregation: bool - :max_metric_samples_per_context: Sets the maximum amount of samples for Histogram, Distribution + :max_metric_samples_per_context: Sets the maximum amount of samples for Histogram, Distribution and Timings metrics (default 0). This feature should be used alongside aggregation. This feature is experimental. 
:type max_metric_samples_per_context: int diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index 69b1d9ad2..2ba56adab 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -31,6 +31,7 @@ def maybe_keep_sample(self, value): self.total_metric_samples += 1 else: self.sample(value) + print("len is ", len(self.data)) def skip_sample(self): self.total_metric_samples += 1 From b2f714bd933186d2f595542d9fbce3879e3076d4 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Tue, 14 Jan 2025 14:37:00 -0500 Subject: [PATCH 63/89] remove print --- datadog/dogstatsd/max_sample_metric.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index 2ba56adab..69b1d9ad2 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -31,7 +31,6 @@ def maybe_keep_sample(self, value): self.total_metric_samples += 1 else: self.sample(value) - print("len is ", len(self.data)) def skip_sample(self): self.total_metric_samples += 1 From b46b5390e44cd3e0bb413a920a280d9d236f404e Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 15 Jan 2025 12:38:12 -0500 Subject: [PATCH 64/89] update base.py to not use new feature unless max_samples_per_context is set --- datadog/dogstatsd/base.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index ac0cf21ba..a185a0517 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -982,10 +982,10 @@ def histogram( >>> statsd.histogram("uploaded.file.size", 1445) >>> statsd.histogram("album.photo.count", 26, tags=["gender:female"]) """ - if self._disable_aggregation: - self._report(metric, "h", value, tags, sample_rate) - else: + if not self._disable_aggregation and self.aggregator.max_samples_per_context != 0: self.aggregator.histogram(metric, value, tags, sample_rate) + else: + self._report(metric, "h", value, tags, sample_rate) def distribution( self, @@ -1000,10 +1000,10 @@ def distribution( >>> statsd.distribution("uploaded.file.size", 1445) >>> statsd.distribution("album.photo.count", 26, tags=["gender:female"]) """ - if self._disable_aggregation: - self._report(metric, "d", value, tags, sample_rate) - else: + if not self._disable_aggregation and self.aggregator.max_samples_per_context != 0: self.aggregator.distribution(metric, value, tags, sample_rate) + else: + self._report(metric, "d", value, tags, sample_rate) def timing( self, @@ -1017,11 +1017,10 @@ def timing( >>> statsd.timing("query.response.time", 1234) """ - - if self._disable_aggregation: - self._report(metric, "ms", value, tags, sample_rate) - else: + if not self._disable_aggregation and self.aggregator.max_samples_per_context != 0: self.aggregator.timing(metric, value, tags, sample_rate) + else: + self._report(metric, "ms", value, tags, sample_rate) def timed(self, metric=None, tags=None, sample_rate=None, use_ms=None): """ From b80da4c8a0ad188beb774f056074f20677a569ca Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 15 Jan 2025 13:18:46 -0500 Subject: [PATCH 65/89] remove comment --- datadog/dogstatsd/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index a185a0517..a349c0ddd 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -274,10 +274,6 @@ def __init__( depending on 
the connection type. :type max_buffer_len: integer - :param max_metric_samples: Maximum number of metric samples for buffered - metrics (Histogram, Distribution, Timing) - :type max_metric_samples: integer - :param disable_telemetry: Should client telemetry be disabled :type disable_telemetry: boolean @@ -838,6 +834,8 @@ def flush_aggregated_metrics(self): self._report(m.name, m.metric_type, m.value, m.tags, m.rate, m.timestamp) sampled_metrics = self.aggregator.flush_aggregated_sampled_metrics() + for m in sampled_metrics: + self._report(m.name, m.metric_type, m.value, m.tags, m.rate, m.timestamp) if not self._enabled: return for m in sampled_metrics: @@ -983,8 +981,10 @@ def histogram( >>> statsd.histogram("album.photo.count", 26, tags=["gender:female"]) """ if not self._disable_aggregation and self.aggregator.max_samples_per_context != 0: + print("Aggregated histogram") self.aggregator.histogram(metric, value, tags, sample_rate) else: + print("Regular histogram") self._report(metric, "h", value, tags, sample_rate) def distribution( From c27bf8d3aedd350ea70690f6ddf5a2c4a7166893 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 15 Jan 2025 13:20:33 -0500 Subject: [PATCH 66/89] add flag for _report function to enable/disable sampling --- datadog/dogstatsd/base.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index a349c0ddd..091166877 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -835,16 +835,7 @@ def flush_aggregated_metrics(self): sampled_metrics = self.aggregator.flush_aggregated_sampled_metrics() for m in sampled_metrics: - self._report(m.name, m.metric_type, m.value, m.tags, m.rate, m.timestamp) - if not self._enabled: - return - for m in sampled_metrics: - if self._telemetry: - self.metrics_count += 1 - timestamp = 0 - tags = self._add_constant_tags(m.tags) - payload = self._serialize_metric(m.name, m.metric_type, m.value, tags, m.rate, timestamp) - self._send(payload) + self._report(m.name, m.metric_type, m.value, m.tags, m.rate, m.timestamp, False) def gauge( self, @@ -1124,7 +1115,7 @@ def _serialize_metric( ("|T" + text(timestamp)) if timestamp > 0 else "", ) - def _report(self, metric, metric_type, value, tags, sample_rate, timestamp=0): + def _report(self, metric, metric_type, value, tags, sample_rate, timestamp=0, sampling=True): """ Create a metric packet and send it. 
@@ -1140,11 +1131,12 @@ def _report(self, metric, metric_type, value, tags, sample_rate, timestamp=0): if self._telemetry: self.metrics_count += 1 - if sample_rate is None: - sample_rate = self.default_sample_rate + if sampling: + if sample_rate is None: + sample_rate = self.default_sample_rate - if sample_rate != 1 and random() > sample_rate: - return + if sample_rate != 1 and random() > sample_rate: + return # timestamps (protocol v1.3) only allowed on gauges and counts allows_timestamp = metric_type == MetricType.GAUGE or metric_type == MetricType.COUNT From 52fd25fe45255723e127f17f7bc8f8b55e3326b7 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 15 Jan 2025 15:44:28 -0500 Subject: [PATCH 67/89] fix one off error --- datadog/dogstatsd/max_sample_metric.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index 69b1d9ad2..c3b89fd87 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -21,17 +21,19 @@ def sample(self, value): def maybe_keep_sample(self, value): if self.max_metric_samples > 0: - if self.stored_metric_samples >= self.max_metric_samples: + self.total_metric_samples += 1 + + if self.stored_metric_samples < self.max_metric_samples: + self.data.append(value) + self.stored_metric_samples += 1 + else: i = random.randint(0, self.total_metric_samples - 1) if i < self.max_metric_samples: self.data[i] = value - else: - self.data.append(value) - self.stored_metric_samples += 1 - self.total_metric_samples += 1 else: self.sample(value) + def skip_sample(self): self.total_metric_samples += 1 From ec9a48720b528fb914a1b9c2263334507701ae12 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Wed, 15 Jan 2025 15:54:09 -0500 Subject: [PATCH 68/89] remove unused code --- datadog/dogstatsd/max_sample_metric_context.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/datadog/dogstatsd/max_sample_metric_context.py b/datadog/dogstatsd/max_sample_metric_context.py index 1f0d12cb4..a0fafe7a4 100644 --- a/datadog/dogstatsd/max_sample_metric_context.py +++ b/datadog/dogstatsd/max_sample_metric_context.py @@ -4,7 +4,6 @@ class MaxSampleMetricContexts: def __init__(self, max_sample_metric_type): - self.nb_context = 0 self.lock = Lock() self.values = {} self.max_sample_metric_type = max_sample_metric_type @@ -19,7 +18,6 @@ def flush(self): for _, metric in copiedValues.items(): metrics.append(metric.flush()) - self.nb_context += len(copiedValues) return metrics def sample(self, name, value, tags, rate, context_key, max_samples_per_context): From 363d9406766d04667a88a3d2996dc2f4ed856902 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 16 Jan 2025 12:23:47 -0500 Subject: [PATCH 69/89] use specified rate --- datadog/dogstatsd/max_sample_metric.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index c3b89fd87..f4910cd58 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -38,14 +38,8 @@ def skip_sample(self): self.total_metric_samples += 1 def flush(self): - total_metric_samples = self.total_metric_samples - if self.specified_rate != 1.0: - rate = self.specified_rate - else: - rate = self.stored_metric_samples / total_metric_samples - return [ - MetricAggregator(self.name, self.tags, rate, self.metric_type, value) + MetricAggregator(self.name, self.tags, 
self.specified_rate, self.metric_type, value) for value in self.data ] From f274b9efd5b585bd73b13c08edc92de8995794e9 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Thu, 16 Jan 2025 15:47:59 -0500 Subject: [PATCH 70/89] prelocate data in array --- datadog/dogstatsd/max_sample_metric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index f4910cd58..ee92f8e2c 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -10,7 +10,7 @@ def __init__(self, name, tags, metric_type, specified_rate=1.0, max_metric_sampl self.metric_type = metric_type self.max_metric_samples = max_metric_samples self.specified_rate = specified_rate - self.data = [] + self.data = [None] * max_metric_samples if max_metric_samples > 0 else [] self.stored_metric_samples = 0 self.total_metric_samples = 0 From 810353a50cf9cb66bda5c1b432564f38a207fe4f Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 17 Jan 2025 01:34:43 -0500 Subject: [PATCH 71/89] change append --- datadog/dogstatsd/max_sample_metric.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index ee92f8e2c..ac4c58b4f 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -15,16 +15,18 @@ def __init__(self, name, tags, metric_type, specified_rate=1.0, max_metric_sampl self.total_metric_samples = 0 def sample(self, value): - self.data.append(value) + if self.max_metric_samples == 0: + self.data.append(value) + else: + self.data[self.stored_metric_samples] = value self.stored_metric_samples += 1 self.total_metric_samples += 1 def maybe_keep_sample(self, value): if self.max_metric_samples > 0: self.total_metric_samples += 1 - if self.stored_metric_samples < self.max_metric_samples: - self.data.append(value) + self.data[self.stored_metric_samples] = value self.stored_metric_samples += 1 else: i = random.randint(0, self.total_metric_samples - 1) From 351a8ccd43c2952b872d02f85f74e83ae4f507d8 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 17 Jan 2025 01:36:00 -0500 Subject: [PATCH 72/89] lint --- datadog/dogstatsd/max_sample_metric.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index ac4c58b4f..0ecd670aa 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -24,7 +24,7 @@ def sample(self, value): def maybe_keep_sample(self, value): if self.max_metric_samples > 0: - self.total_metric_samples += 1 + self.total_metric_samples += 1 if self.stored_metric_samples < self.max_metric_samples: self.data[self.stored_metric_samples] = value self.stored_metric_samples += 1 @@ -35,7 +35,6 @@ def maybe_keep_sample(self, value): else: self.sample(value) - def skip_sample(self): self.total_metric_samples += 1 From a94afa5bb5e43e5890724fd2fd6abaab0d9246c7 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 17 Jan 2025 01:38:26 -0500 Subject: [PATCH 73/89] lint x2 --- datadog/dogstatsd/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index 091166877..dd0326eda 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -1131,7 +1131,7 @@ def _report(self, metric, metric_type, value, tags, sample_rate, 
timestamp=0, sa if self._telemetry: self.metrics_count += 1 - if sampling: + if sampling: if sample_rate is None: sample_rate = self.default_sample_rate From d3343460308063d15d796ff5dc20c52ce8d80cbd Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 17 Jan 2025 01:46:38 -0500 Subject: [PATCH 74/89] modify test --- tests/unit/dogstatsd/test_max_sample_metrics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/dogstatsd/test_max_sample_metrics.py b/tests/unit/dogstatsd/test_max_sample_metrics.py index e5b10c96b..2bc0ac710 100644 --- a/tests/unit/dogstatsd/test_max_sample_metrics.py +++ b/tests/unit/dogstatsd/test_max_sample_metrics.py @@ -84,7 +84,6 @@ def test_maybe_keep_sample(self): s.maybe_keep_sample(456) s.maybe_keep_sample(789) self.assertEqual(len(s.data), 2) - self.assertFalse(123 in s.data and 456 in s.data) self.assertEqual(s.name, "test") self.assertEqual(s.tags, "tag1,tag2") self.assertEqual(s.specified_rate, 1.0) From ac7c86d6739f861ccafca3dcc07019ef28ace1c6 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 17 Jan 2025 01:53:34 -0500 Subject: [PATCH 75/89] rerun tests --- tests/unit/dogstatsd/test_max_sample_metrics.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/dogstatsd/test_max_sample_metrics.py b/tests/unit/dogstatsd/test_max_sample_metrics.py index 2bc0ac710..c15d9ab07 100644 --- a/tests/unit/dogstatsd/test_max_sample_metrics.py +++ b/tests/unit/dogstatsd/test_max_sample_metrics.py @@ -83,6 +83,7 @@ def test_maybe_keep_sample(self): s.maybe_keep_sample(123) s.maybe_keep_sample(456) s.maybe_keep_sample(789) + self.assertEqual(len(s.data), 2) self.assertEqual(s.name, "test") self.assertEqual(s.tags, "tag1,tag2") From a4c055a28e8680c4e11288e848143ef09251e774 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 17 Jan 2025 07:43:04 -0500 Subject: [PATCH 76/89] ensure value is not None --- datadog/dogstatsd/max_sample_metric.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index 0ecd670aa..8ff4b6305 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -42,6 +42,7 @@ def flush(self): return [ MetricAggregator(self.name, self.tags, self.specified_rate, self.metric_type, value) for value in self.data + if value is not None ] From e3e4c6d81878790d2ff177fbfd04b8141d004565 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 17 Jan 2025 07:46:31 -0500 Subject: [PATCH 77/89] change loop --- datadog/dogstatsd/max_sample_metric.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index 8ff4b6305..590b5b16c 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -39,11 +39,10 @@ def skip_sample(self): self.total_metric_samples += 1 def flush(self): - return [ - MetricAggregator(self.name, self.tags, self.specified_rate, self.metric_type, value) - for value in self.data - if value is not None - ] + values = [None] * self.stored_metric_samples + for i in range(self.stored_metric_samples): + values[i] = MetricAggregator(self.name, self.tags, self.specified_rate, self.metric_type, self.data[i]) + return values class HistogramMetric(MaxSampleMetric): From aa64e240e1a52ae30c847f9362cc3b1c745d1160 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 17 Jan 2025 11:18:25 -0500 Subject: [PATCH 78/89] use a deep copy --- 
datadog/dogstatsd/max_sample_metric_context.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datadog/dogstatsd/max_sample_metric_context.py b/datadog/dogstatsd/max_sample_metric_context.py index a0fafe7a4..0e53a55f6 100644 --- a/datadog/dogstatsd/max_sample_metric_context.py +++ b/datadog/dogstatsd/max_sample_metric_context.py @@ -1,5 +1,6 @@ from threading import Lock import random +import copy class MaxSampleMetricContexts: @@ -12,7 +13,7 @@ def flush(self): metrics = [] """Flush the metrics and reset the stored values.""" with self.lock: - copiedValues = self.values.copy() + copiedValues = copy.deepcopy(self.values) self.values.clear() self.values = {} for _, metric in copiedValues.items(): From 721b37589615c332d58c01b6d1163a4180648aec Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 17 Jan 2025 11:21:30 -0500 Subject: [PATCH 79/89] remove unecessary code --- datadog/dogstatsd/max_sample_metric_context.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datadog/dogstatsd/max_sample_metric_context.py b/datadog/dogstatsd/max_sample_metric_context.py index 0e53a55f6..c03e3f6f5 100644 --- a/datadog/dogstatsd/max_sample_metric_context.py +++ b/datadog/dogstatsd/max_sample_metric_context.py @@ -15,7 +15,6 @@ def flush(self): with self.lock: copiedValues = copy.deepcopy(self.values) self.values.clear() - self.values = {} for _, metric in copiedValues.items(): metrics.append(metric.flush()) From a9c55ce584e811a27dafd7b3883946687f00b1b6 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 17 Jan 2025 11:26:16 -0500 Subject: [PATCH 80/89] rerun tests --- datadog/dogstatsd/aggregator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datadog/dogstatsd/aggregator.py b/datadog/dogstatsd/aggregator.py index 6ad38334d..5ade9d936 100644 --- a/datadog/dogstatsd/aggregator.py +++ b/datadog/dogstatsd/aggregator.py @@ -40,6 +40,7 @@ def flush_aggregated_metrics(self): self.metrics_map[metric_type] = {} for metric in current_metrics.values(): metrics.extend(metric.get_data() if isinstance(metric, SetMetric) else [metric]) + return metrics def set_max_samples_per_context(self, max_samples_per_context=0): From 0c551ed317a1744576599d6ebd5ca67cf7ce6578 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 17 Jan 2025 14:36:27 -0500 Subject: [PATCH 81/89] add lock --- datadog/dogstatsd/max_sample_metric.py | 32 +++++++++++++++----------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index 590b5b16c..238bc1f9b 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -1,12 +1,14 @@ import random from datadog.dogstatsd.metric_types import MetricType from datadog.dogstatsd.metrics import MetricAggregator +from threading import Lock class MaxSampleMetric(object): def __init__(self, name, tags, metric_type, specified_rate=1.0, max_metric_samples=0): self.name = name self.tags = tags + self.lock = Lock() self.metric_type = metric_type self.max_metric_samples = max_metric_samples self.specified_rate = specified_rate @@ -23,26 +25,28 @@ def sample(self, value): self.total_metric_samples += 1 def maybe_keep_sample(self, value): - if self.max_metric_samples > 0: - self.total_metric_samples += 1 - if self.stored_metric_samples < self.max_metric_samples: - self.data[self.stored_metric_samples] = value - self.stored_metric_samples += 1 + with self.lock: + if self.max_metric_samples > 0: + self.total_metric_samples += 1 + if 
self.stored_metric_samples < self.max_metric_samples: + self.data[self.stored_metric_samples] = value + self.stored_metric_samples += 1 + else: + i = random.randint(0, self.total_metric_samples - 1) + if i < self.max_metric_samples: + self.data[i] = value else: - i = random.randint(0, self.total_metric_samples - 1) - if i < self.max_metric_samples: - self.data[i] = value - else: - self.sample(value) + self.sample(value) def skip_sample(self): self.total_metric_samples += 1 def flush(self): - values = [None] * self.stored_metric_samples - for i in range(self.stored_metric_samples): - values[i] = MetricAggregator(self.name, self.tags, self.specified_rate, self.metric_type, self.data[i]) - return values + with self.lock: + values = [None] * self.stored_metric_samples + for i in range(self.stored_metric_samples): + values[i] = MetricAggregator(self.name, self.tags, self.specified_rate, self.metric_type, self.data[i]) + return values class HistogramMetric(MaxSampleMetric): From f13b48cdc266cd57d215a5fb209d0cf1ec4bc694 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 17 Jan 2025 14:41:52 -0500 Subject: [PATCH 82/89] remove deep copy --- datadog/dogstatsd/max_sample_metric_context.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/datadog/dogstatsd/max_sample_metric_context.py b/datadog/dogstatsd/max_sample_metric_context.py index c03e3f6f5..f874eccfb 100644 --- a/datadog/dogstatsd/max_sample_metric_context.py +++ b/datadog/dogstatsd/max_sample_metric_context.py @@ -1,6 +1,5 @@ from threading import Lock import random -import copy class MaxSampleMetricContexts: @@ -12,12 +11,9 @@ def __init__(self, max_sample_metric_type): def flush(self): metrics = [] """Flush the metrics and reset the stored values.""" - with self.lock: - copiedValues = copy.deepcopy(self.values) - self.values.clear() - for _, metric in copiedValues.items(): + for _, metric in self.values.items(): metrics.append(metric.flush()) - + self.values = {} return metrics def sample(self, name, value, tags, rate, context_key, max_samples_per_context): From 60143c63c8f5fb2c21ea55f67a8a521fadc115f6 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 17 Jan 2025 16:18:42 -0500 Subject: [PATCH 83/89] locks :) --- datadog/dogstatsd/max_sample_metric.py | 21 +++++++++---------- .../dogstatsd/max_sample_metric_context.py | 8 +++++-- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index 238bc1f9b..f1d5b5939 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -25,18 +25,17 @@ def sample(self, value): self.total_metric_samples += 1 def maybe_keep_sample(self, value): - with self.lock: - if self.max_metric_samples > 0: - self.total_metric_samples += 1 - if self.stored_metric_samples < self.max_metric_samples: - self.data[self.stored_metric_samples] = value - self.stored_metric_samples += 1 - else: - i = random.randint(0, self.total_metric_samples - 1) - if i < self.max_metric_samples: - self.data[i] = value + if self.max_metric_samples > 0: + self.total_metric_samples += 1 + if self.stored_metric_samples < self.max_metric_samples: + self.data[self.stored_metric_samples] = value + self.stored_metric_samples += 1 else: - self.sample(value) + i = random.randint(0, self.total_metric_samples - 1) + if i < self.max_metric_samples: + self.data[i] = value + else: + self.sample(value) def skip_sample(self): self.total_metric_samples += 1 diff --git 
a/datadog/dogstatsd/max_sample_metric_context.py b/datadog/dogstatsd/max_sample_metric_context.py index f874eccfb..92c53ebf5 100644 --- a/datadog/dogstatsd/max_sample_metric_context.py +++ b/datadog/dogstatsd/max_sample_metric_context.py @@ -11,9 +11,11 @@ def __init__(self, max_sample_metric_type): def flush(self): metrics = [] """Flush the metrics and reset the stored values.""" - for _, metric in self.values.items(): + with self.lock: + temp = self.values + self.values = {} + for _, metric in temp.items(): metrics.append(metric.flush()) - self.values = {} return metrics def sample(self, name, value, tags, rate, context_key, max_samples_per_context): @@ -24,10 +26,12 @@ def sample(self, name, value, tags, rate, context_key, max_samples_per_context): # Create a new metric if it doesn't exist self.values[context_key] = self.max_sample_metric_type(name, tags, rate, max_samples_per_context) metric = self.values[context_key] + metric.lock.acquire() if keeping_sample: metric.maybe_keep_sample(value) else: metric.skip_sample() + metric.lock.release() def should_sample(self, rate): """Determine if a sample should be kept based on the specified rate.""" From 5f3bb5bfc3cb9c380abcdc073a3438c5a26cab25 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 24 Jan 2025 09:20:41 -0500 Subject: [PATCH 84/89] add unsafe to method name --- datadog/dogstatsd/max_sample_metric.py | 2 +- datadog/dogstatsd/max_sample_metric_context.py | 2 +- tests/unit/dogstatsd/test_max_sample_metrics.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index f1d5b5939..d40dbae47 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -24,7 +24,7 @@ def sample(self, value): self.stored_metric_samples += 1 self.total_metric_samples += 1 - def maybe_keep_sample(self, value): + def maybe_keep_sample_work_unsafe(self, value): if self.max_metric_samples > 0: self.total_metric_samples += 1 if self.stored_metric_samples < self.max_metric_samples: diff --git a/datadog/dogstatsd/max_sample_metric_context.py b/datadog/dogstatsd/max_sample_metric_context.py index 92c53ebf5..a7cfe60d3 100644 --- a/datadog/dogstatsd/max_sample_metric_context.py +++ b/datadog/dogstatsd/max_sample_metric_context.py @@ -28,7 +28,7 @@ def sample(self, name, value, tags, rate, context_key, max_samples_per_context): metric = self.values[context_key] metric.lock.acquire() if keeping_sample: - metric.maybe_keep_sample(value) + metric.maybe_keep_sample_work_unsafe(value) else: metric.skip_sample() metric.lock.release() diff --git a/tests/unit/dogstatsd/test_max_sample_metrics.py b/tests/unit/dogstatsd/test_max_sample_metrics.py index c15d9ab07..e3c8b5305 100644 --- a/tests/unit/dogstatsd/test_max_sample_metrics.py +++ b/tests/unit/dogstatsd/test_max_sample_metrics.py @@ -78,11 +78,11 @@ def test_flush_timing_metric_sample(self): self.assertEqual(m.name, "test") self.assertEqual(m.tags, "tag1,tag2") - def test_maybe_keep_sample(self): + def test_maybe_keep_sample_work_unsafe(self): s = HistogramMetric(name="test", tags="tag1,tag2", rate=1.0, max_metric_samples=2) - s.maybe_keep_sample(123) - s.maybe_keep_sample(456) - s.maybe_keep_sample(789) + s.maybe_keep_sample_work_unsafe(123) + s.maybe_keep_sample_work_unsafe(456) + s.maybe_keep_sample_work_unsafe(789) self.assertEqual(len(s.data), 2) self.assertEqual(s.name, "test") From c1bc06d8172e2d7c2a37b1cfdc9beff677f0606b Mon Sep 17 00:00:00 2001 From: 
andrewqian2001datadog Date: Fri, 24 Jan 2025 09:26:50 -0500 Subject: [PATCH 85/89] remove prints --- datadog/dogstatsd/base.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/datadog/dogstatsd/base.py b/datadog/dogstatsd/base.py index dd0326eda..1ca4e85f7 100644 --- a/datadog/dogstatsd/base.py +++ b/datadog/dogstatsd/base.py @@ -972,10 +972,8 @@ def histogram( >>> statsd.histogram("album.photo.count", 26, tags=["gender:female"]) """ if not self._disable_aggregation and self.aggregator.max_samples_per_context != 0: - print("Aggregated histogram") self.aggregator.histogram(metric, value, tags, sample_rate) else: - print("Regular histogram") self._report(metric, "h", value, tags, sample_rate) def distribution( From 911b94242739b1438e665f494aad139b7e711bcb Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 24 Jan 2025 09:55:56 -0500 Subject: [PATCH 86/89] change rate calculation --- datadog/dogstatsd/max_sample_metric.py | 3 ++- datadog/dogstatsd/max_sample_metric_context.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index d40dbae47..02cdad8c1 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -41,10 +41,11 @@ def skip_sample(self): self.total_metric_samples += 1 def flush(self): + rate = self.stored_metric_samples / self.total_metric_samples with self.lock: values = [None] * self.stored_metric_samples for i in range(self.stored_metric_samples): - values[i] = MetricAggregator(self.name, self.tags, self.specified_rate, self.metric_type, self.data[i]) + values[i] = MetricAggregator(self.name, self.tags, rate, self.metric_type, self.data[i]) return values diff --git a/datadog/dogstatsd/max_sample_metric_context.py b/datadog/dogstatsd/max_sample_metric_context.py index a7cfe60d3..11b4f9996 100644 --- a/datadog/dogstatsd/max_sample_metric_context.py +++ b/datadog/dogstatsd/max_sample_metric_context.py @@ -24,7 +24,7 @@ def sample(self, name, value, tags, rate, context_key, max_samples_per_context): with self.lock: if context_key not in self.values: # Create a new metric if it doesn't exist - self.values[context_key] = self.max_sample_metric_type(name, tags, rate, max_samples_per_context) + self.values[context_key] = self.max_sample_metric_type(name, tags, max_samples_per_context) metric = self.values[context_key] metric.lock.acquire() if keeping_sample: From cbbb938daa0c9fe9932187908401c2173aba1926 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 24 Jan 2025 11:24:10 -0500 Subject: [PATCH 87/89] use list comprehesion --- datadog/dogstatsd/max_sample_metric.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index 02cdad8c1..f9d046314 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -39,14 +39,15 @@ def maybe_keep_sample_work_unsafe(self, value): def skip_sample(self): self.total_metric_samples += 1 - + def flush(self): rate = self.stored_metric_samples / self.total_metric_samples with self.lock: - values = [None] * self.stored_metric_samples - for i in range(self.stored_metric_samples): - values[i] = MetricAggregator(self.name, self.tags, rate, self.metric_type, self.data[i]) - return values + return [ + MetricAggregator(self.name, self.tags, rate, self.metric_type, self.data[i]) + for i in range(self.stored_metric_samples) + ] + class 
HistogramMetric(MaxSampleMetric): From d3ba22381224371eaa694d698b0de5761c1d2f1d Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 24 Jan 2025 13:26:58 -0500 Subject: [PATCH 88/89] lint --- datadog/dogstatsd/max_sample_metric.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index f9d046314..45618ff3c 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -39,7 +39,7 @@ def maybe_keep_sample_work_unsafe(self, value): def skip_sample(self): self.total_metric_samples += 1 - + def flush(self): rate = self.stored_metric_samples / self.total_metric_samples with self.lock: @@ -48,8 +48,6 @@ def flush(self): for i in range(self.stored_metric_samples) ] - - class HistogramMetric(MaxSampleMetric): def __init__(self, name, tags, rate=1.0, max_metric_samples=0): super(HistogramMetric, self).__init__(name, tags, MetricType.HISTOGRAM, rate, max_metric_samples) From 737d48b9ec12a22dc19d401befefee28ced6aa53 Mon Sep 17 00:00:00 2001 From: andrewqian2001datadog Date: Fri, 24 Jan 2025 13:38:46 -0500 Subject: [PATCH 89/89] lint x2 --- datadog/dogstatsd/max_sample_metric.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datadog/dogstatsd/max_sample_metric.py b/datadog/dogstatsd/max_sample_metric.py index 45618ff3c..3459eaaae 100644 --- a/datadog/dogstatsd/max_sample_metric.py +++ b/datadog/dogstatsd/max_sample_metric.py @@ -48,6 +48,7 @@ def flush(self): for i in range(self.stored_metric_samples) ] + class HistogramMetric(MaxSampleMetric): def __init__(self, name, tags, rate=1.0, max_metric_samples=0): super(HistogramMetric, self).__init__(name, tags, MetricType.HISTOGRAM, rate, max_metric_samples)
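For reference, a short usage sketch of the feature this series adds. It is illustrative only and not part of the patch set; it assumes the options introduced here (statsd_max_samples_per_context on datadog.initialize(), backed by max_metric_samples_per_context on DogStatsd) together with standard datadogpy initialization.

    # Sketch: enable client-side aggregation with a per-context cap on the
    # number of samples kept for histogram, distribution and timing metrics.
    # Host/port and the cap value are illustrative.
    from datadog import initialize, statsd

    initialize(
        statsd_host="localhost",
        statsd_port=8125,
        statsd_disable_aggregation=False,        # aggregation must be enabled
        statsd_aggregation_flush_interval=0.3,   # default flush interval
        statsd_max_samples_per_context=10,       # keep at most 10 samples per context
    )

    # With a non-zero cap, these calls are routed through the aggregator and
    # sampled per context (metric name + tags) between flushes, instead of
    # each value being serialized and sent individually.
    for i in range(100):
        statsd.histogram("request.duration", i, tags=["service:web"])

The cap is enforced with reservoir sampling: the first max_metric_samples values are stored, each later value replaces a random stored slot with probability max/total, and flush() reports rate = stored/total so the backend can extrapolate the original volume. A minimal standalone illustration of that scheme (not the library code itself):

    import random

    def reservoir(stream, k):
        # Keep a uniform random sample of size k from a stream of values,
        # tracking how many values were seen in total.
        kept, total = [], 0
        for v in stream:
            total += 1
            if len(kept) < k:
                kept.append(v)
            else:
                i = random.randint(0, total - 1)
                if i < k:
                    kept[i] = v
        return kept, len(kept) / total  # sampled values and the reported rate

    samples, rate = reservoir(range(100), 10)  # ~10 kept samples, rate == 0.1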