From 61b231d5fe8c80774091f5c3de3b992087bb37dd Mon Sep 17 00:00:00 2001 From: Jordan Conway Date: Tue, 29 Jul 2025 13:23:03 -0400 Subject: [PATCH 1/2] Updates to un-drift manual configs Signed-off-by: Jordan Conway --- integrations.tf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/integrations.tf b/integrations.tf index 4cdd3a3..6de4c15 100644 --- a/integrations.tf +++ b/integrations.tf @@ -16,7 +16,12 @@ resource "datadog_integration_aws_account" "pytorch" { lambda_forwarder {} } metrics_config { - namespace_filters {} + namespace_filters { + exclude_only = [ + "AWS/SQS", + "AWS/ElasticMapReduce", + ] + } } resources_config {} traces_config { From 8e4cbb1750870bc8afd47586654cb197e6428f60 Mon Sep 17 00:00:00 2001 From: Jordan Conway Date: Tue, 29 Jul 2025 14:09:38 -0400 Subject: [PATCH 2/2] Fixup monitor import from LF account Signed-off-by: Jordan Conway --- monitors.tf | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/monitors.tf b/monitors.tf index 3837027..5ae8a1b 100644 --- a/monitors.tf +++ b/monitors.tf @@ -28,33 +28,30 @@ resource "datadog_monitor" "ci_retry_deadletter" { resource "datadog_monitor" "all_queues_anomaly" { - name = "Queue **{{queuename.name}}** has a high number of visible messages" - message = <<-MSG - The number of visible messages in `{{queuename.name}}` is outside of the typical range. - MSG - priority = 5 - - type = "query alert" - query = <<-QUERY - avg(last_1w): - anomalies( - avg:aws.sqs.approximate_number_of_messages_visible{project:pytorch/pytorch} by {queuename,region}, - 'basic', 2, direction='both', interval=3600, alert_window='last_1d', count_default_zero='true' - ) >= 1 - QUERY - - include_tags = true - on_missing_data = "default" + evaluation_delay = 900 require_full_window = false - + monitor_thresholds { + critical = 1 + critical_recovery = 0 + warning = 0.9 + } monitor_threshold_windows { recovery_window = "last_15m" trigger_window = "last_1d" } - - monitor_thresholds { - critical = "1" - critical_recovery = "0" - warning = "0.9" - } + name = "Queue **{{queuename.name}}** has a high number of visible messages" + type = "query alert" + priority = 5 + query = <= 1 +EOT + message = <