Skip to content

Commit 6b30e27

Browse files
committed
Add initial monitors
1 parent 986fd38 commit 6b30e27

18 files changed

+641
-0
lines changed

.terraform-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1.0.3

.terraform.lock.hcl

Lines changed: 23 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

batches-compiled-percent-variables.tf

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Inputs for the "Percentage of batches requiring compilation" monitor.

# Forwarded to the monitor module as `enabled`.
variable "batches_compiled_percent_enabled" {
  type    = bool
  default = true
}

# Forwarded to the monitor module as `warning_threshold`.
# Same scale as the critical threshold below (the query yields a percentage).
variable "batches_compiled_percent_warning" {
  type    = number
  default = 10
}

# The monitor query fires when
# (sql_compilations / batch_requests) * 100 >= this value.
variable "batches_compiled_percent_critical" {
  type    = number
  default = 20
}

# Time window interpolated into the query as `avg(<period>)`.
variable "batches_compiled_percent_evaluation_period" {
  type    = string
  default = "last_1d"
}

# Forwarded to the monitor module as `note`.
variable "batches_compiled_percent_note" {
  type    = string
  default = ""
}

# Forwarded to the monitor module as `docs`.
variable "batches_compiled_percent_docs" {
  type    = string
  default = <<-EOT
    When this metric is high, a lot of queries need to be recompiled. Consider parameterizing more queries by using stored procedures, using forced parameterization or allocating more memory.
  EOT
}

# When non-empty, used as the metric scope instead of var.filter_str.
variable "batches_compiled_percent_filter_override" {
  type    = string
  default = ""
}

# Forwarded to the monitor module as `alerting_enabled`.
variable "batches_compiled_percent_alerting_enabled" {
  type    = bool
  default = true
}

# Forwarded to the monitor module as `priority`.
variable "batches_compiled_percent_priority" {
  description = "Number from 1 (high) to 5 (low)."

  type    = number
  default = 4
}

batches-compiled-percent.tf

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
locals {
  # Metric scope for this monitor: the monitor-specific override wins when it
  # is set (coalesce skips null and empty strings); otherwise the shared
  # var.filter_str is used.
  batches_compiled_percent_filter = coalesce(var.batches_compiled_percent_filter_override, var.filter_str)
}
7+
8+
# Monitor: share of SQL batches that required a compilation, per host.
module "batches_compiled_percent" {
  source = "git@github.com:kabisa/terraform-datadog-generic-monitor.git?ref=0.6.2"

  name = "SQL Server - Percentage of batches requiring compilation"

  # (compilations / batch requests) * 100, grouped by host and averaged over
  # the evaluation period; fires at or above the critical threshold.
  query = "avg(${var.batches_compiled_percent_evaluation_period}):(max:sqlserver.stats.sql_compilations{${local.batches_compiled_percent_filter}} by {host} / max:sqlserver.stats.batch_requests{${local.batches_compiled_percent_filter}} by {host}) * 100 >= ${var.batches_compiled_percent_critical}"

  # The query is grouped by host only, so {{host.name}} is the only tag
  # template variable that resolves; {{database.name}} would render empty.
  alert_message    = "SQL Server on {{host.name}} has a high percent of batches requiring compilation"
  recovery_message = "SQL Server on {{host.name}} has a normal percent of batches requiring compilation"

  # monitor level vars
  enabled            = var.batches_compiled_percent_enabled
  alerting_enabled   = var.batches_compiled_percent_alerting_enabled
  warning_threshold  = var.batches_compiled_percent_warning
  critical_threshold = var.batches_compiled_percent_critical
  priority           = var.batches_compiled_percent_priority
  docs               = var.batches_compiled_percent_docs
  note               = var.batches_compiled_percent_note

  # module level vars
  env                  = var.alert_env
  service              = var.service
  notification_channel = var.notification_channel
  additional_tags      = var.additional_tags
  locked               = var.locked
  name_prefix          = var.name_prefix
  name_suffix          = var.name_suffix
}

buffer-cache-hit-ratio-variables.tf

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Inputs for the "Buffer cache hit ratio" monitor.

# Forwarded to the monitor module as `enabled`.
variable "buffer_cache_hit_ratio_enabled" {
  type    = bool
  default = true
}

# Forwarded to the monitor module as `warning_threshold`.
# Lower values are worse for this monitor, so the warning default sits above
# the critical default.
variable "buffer_cache_hit_ratio_warning" {
  type    = number
  default = 90
}

# The monitor query fires when (cache_hit_ratio * 100) < this value.
variable "buffer_cache_hit_ratio_critical" {
  type    = number
  default = 75
}

# Time window interpolated into the query as `avg(<period>)`.
variable "buffer_cache_hit_ratio_evaluation_period" {
  type    = string
  default = "last_1d"
}

# Forwarded to the monitor module as `note`.
variable "buffer_cache_hit_ratio_note" {
  type    = string
  default = ""
}

# Forwarded to the monitor module as `docs`.
variable "buffer_cache_hit_ratio_docs" {
  type    = string
  default = <<-EOT
    When this metric is low, pages are often read from disk. Consider allocating more memory.
  EOT
}

# When non-empty, used as the metric scope instead of var.filter_str.
variable "buffer_cache_hit_ratio_filter_override" {
  type    = string
  default = ""
}

# Forwarded to the monitor module as `alerting_enabled`.
variable "buffer_cache_hit_ratio_alerting_enabled" {
  type    = bool
  default = true
}

# Forwarded to the monitor module as `priority`.
variable "buffer_cache_hit_ratio_priority" {
  description = "Number from 1 (high) to 5 (low)."

  type    = number
  default = 4
}

buffer-cache-hit-ratio.tf

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
locals {
  # Metric scope for this monitor: the monitor-specific override wins when it
  # is set (coalesce skips null and empty strings); otherwise the shared
  # var.filter_str is used.
  buffer_cache_hit_ratio_filter = coalesce(var.buffer_cache_hit_ratio_filter_override, var.filter_str)
}
7+
8+
# Monitor: SQL Server buffer cache hit ratio, per host. Low values are bad.
module "buffer_cache_hit_ratio" {
  source = "git@github.com:kabisa/terraform-datadog-generic-monitor.git?ref=0.6.2"

  name = "SQL Server - Buffer cache hit ratio"

  # The metric is multiplied by 100 before comparison, and the monitor fires
  # when the averaged value falls BELOW the critical threshold.
  query = "avg(${var.buffer_cache_hit_ratio_evaluation_period}):min:sqlserver.buffer.cache_hit_ratio{${local.buffer_cache_hit_ratio_filter}} by {host} * 100 < ${var.buffer_cache_hit_ratio_critical}"

  # The comparison is "<", so the alert text must describe a drop, not a rise.
  alert_message    = "Buffer cache hit ratio on {{host.name}} has dropped below {{threshold}} ({{value}})"
  recovery_message = "Buffer cache hit ratio on {{host.name}} has recovered ({{value}})"

  # monitor level vars
  enabled            = var.buffer_cache_hit_ratio_enabled
  alerting_enabled   = var.buffer_cache_hit_ratio_alerting_enabled
  warning_threshold  = var.buffer_cache_hit_ratio_warning
  critical_threshold = var.buffer_cache_hit_ratio_critical
  priority           = var.buffer_cache_hit_ratio_priority
  docs               = var.buffer_cache_hit_ratio_docs
  note               = var.buffer_cache_hit_ratio_note

  # module level vars
  env                  = var.alert_env
  service              = var.service
  notification_channel = var.notification_channel
  additional_tags      = var.additional_tags
  locked               = var.locked
  name_prefix          = var.name_prefix
  name_suffix          = var.name_suffix
}

can-connect-variables.tf

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Inputs for the "Can connect" service-check monitor.

# Forwarded to the monitor module as `enabled`.
variable "can_connect_enabled" {
  type    = bool
  default = true
}

# Forwarded to the monitor module as `alerting_enabled`.
variable "can_connect_alerting_enabled" {
  type    = bool
  default = true
}

# Forwarded to the monitor module as `warning_threshold`.
# NOTE(review): the unit is defined by the service-check module (presumably a
# count of failing check runs) - confirm against that module.
variable "can_connect_warning" {
  type    = number
  default = 1
}

# Forwarded to the monitor module as `critical_threshold`.
variable "can_connect_critical" {
  type    = number
  default = 1
}

# Forwarded to the monitor module as `priority`.
variable "can_connect_priority" {
  type    = number
  default = 1
}

# Forwarded to the monitor module as `docs`.
variable "can_connect_docs" {
  type    = string
  default = ""
}

# Forwarded to the monitor module as `note`.
variable "can_connect_note" {
  type    = string
  default = ""
}

can-connect.tf

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Monitor: `sqlserver.can_connect` service check, grouped by host and db.
module "can_connect" {
  source = "git@github.com:kabisa/terraform-datadog-service-check-monitor.git?ref=1.2.0"

  name         = "SQL Server - Can connect"
  check_name   = "sqlserver.can_connect"
  by_tags      = ["host", "db"]
  include_tags = local.service_check_include_tags
  exclude_tags = local.service_check_exclude_tags

  # Template variables follow the grouping tags above: the database tag is
  # `db`, so it is referenced as {{db.name}} ({{database.name}} is not a
  # grouping tag of this check and would render empty).
  alert_message    = "Can not connect to database {{db.name}} on {{host.name}}"
  recovery_message = "Can connect to database {{db.name}} on {{host.name}}"

  # monitor level vars
  enabled            = var.can_connect_enabled
  alerting_enabled   = var.can_connect_alerting_enabled
  warning_threshold  = var.can_connect_warning
  critical_threshold = var.can_connect_critical
  priority           = var.can_connect_priority
  docs               = var.can_connect_docs
  note               = var.can_connect_note

  # module level vars
  env                  = var.alert_env
  service              = var.service
  notification_channel = var.notification_channel
  additional_tags      = var.additional_tags
  locked               = var.locked
  name_prefix          = var.name_prefix
  name_suffix          = var.name_suffix
}

database-state-variables.tf

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Inputs for the "Database state" monitor.

# Forwarded to the monitor module as `enabled`.
variable "database_state_enabled" {
  type    = bool
  default = true
}

# Forwarded to the monitor module as `warning_threshold`.
variable "database_state_warning" {
  type    = number
  default = 1
}

# The monitor query fires when sqlserver.database.state >= this value.
# NOTE(review): the meaning of each numeric state is not visible here
# (presumably SQL Server's database state codes) - confirm before tuning.
variable "database_state_critical" {
  type    = number
  default = 5
}

# Time window interpolated into the query as `max(<period>)`.
variable "database_state_evaluation_period" {
  type    = string
  default = "last_5m"
}

# Forwarded to the monitor module as `note`.
variable "database_state_note" {
  type    = string
  default = ""
}

# Forwarded to the monitor module as `docs`.
variable "database_state_docs" {
  type    = string
  default = ""
}

# When non-empty, used as the metric scope instead of var.filter_str.
variable "database_state_filter_override" {
  type    = string
  default = ""
}

# Forwarded to the monitor module as `alerting_enabled`.
variable "database_state_alerting_enabled" {
  type    = bool
  default = true
}

# Forwarded to the monitor module as `priority`.
variable "database_state_priority" {
  description = "Number from 1 (high) to 5 (low)."

  type    = number
  default = 1
}

database-state.tf

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
locals {
  # Metric scope for this monitor: the monitor-specific override wins when it
  # is set (coalesce skips null and empty strings); otherwise the shared
  # var.filter_str is used.
  database_state_filter = coalesce(var.database_state_filter_override, var.filter_str)
}
7+
8+
# Monitor: SQL Server database state, per host / database / state description.
module "database_state" {
  source = "git@github.com:kabisa/terraform-datadog-generic-monitor.git?ref=0.6.2"

  name = "SQL Server - Database state"

  # Fires when the highest state value seen in the evaluation period is at or
  # above the critical threshold. Grouping by database and
  # database_state_desc makes both available as message template variables.
  query = "max(${var.database_state_evaluation_period}):max:sqlserver.database.state{${local.database_state_filter}} by {host,database,database_state_desc} >= ${var.database_state_critical}"

  alert_message    = "Database {{database.name}} on {{host.name}} is in abnormal state {{database_state_desc.name}}"
  recovery_message = "Database {{database.name}} on {{host.name}} is back in state {{database_state_desc.name}}"

  # monitor level vars
  enabled            = var.database_state_enabled
  alerting_enabled   = var.database_state_alerting_enabled
  warning_threshold  = var.database_state_warning
  critical_threshold = var.database_state_critical
  priority           = var.database_state_priority
  docs               = var.database_state_docs
  note               = var.database_state_note

  # module level vars
  env                  = var.alert_env
  service              = var.service
  notification_channel = var.notification_channel
  additional_tags      = var.additional_tags
  locked               = var.locked
  name_prefix          = var.name_prefix
  name_suffix          = var.name_suffix
}

0 commit comments

Comments
 (0)