Skip to content

Commit fc626f2

Browse files
authored
Add function app 4xx and 5xx alerting (#250)
1 parent 269a046 commit fc626f2

File tree

2 files changed

+140
-15
lines changed

2 files changed

+140
-15
lines changed
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# 4xx Error Alert for Azure Function App - these are client errors, usually invalid requests (400) or authentication issues (401, 403)
2+
resource "azurerm_monitor_metric_alert" "function_4xx" {
3+
count = var.enable_alerting == true ? 1 : 0
4+
5+
name = "${azurerm_linux_function_app.function_app.name}-4xx-errors"
6+
resource_group_name = var.resource_group_name_monitoring != null ? var.resource_group_name_monitoring : var.resource_group_name
7+
scopes = [azurerm_linux_function_app.function_app.id] # Point to your function app
8+
description = "Action will be triggered when 4xx errors exceed ${var.alert_4xx_threshold}"
9+
window_size = var.alert_window_size
10+
frequency = local.alert_frequency
11+
severity = 2 # Warning
12+
13+
criteria {
14+
metric_namespace = "Microsoft.Web/sites"
15+
metric_name = "Http4xx"
16+
aggregation = "Total" # Count total 4xx errors
17+
operator = "GreaterThan"
18+
threshold = var.alert_4xx_threshold
19+
}
20+
21+
action {
22+
action_group_id = var.action_group_id
23+
}
24+
25+
lifecycle {
26+
ignore_changes = [
27+
tags
28+
]
29+
}
30+
}
31+
32+
# 5xx error alert - these are server errors and more serious than 4xx errors
33+
resource "azurerm_monitor_metric_alert" "function_5xx" {
34+
count = var.enable_alerting == true ? 1 : 0
35+
36+
name = "${azurerm_linux_function_app.function_app.name}-5xx-errors"
37+
resource_group_name = var.resource_group_name_monitoring != null ? var.resource_group_name_monitoring : var.resource_group_name
38+
scopes = [azurerm_linux_function_app.function_app.id] # Point to your function app
39+
description = "Action will be triggered when 5xx errors exceed ${var.alert_5xx_threshold}"
40+
window_size = var.alert_window_size
41+
frequency = local.alert_frequency
42+
severity = 2 # Warning
43+
44+
criteria {
45+
metric_namespace = "Microsoft.Web/sites"
46+
metric_name = "Http5xx"
47+
aggregation = "Total" # Count total 5xx errors
48+
operator = "GreaterThan"
49+
threshold = var.alert_5xx_threshold
50+
}
51+
52+
action {
53+
action_group_id = var.action_group_id
54+
}
55+
56+
lifecycle {
57+
ignore_changes = [
58+
tags
59+
]
60+
}
61+
}

infrastructure/modules/function-app/variables.tf

Lines changed: 79 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -161,11 +161,6 @@ variable "location" {
161161
description = "The location/region where the Function App is created."
162162
}
163163

164-
variable "log_analytics_workspace_id" {
165-
type = string
166-
description = "id of the log analytics workspace to send resource logging to via diagnostic settings"
167-
}
168-
169164
variable "minimum_tls_version" {
170165
type = string
171166
default = "1.2" # Possible versions: "1.0", "1.1", "1.2"
@@ -182,16 +177,6 @@ variable "http2_enabled" {
182177
default = false
183178
}
184179

185-
variable "monitor_diagnostic_setting_function_app_enabled_logs" {
186-
type = list(string)
187-
description = "Controls what logs will be enabled for the function app"
188-
}
189-
190-
variable "monitor_diagnostic_setting_function_app_metrics" {
191-
type = list(string)
192-
description = "Controls what metrics will be enabled for the function app"
193-
}
194-
195180
variable "private_endpoint_properties" {
196181
description = "Consolidated properties for the Function App Private Endpoint."
197182
type = object({
@@ -277,3 +262,82 @@ variable "worker_32bit" {
277262
type = bool
278263
description = "Should the Windows Function App use a 32-bit worker process. Defaults to true"
279264
}
265+
266+
267+
/* --------------------------------------------------------------------------------------------------
268+
Monitoring and Diagnostics Variables
269+
-------------------------------------------------------------------------------------------------- */
270+
271+
variable "resource_group_name_monitoring" {
272+
type = string
273+
description = "The name of the resource group in which to create the Monitoring resources for the Function App. Changing this forces a new resource to be created."
274+
default = null
275+
}
276+
277+
variable "action_group_id" {
278+
type = string
279+
description = "The ID of the Action Group to use for alerts."
280+
default = null
281+
}
282+
283+
variable "alert_4xx_threshold" {
284+
type = number
285+
description = "The threshold for 4xx errors to trigger the alert."
286+
default = 10
287+
}
288+
289+
variable "alert_5xx_threshold" {
290+
type = number
291+
description = "The threshold for 5xx errors to trigger the alert."
292+
default = 10
293+
}
294+
295+
variable "alert_window_size" {
296+
type = string
297+
nullable = false
298+
default = "PT5M"
299+
validation {
300+
condition = contains(["PT1M", "PT5M", "PT15M", "PT30M", "PT1H", "PT6H", "PT12H"], var.alert_window_size)
301+
error_message = "The alert_window_size must be one of: PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H"
302+
}
303+
description = "The period of time that is used to monitor alert activity e.g. PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H. The interval between checks is adjusted accordingly."
304+
}
305+
306+
variable "enable_alerting" {
307+
description = "Whether monitoring and alerting is enabled for the Function App."
308+
type = bool
309+
default = false
310+
}
311+
312+
variable "log_analytics_workspace_id" {
313+
type = string
314+
description = "id of the log analytics workspace to send resource logging to via diagnostic settings"
315+
}
316+
317+
variable "monitor_diagnostic_setting_function_app_enabled_logs" {
318+
type = list(string)
319+
description = "Controls what logs will be enabled for the function app"
320+
}
321+
322+
variable "monitor_diagnostic_setting_function_app_metrics" {
323+
type = list(string)
324+
description = "Controls what metrics will be enabled for the function app"
325+
}
326+
327+
variable "severity" {
328+
type = number
329+
description = "Severity of the alert. 0 = Critical, 1 = Error, 2 = Warning, 3 = Informational, 4 = Verbose. Default is 3."
330+
default = 3
331+
}
332+
333+
locals {
334+
alert_frequency_map = {
335+
PT5M = "PT1M"
336+
PT15M = "PT1M"
337+
PT30M = "PT1M"
338+
PT1H = "PT1M"
339+
PT6H = "PT5M"
340+
PT12H = "PT5M"
341+
}
342+
# Evaluation frequency derived from the chosen window size. "PT1M" is accepted by the alert_window_size validation but has no entry in alert_frequency_map, so fall back to "PT1M" (frequency equal to the window) instead of failing with an invalid-index error.
alert_frequency = lookup(local.alert_frequency_map, var.alert_window_size, "PT1M")
343+
}

0 commit comments

Comments
 (0)