Skip to content

Commit 6032c6d

Browse files
authored
feat: Default scrape interval (#49)
* Enable higher but configurable scrape intervals * Use global parameters per job level * Remove scrape limit
1 parent 27e9cc8 commit 6032c6d

File tree

7 files changed

+115
-69
lines changed

7 files changed

+115
-69
lines changed

examples/existing-cluster-with-base-and-infra/main.tf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,12 @@ module "workloads_infra" {
8585
managed_prometheus_workspace_endpoint = module.eks_observability_accelerator.managed_prometheus_workspace_endpoint
8686
managed_prometheus_workspace_region = module.eks_observability_accelerator.managed_prometheus_workspace_region
8787

88+
# optional, defaults to 60s interval and 15s timeout
89+
prometheus_config = {
90+
global_scrape_interval = "60s"
91+
global_scrape_timeout = "15s"
92+
}
93+
8894
tags = local.tags
8995

9096
depends_on = [

modules/workloads/infra/README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,12 @@ This module is inspired from the open source [kube-prometheus-stack](https://git
6666
| <a name="input_helm_config"></a> [helm\_config](#input\_helm\_config) | Helm Config for Prometheus | `any` | `{}` | no |
6767
| <a name="input_irsa_iam_permissions_boundary"></a> [irsa\_iam\_permissions\_boundary](#input\_irsa\_iam\_permissions\_boundary) | IAM permissions boundary for IRSA roles | `string` | `""` | no |
6868
| <a name="input_irsa_iam_role_path"></a> [irsa\_iam\_role\_path](#input\_irsa\_iam\_role\_path) | IAM role path for IRSA roles | `string` | `"/"` | no |
69-
| <a name="input_ksm_config"></a> [ksm\_config](#input\_ksm\_config) | Kube State metrics configuration | <pre>object({<br> create_namespace = bool<br> k8s_namespace = string<br> helm_chart_name = string<br> helm_chart_version = string<br> helm_release_name = string<br> helm_repo_url = string<br> helm_settings = map(string)<br> helm_values = map(any)<br> })</pre> | <pre>{<br> "create_namespace": true,<br> "helm_chart_name": "kube-state-metrics",<br> "helm_chart_version": "4.16.0",<br> "helm_release_name": "kube-state-metrics",<br> "helm_repo_url": "https://prometheus-community.github.io/helm-charts",<br> "helm_settings": {},<br> "helm_values": {},<br> "k8s_namespace": "kube-system"<br>}</pre> | no |
69+
| <a name="input_ksm_config"></a> [ksm\_config](#input\_ksm\_config) | Kube State metrics configuration | <pre>object({<br> create_namespace = bool<br> k8s_namespace = string<br> helm_chart_name = string<br> helm_chart_version = string<br> helm_release_name = string<br> helm_repo_url = string<br> helm_settings = map(string)<br> helm_values = map(any)<br><br> scrape_interval = string<br> scrape_timeout = string<br> })</pre> | <pre>{<br> "create_namespace": true,<br> "helm_chart_name": "kube-state-metrics",<br> "helm_chart_version": "4.16.0",<br> "helm_release_name": "kube-state-metrics",<br> "helm_repo_url": "https://prometheus-community.github.io/helm-charts",<br> "helm_settings": {},<br> "helm_values": {},<br> "k8s_namespace": "kube-system",<br> "scrape_interval": "60s",<br> "scrape_timeout": "15s"<br>}</pre> | no |
7070
| <a name="input_managed_prometheus_workspace_endpoint"></a> [managed\_prometheus\_workspace\_endpoint](#input\_managed\_prometheus\_workspace\_endpoint) | Amazon Managed Prometheus Workspace Endpoint | `string` | `null` | no |
7171
| <a name="input_managed_prometheus_workspace_id"></a> [managed\_prometheus\_workspace\_id](#input\_managed\_prometheus\_workspace\_id) | Amazon Managed Prometheus Workspace ID | `string` | `null` | no |
7272
| <a name="input_managed_prometheus_workspace_region"></a> [managed\_prometheus\_workspace\_region](#input\_managed\_prometheus\_workspace\_region) | Amazon Managed Prometheus Workspace's Region | `string` | `null` | no |
73-
| <a name="input_ne_config"></a> [ne\_config](#input\_ne\_config) | Node exporter configuration | <pre>object({<br> create_namespace = bool<br> k8s_namespace = string<br> helm_chart_name = string<br> helm_chart_version = string<br> helm_release_name = string<br> helm_repo_url = string<br> helm_settings = map(string)<br> helm_values = map(any)<br> })</pre> | <pre>{<br> "create_namespace": true,<br> "helm_chart_name": "prometheus-node-exporter",<br> "helm_chart_version": "2.0.3",<br> "helm_release_name": "prometheus-node-exporter",<br> "helm_repo_url": "https://prometheus-community.github.io/helm-charts",<br> "helm_settings": {},<br> "helm_values": {},<br> "k8s_namespace": "prometheus-node-exporter"<br>}</pre> | no |
73+
| <a name="input_ne_config"></a> [ne\_config](#input\_ne\_config) | Node exporter configuration | <pre>object({<br> create_namespace = bool<br> k8s_namespace = string<br> helm_chart_name = string<br> helm_chart_version = string<br> helm_release_name = string<br> helm_repo_url = string<br> helm_settings = map(string)<br> helm_values = map(any)<br><br> scrape_interval = string<br> scrape_timeout = string<br> })</pre> | <pre>{<br> "create_namespace": true,<br> "helm_chart_name": "prometheus-node-exporter",<br> "helm_chart_version": "2.0.3",<br> "helm_release_name": "prometheus-node-exporter",<br> "helm_repo_url": "https://prometheus-community.github.io/helm-charts",<br> "helm_settings": {},<br> "helm_values": {},<br> "k8s_namespace": "prometheus-node-exporter",<br> "scrape_interval": "60s",<br> "scrape_timeout": "60s"<br>}</pre> | no |
74+
| <a name="input_prometheus_config"></a> [prometheus\_config](#input\_prometheus\_config) | Controls default values such as scrape interval, timeouts and ports globally | <pre>object({<br> global_scrape_interval = string<br> global_scrape_timeout = string<br> })</pre> | <pre>{<br> "global_scrape_interval": "60s",<br> "global_scrape_timeout": "15s"<br>}</pre> | no |
7475
| <a name="input_tags"></a> [tags](#input\_tags) | Additional tags (e.g. `map('BusinessUnit`,`XYZ`) | `map(string)` | `{}` | no |
7576

7677
## Outputs

modules/workloads/infra/main.tf

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ module "helm_addon" {
4141
{
4242
name = local.name
4343
chart = "${path.module}/otel-config"
44-
version = "0.2.0"
44+
version = "0.3.0"
4545
namespace = local.namespace
4646
description = "ADOT helm Chart deployment configuration"
4747
},
@@ -58,28 +58,16 @@ module "helm_addon" {
5858
value = var.managed_prometheus_workspace_region
5959
},
6060
{
61-
name = "prometheusMetricsEndpoint"
62-
value = "metrics"
63-
},
64-
{
65-
name = "prometheusMetricsPort"
66-
value = 8888
67-
},
68-
{
69-
name = "scrapeInterval"
70-
value = "15s"
71-
},
72-
{
73-
name = "scrapeTimeout"
74-
value = "10s"
61+
name = "ekscluster"
62+
value = local.context.eks_cluster_id
7563
},
7664
{
77-
name = "scrapeSampleLimit"
78-
value = 1000
65+
name = "globalScrapeInterval"
66+
value = var.prometheus_config.global_scrape_interval
7967
},
8068
{
81-
name = "ekscluster"
82-
value = local.context.eks_cluster_id
69+
name = "globalScrapeTimeout"
70+
value = var.prometheus_config.global_scrape_timeout
8371
},
8472
]
8573

modules/workloads/infra/otel-config/templates/opentelemetrycollector.yaml

Lines changed: 32 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,14 @@ spec:
1111
prometheus:
1212
config:
1313
global:
14-
scrape_interval: {{ .Values.scrapeInterval }}
15-
scrape_timeout: {{ .Values.scrapeTimeout }}
14+
scrape_interval: {{ .Values.globalScrapeInterval }}
15+
scrape_timeout: {{ .Values.globalScrapeTimeout }}
1616
external_labels:
1717
cluster: {{ .Values.ekscluster }}
1818
scrape_configs:
1919
- job_name: 'kubernetes-kubelet'
20+
scrape_interval: {{ .Values.globalScrapeInterval }}
21+
scrape_timeout: {{ .Values.globalScrapeTimeout }}
2022
scheme: https
2123
tls_config:
2224
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
@@ -52,9 +54,8 @@ spec:
5254
replacement: /api/v1/nodes/$${1}/proxy/metrics/cadvisor
5355
- job_name: serviceMonitor/default/kube-prometheus-stack-prometheus-node-exporter/0
5456
honor_timestamps: true
55-
scrape_interval: 30s
56-
scrape_timeout: 10s
57-
metrics_path: /metrics
57+
scrape_interval: {{ .Values.globalScrapeInterval }}
58+
scrape_timeout: {{ .Values.globalScrapeInterval }}
5859
scheme: http
5960
follow_redirects: true
6061
enable_http2: true
@@ -156,9 +157,8 @@ spec:
156157
- default
157158
- job_name: serviceMonitor/default/kube-prometheus-stack-prometheus/0
158159
honor_timestamps: true
159-
scrape_interval: 30s
160-
scrape_timeout: 10s
161-
metrics_path: /metrics
160+
scrape_interval: {{ .Values.globalScrapeInterval }}
161+
scrape_timeout: {{ .Values.globalScrapeTimeout }}
162162
scheme: http
163163
follow_redirects: true
164164
enable_http2: true
@@ -260,9 +260,8 @@ spec:
260260
- job_name: serviceMonitor/default/kube-prometheus-stack-operator/0
261261
honor_labels: true
262262
honor_timestamps: true
263-
scrape_interval: 30s
264-
scrape_timeout: 10s
265-
metrics_path: /metrics
263+
scrape_interval: {{ .Values.globalScrapeInterval }}
264+
scrape_timeout: {{ .Values.globalScrapeTimeout }}
266265
scheme: https
267266
tls_config:
268267
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
@@ -362,8 +361,8 @@ spec:
362361
- job_name: serviceMonitor/default/kube-prometheus-stack-kubelet/2
363362
honor_labels: true
364363
honor_timestamps: true
365-
scrape_interval: 30s
366-
scrape_timeout: 10s
364+
scrape_interval: {{ .Values.globalScrapeInterval }}
365+
scrape_timeout: {{ .Values.globalScrapeTimeout }}
367366
metrics_path: /metrics/probes
368367
scheme: https
369368
authorization:
@@ -479,8 +478,8 @@ spec:
479478
- job_name: serviceMonitor/default/kube-prometheus-stack-kubelet/1
480479
honor_labels: true
481480
honor_timestamps: true
482-
scrape_interval: 30s
483-
scrape_timeout: 10s
481+
scrape_interval: {{ .Values.globalScrapeInterval }}
482+
scrape_timeout: {{ .Values.globalScrapeTimeout }}
484483
metrics_path: /metrics/cadvisor
485484
scheme: https
486485
authorization:
@@ -596,9 +595,8 @@ spec:
596595
- job_name: serviceMonitor/default/kube-prometheus-stack-kubelet/0
597596
honor_labels: true
598597
honor_timestamps: true
599-
scrape_interval: 30s
600-
scrape_timeout: 10s
601-
metrics_path: /metrics
598+
scrape_interval: {{ .Values.globalScrapeInterval }}
599+
scrape_timeout: {{ .Values.globalScrapeTimeout }}
602600
scheme: https
603601
authorization:
604602
type: Bearer
@@ -713,9 +711,8 @@ spec:
713711
- job_name: serviceMonitor/default/kube-prometheus-stack-kube-state-metrics/0
714712
honor_labels: true
715713
honor_timestamps: true
716-
scrape_interval: 30s
717-
scrape_timeout: 10s
718-
metrics_path: /metrics
714+
scrape_interval: {{ .Values.globalScrapeInterval }}
715+
scrape_timeout: {{ .Values.globalScrapeTimeout }}
719716
scheme: http
720717
follow_redirects: true
721718
enable_http2: true
@@ -817,9 +814,8 @@ spec:
817814
- default
818815
- job_name: serviceMonitor/default/kube-prometheus-stack-kube-scheduler/0
819816
honor_timestamps: true
820-
scrape_interval: 30s
821-
scrape_timeout: 10s
822-
metrics_path: /metrics
817+
scrape_interval: {{ .Values.globalScrapeInterval }}
818+
scrape_timeout: {{ .Values.globalScrapeTimeout }}
823819
scheme: http
824820
authorization:
825821
type: Bearer
@@ -924,9 +920,8 @@ spec:
924920
- kube-system
925921
- job_name: serviceMonitor/default/kube-prometheus-stack-kube-proxy/0
926922
honor_timestamps: true
927-
scrape_interval: 30s
928-
scrape_timeout: 10s
929-
metrics_path: /metrics
923+
scrape_interval: {{ .Values.globalScrapeInterval }}
924+
scrape_timeout: {{ .Values.globalScrapeTimeout }}
930925
scheme: http
931926
authorization:
932927
type: Bearer
@@ -1031,9 +1026,8 @@ spec:
10311026
- kube-system
10321027
- job_name: serviceMonitor/default/kube-prometheus-stack-kube-etcd/0
10331028
honor_timestamps: true
1034-
scrape_interval: 30s
1035-
scrape_timeout: 10s
1036-
metrics_path: /metrics
1029+
scrape_interval: {{ .Values.globalScrapeInterval }}
1030+
scrape_timeout: {{ .Values.globalScrapeTimeout }}
10371031
scheme: http
10381032
authorization:
10391033
type: Bearer
@@ -1138,9 +1132,8 @@ spec:
11381132
- kube-system
11391133
- job_name: serviceMonitor/default/kube-prometheus-stack-kube-controller-manager/0
11401134
honor_timestamps: true
1141-
scrape_interval: 30s
1142-
scrape_timeout: 10s
1143-
metrics_path: /metrics
1135+
scrape_interval: {{ .Values.globalScrapeInterval }}
1136+
scrape_timeout: {{ .Values.globalScrapeTimeout }}
11441137
scheme: http
11451138
authorization:
11461139
type: Bearer
@@ -1245,9 +1238,8 @@ spec:
12451238
- kube-system
12461239
- job_name: serviceMonitor/default/kube-prometheus-stack-coredns/0
12471240
honor_timestamps: true
1248-
scrape_interval: 30s
1249-
scrape_timeout: 10s
1250-
metrics_path: /metrics
1241+
scrape_interval: {{ .Values.globalScrapeInterval }}
1242+
scrape_timeout: {{ .Values.globalScrapeTimeout }}
12511243
scheme: http
12521244
authorization:
12531245
type: Bearer
@@ -1350,9 +1342,8 @@ spec:
13501342
- kube-system
13511343
- job_name: serviceMonitor/default/kube-prometheus-stack-apiserver/0
13521344
honor_timestamps: true
1353-
scrape_interval: 30s
1354-
scrape_timeout: 10s
1355-
metrics_path: /metrics
1345+
scrape_interval: {{ .Values.globalScrapeInterval }}
1346+
scrape_timeout: {{ .Values.globalScrapeTimeout }}
13561347
scheme: https
13571348
authorization:
13581349
type: Bearer
@@ -1455,9 +1446,8 @@ spec:
14551446
- default
14561447
- job_name: serviceMonitor/default/kube-prometheus-stack-alertmanager/0
14571448
honor_timestamps: true
1458-
scrape_interval: 30s
1459-
scrape_timeout: 10s
1460-
metrics_path: /metrics
1449+
scrape_interval: {{ .Values.globalScrapeInterval }}
1450+
scrape_timeout: {{ .Values.globalScrapeTimeout }}
14611451
scheme: http
14621452
follow_redirects: true
14631453
enable_http2: true
Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,43 @@
11
ampurl: ${amp_url}
22
region: ${region}
3-
prometheusMetricsEndpoint: ${prometheus_metrics_endpoint}
4-
prometheusMetricsPort: ${prometheus_metrics_port}
5-
scrapeInterval: ${scrape_interval}
6-
scrapeTimeout: ${scrape_timeout}
7-
scrapeSampleLimit: ${scrape_sample_limit}
83
ekscluster: ${eks_cluster}
4+
5+
globalScrapeTimeout: ${global_scrape_timeout}
6+
globalScrapeSampleLimit: ${global_scrape_sample_limit}
7+
# TODO: enable after terraform 1.3 as defaults will be optional
8+
9+
#nodeExporterScrapeInterval: ${node_exporter_scrape_interval}
10+
#nodeExporterScrapeTimeout: ${node_exporter_scrape_timeout}
11+
12+
#kubeletScrapeInterval: ${kubelet_scrape_interval}
13+
#kubeletScrapeTimeout: ${kubelet_scrape_timeout}
14+
15+
#operatorScrapeInterval: ${operator_scrape_interval}
16+
#operatorScrapeTimeout: ${operator_scrape_timeout}
17+
18+
#kubeletScrapeInterval: ${operator_scrape_interval}
19+
#kubeletScrapeTimeout: ${operator_scrape_timeout}
20+
21+
#ksmScrapeInterval: ${ksm_scrape_interval}
22+
#ksmScrapeTimeout: ${ksm_scrape_timeout}
23+
24+
#schedulerScrapeInterval: ${scheduler_scrape_interval}
25+
#schedulerScrapeTimeout: ${scheduler_scrape_timeout}
26+
27+
#proxyScrapeInterval: ${proxy_scrape_interval}
28+
#proxyScrapeTimeout: ${proxy_scrape_timeout}
29+
30+
#etcdScrapeInterval: ${etcd_scrape_interval}
31+
#etcdScrapeTimeout: ${etcd_scrape_timeout}
32+
33+
#controllerManagerScrapeInterval: ${controller_manager_scrape_interval}
34+
#controllerManagerScrapeTimeout: ${controller_manager_scrape_timeout}
35+
36+
#corednsScrapeInterval: ${coredns_scrape_interval}
37+
#corednsScrapeTimeout: ${coredns_scrape_timeout}
38+
39+
#apiserverScrapeInterval: ${apiserver_scrape_interval}
40+
#apiserverScrapeTimeout: ${apiserver_scrape_timeout}
41+
42+
#alertmanagerScrapeInterval: ${alertmanager_scrape_interval}
43+
#alertmanagerScrapeTimeout: ${alertmanager_scrape_timeout}

modules/workloads/infra/variables.tf

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ variable "ksm_config" {
7878
helm_repo_url = string
7979
helm_settings = map(string)
8080
helm_values = map(any)
81+
82+
scrape_interval = string
83+
scrape_timeout = string
8184
})
8285

8386
default = {
@@ -89,6 +92,9 @@ variable "ksm_config" {
8992
helm_settings = {}
9093
helm_values = {}
9194
k8s_namespace = "kube-system"
95+
96+
scrape_interval = "60s"
97+
scrape_timeout = "15s"
9298
}
9399
nullable = false
94100
}
@@ -110,6 +116,9 @@ variable "ne_config" {
110116
helm_repo_url = string
111117
helm_settings = map(string)
112118
helm_values = map(any)
119+
120+
scrape_interval = string
121+
scrape_timeout = string
113122
})
114123

115124
default = {
@@ -121,11 +130,29 @@ variable "ne_config" {
121130
helm_settings = {}
122131
helm_values = {}
123132
k8s_namespace = "prometheus-node-exporter"
133+
134+
scrape_interval = "60s"
135+
scrape_timeout = "60s"
124136
}
125137
nullable = false
126138
}
139+
127140
variable "tags" {
128141
description = "Additional tags (e.g. `map('BusinessUnit`,`XYZ`)"
129142
type = map(string)
130143
default = {}
131144
}
145+
146+
variable "prometheus_config" {
147+
description = "Controls default values such as scrape interval, timeouts and ports globally"
148+
type = object({
149+
global_scrape_interval = string
150+
global_scrape_timeout = string
151+
})
152+
153+
default = {
154+
global_scrape_interval = "60s"
155+
global_scrape_timeout = "15s"
156+
}
157+
nullable = false
158+
}

modules/workloads/java/main.tf

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ data "aws_partition" "current" {}
99
module "helm_addon" {
1010
source = "github.com/aws-ia/terraform-aws-eks-blueprints//modules/kubernetes-addons/helm-addon?ref=v4.8.1"
1111

12-
1312
helm_config = merge(
1413
{
1514
name = local.name

0 commit comments

Comments
 (0)