Skip to content

Commit 739b581

Browse files
authored
K8s operator metrics (#185)
* Add kube admin metrics
* Update chart version
* Variable verbosity level for adot collector
1 parent a7633d1 commit 739b581

File tree

6 files changed

+36
-223
lines changed

6 files changed

+36
-223
lines changed

modules/eks-monitoring/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ See examples using this Terraform modules in the **Amazon EKS** section of [this
6565

6666
| Name | Description | Type | Default | Required |
6767
|------|-------------|------|---------|:--------:|
68+
| <a name="input_adot_loglevel"></a> [adot\_loglevel](#input\_adot\_loglevel) | Verbosity level for ADOT collector logs | `string` | `"warn"` | no |
6869
| <a name="input_custom_metrics_config"></a> [custom\_metrics\_config](#input\_custom\_metrics\_config) | Configuration object to enable custom metrics collection | <pre>object({<br> ports = list(number)<br> # paths = optional(list(string), ["/metrics"])<br> # list of samples to be dropped by label prefix, ex: go_ -> discards go_.*<br> dropped_series_prefixes = list(string)<br> })</pre> | <pre>{<br> "dropped_series_prefixes": [<br> "unspecified"<br> ],<br> "ports": []<br>}</pre> | no |
6970
| <a name="input_eks_cluster_id"></a> [eks\_cluster\_id](#input\_eks\_cluster\_id) | EKS Cluster Id | `string` | n/a | yes |
7071
| <a name="input_enable_alerting_rules"></a> [enable\_alerting\_rules](#input\_enable\_alerting\_rules) | Enables or disables Managed Prometheus alerting rules | `bool` | `true` | no |

modules/eks-monitoring/main.tf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,10 @@ module "helm_addon" {
105105
name = "globalScrapeTimeout"
106106
value = var.prometheus_config.global_scrape_timeout
107107
},
108+
{
109+
name = "adotLoglevel"
110+
value = var.adot_loglevel
111+
},
108112
{
109113
name = "accountId"
110114
value = local.context.aws_caller_identity_account_id

modules/eks-monitoring/otel-config/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ apiVersion: v2
22
name: opentelemetry
33
description: A Helm chart to install otel operator
44
type: application
5-
version: 0.5.0
6-
appVersion: 0.5.0
5+
version: 0.6.0
6+
appVersion: 0.6.0

modules/eks-monitoring/otel-config/templates/opentelemetrycollector.yaml

Lines changed: 21 additions & 221 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,26 @@ spec:
8383
regex: $K8S_NODE_NAME
8484
source_labels: [__meta_kubernetes_node_name]
8585
{{ end }}
86+
- job_name: 'kube-admin'
87+
scheme: https
88+
tls_config:
89+
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
90+
insecure_skip_verify: true
91+
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
92+
kubernetes_sd_configs:
93+
- role: node
94+
relabel_configs:
95+
- target_label: __address__
96+
replacement: kubernetes.default.svc.cluster.local:443
97+
{{ if .Values.enableTracing }}
98+
- action: keep
99+
regex: $K8S_NODE_NAME
100+
source_labels: [__meta_kubernetes_node_name]
101+
{{ end }}
102+
metric_relabel_configs:
103+
- action: keep
104+
source_labels: [__name__]
105+
regex: 'apiserver_(request_duration_seconds|storage_list_duration_seconds|admission_controller_admission_duration_seconds|flowcontrol_request_wait_duration_seconds).*|apiserver_(admission_webhook_fail_open_count|tls_handshake_errors_total|request_total)|rest_client_request_duration_seconds.*|rest_client_requests_total|etcd_(request_duration_seconds|db_total_size_in_bytes).*'
86106
- job_name: serviceMonitor/default/kube-prometheus-stack-prometheus-node-exporter/0
87107
honor_timestamps: true
88108
scrape_interval: {{ .Values.globalScrapeInterval }}
@@ -1100,117 +1120,6 @@ spec:
11001120
own_namespace: false
11011121
names:
11021122
- kube-system
1103-
- job_name: serviceMonitor/default/kube-prometheus-stack-kube-etcd/0
1104-
honor_timestamps: true
1105-
scrape_interval: {{ .Values.globalScrapeInterval }}
1106-
scrape_timeout: {{ .Values.globalScrapeTimeout }}
1107-
scheme: http
1108-
authorization:
1109-
type: Bearer
1110-
credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
1111-
follow_redirects: true
1112-
enable_http2: true
1113-
relabel_configs:
1114-
- source_labels: [job]
1115-
separator: ;
1116-
regex: (.*)
1117-
target_label: __tmp_prometheus_job_name
1118-
replacement: $$1
1119-
action: replace
1120-
- source_labels: [__meta_kubernetes_service_label_app, __meta_kubernetes_service_labelpresent_app]
1121-
separator: ;
1122-
regex: (kube-prometheus-stack-kube-etcd);true
1123-
replacement: $$1
1124-
action: keep
1125-
- source_labels: [__meta_kubernetes_service_label_release, __meta_kubernetes_service_labelpresent_release]
1126-
separator: ;
1127-
regex: (kube-prometheus-stack);true
1128-
replacement: $$1
1129-
action: keep
1130-
- source_labels: [__meta_kubernetes_endpoint_port_name]
1131-
separator: ;
1132-
regex: http-metrics
1133-
replacement: $$1
1134-
action: keep
1135-
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
1136-
separator: ;
1137-
regex: Node;(.*)
1138-
target_label: node
1139-
replacement: $$1
1140-
action: replace
1141-
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
1142-
separator: ;
1143-
regex: Pod;(.*)
1144-
target_label: pod
1145-
replacement: $$1
1146-
action: replace
1147-
- source_labels: [__meta_kubernetes_namespace]
1148-
separator: ;
1149-
regex: (.*)
1150-
target_label: namespace
1151-
replacement: $$1
1152-
action: replace
1153-
- source_labels: [__meta_kubernetes_service_name]
1154-
separator: ;
1155-
regex: (.*)
1156-
target_label: service
1157-
replacement: $$1
1158-
action: replace
1159-
- source_labels: [__meta_kubernetes_pod_name]
1160-
separator: ;
1161-
regex: (.*)
1162-
target_label: pod
1163-
replacement: $$1
1164-
action: replace
1165-
- source_labels: [__meta_kubernetes_pod_container_name]
1166-
separator: ;
1167-
regex: (.*)
1168-
target_label: container
1169-
replacement: $$1
1170-
action: replace
1171-
- source_labels: [__meta_kubernetes_service_name]
1172-
separator: ;
1173-
regex: (.*)
1174-
target_label: job
1175-
replacement: $$1
1176-
action: replace
1177-
- source_labels: [__meta_kubernetes_service_label_jobLabel]
1178-
separator: ;
1179-
regex: (.+)
1180-
target_label: job
1181-
replacement: $$1
1182-
action: replace
1183-
- separator: ;
1184-
regex: (.*)
1185-
target_label: endpoint
1186-
replacement: http-metrics
1187-
action: replace
1188-
- source_labels: [__address__]
1189-
separator: ;
1190-
regex: (.*)
1191-
modulus: 1
1192-
target_label: __tmp_hash
1193-
replacement: $$1
1194-
action: hashmod
1195-
- source_labels: [__tmp_hash]
1196-
separator: ;
1197-
regex: "0"
1198-
replacement: $$1
1199-
action: keep
1200-
{{ if .Values.enableTracing }}
1201-
- action: keep
1202-
regex: $K8S_NODE_NAME
1203-
source_labels: [__meta_kubernetes_endpoint_node_name]
1204-
{{ end }}
1205-
kubernetes_sd_configs:
1206-
- role: endpoints
1207-
kubeconfig_file: ""
1208-
follow_redirects: true
1209-
enable_http2: true
1210-
namespaces:
1211-
own_namespace: false
1212-
names:
1213-
- kube-system
12141123
- job_name: serviceMonitor/default/kube-prometheus-stack-kube-controller-manager/0
12151124
honor_timestamps: true
12161125
scrape_interval: {{ .Values.globalScrapeInterval }}
@@ -1431,115 +1340,6 @@ spec:
14311340
own_namespace: false
14321341
names:
14331342
- kube-system
1434-
- job_name: serviceMonitor/default/kube-prometheus-stack-apiserver/0
1435-
honor_timestamps: true
1436-
scrape_interval: {{ .Values.globalScrapeInterval }}
1437-
scrape_timeout: {{ .Values.globalScrapeTimeout }}
1438-
scheme: https
1439-
authorization:
1440-
type: Bearer
1441-
credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
1442-
tls_config:
1443-
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
1444-
server_name: kubernetes
1445-
follow_redirects: true
1446-
enable_http2: true
1447-
relabel_configs:
1448-
- source_labels: [job]
1449-
separator: ;
1450-
regex: (.*)
1451-
target_label: __tmp_prometheus_job_name
1452-
replacement: $$1
1453-
action: replace
1454-
- source_labels: [__meta_kubernetes_service_label_component, __meta_kubernetes_service_labelpresent_component]
1455-
separator: ;
1456-
regex: (kubernetes);true
1457-
replacement: $$1
1458-
action: keep
1459-
- source_labels: [__meta_kubernetes_endpoint_port_name]
1460-
separator: ;
1461-
regex: https
1462-
replacement: $$1
1463-
action: keep
1464-
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
1465-
separator: ;
1466-
regex: Node;(.*)
1467-
target_label: node
1468-
replacement: $$1
1469-
action: replace
1470-
- source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
1471-
separator: ;
1472-
regex: Pod;(.*)
1473-
target_label: pod
1474-
replacement: $$1
1475-
action: replace
1476-
- source_labels: [__meta_kubernetes_namespace]
1477-
separator: ;
1478-
regex: (.*)
1479-
target_label: namespace
1480-
replacement: $$1
1481-
action: replace
1482-
- source_labels: [__meta_kubernetes_service_name]
1483-
separator: ;
1484-
regex: (.*)
1485-
target_label: service
1486-
replacement: $$1
1487-
action: replace
1488-
- source_labels: [__meta_kubernetes_pod_name]
1489-
separator: ;
1490-
regex: (.*)
1491-
target_label: pod
1492-
replacement: $$1
1493-
action: replace
1494-
- source_labels: [__meta_kubernetes_pod_container_name]
1495-
separator: ;
1496-
regex: (.*)
1497-
target_label: container
1498-
replacement: $$1
1499-
action: replace
1500-
- source_labels: [__meta_kubernetes_service_name]
1501-
separator: ;
1502-
regex: (.*)
1503-
target_label: job
1504-
replacement: $$1
1505-
action: replace
1506-
- source_labels: [__meta_kubernetes_service_label_component]
1507-
separator: ;
1508-
regex: (.+)
1509-
target_label: job
1510-
replacement: $$1
1511-
action: replace
1512-
- separator: ;
1513-
regex: (.*)
1514-
target_label: endpoint
1515-
replacement: https
1516-
action: replace
1517-
- source_labels: [__address__]
1518-
separator: ;
1519-
regex: (.*)
1520-
modulus: 1
1521-
target_label: __tmp_hash
1522-
replacement: $$1
1523-
action: hashmod
1524-
- source_labels: [__tmp_hash]
1525-
separator: ;
1526-
regex: "0"
1527-
replacement: $$1
1528-
action: keep
1529-
{{ if .Values.enableTracing }}
1530-
- action: keep
1531-
regex: $K8S_NODE_NAME
1532-
source_labels: [__meta_kubernetes_endpoint_node_name]
1533-
{{ end }}
1534-
kubernetes_sd_configs:
1535-
- role: endpoints
1536-
kubeconfig_file: ""
1537-
follow_redirects: true
1538-
enable_http2: true
1539-
namespaces:
1540-
own_namespace: false
1541-
names:
1542-
- default
15431343
- job_name: serviceMonitor/default/kube-prometheus-stack-alertmanager/0
15441344
honor_timestamps: true
15451345
scrape_interval: {{ .Values.globalScrapeInterval }}
@@ -1776,7 +1576,7 @@ spec:
17761576
resource_to_telemetry_conversion:
17771577
enabled: true
17781578
logging:
1779-
loglevel: warn
1579+
loglevel: {{ .Values.adotLoglevel }}
17801580
extensions:
17811581
sigv4auth:
17821582
region: {{ .Values.region }}

modules/eks-monitoring/otel-config/values.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,5 @@ javaPrometheusMetricsEndpoint: ${java_prometheus_metrics_endpoint}
2323
enableNginx: ${enable_nginx}
2424
nginxScrapeSampleLimit: ${nginx_scrape_sample_limit}
2525
nginxPrometheusMetricsEndpoint: ${nginx_prometheus_metrics_endpoint}
26+
27+
adotLoglevel: ${adot_loglevel}

modules/eks-monitoring/variables.tf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@ variable "irsa_iam_permissions_boundary" {
3333
default = null
3434
}
3535

36+
variable "adot_loglevel" {
37+
description = "Verbosity level for ADOT collector logs"
38+
type = string
39+
default = "warn"
40+
}
41+
3642
variable "managed_prometheus_workspace_endpoint" {
3743
description = "Amazon Managed Prometheus Workspace Endpoint"
3844
type = string

0 commit comments

Comments
 (0)