Skip to content

Commit 0035605

Browse files
committed
Split Prometheus rules for OpenFaaS CE and Pro
The goal of this commit is to simplify maintaining configurations for Pro and CE for Prometheus. Tested that either file was output via helm template with the --show-only flag when toggling openfaasPro=true/false. Tested e2e with CE - showing autoscaling with alert-manager. Tested e2e with Pro with Kind and hey, with autoscaling taking place, and RAM/CPU, provider and function metrics showing up. Signed-off-by: Alex Ellis (OpenFaaS Ltd) <[email protected]>
1 parent cc893bc commit 0035605

File tree

4 files changed

+183
-127
lines changed

4 files changed

+183
-127
lines changed

chart/openfaas/templates/gateway-provider-svc.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
---
1+
{{- if .Values.openfaasPro }}
22
apiVersion: v1
33
kind: Service
44
metadata:
@@ -22,3 +22,5 @@ spec:
2222
protocol: TCP
2323
selector:
2424
app: gateway
25+
26+
{{- end }}
Lines changed: 3 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{{- $functionNs := default .Release.Namespace .Values.functionNamespace }}
22

33
{{- if .Values.prometheus.create }}
4+
{{- if eq .Values.openfaasPro false }}
45
---
56
kind: ConfigMap
67
apiVersion: v1
@@ -23,9 +24,6 @@ data:
2324
2425
rule_files:
2526
- 'alert.rules.yml'
26-
{{- if .Values.openfaasPro }}
27-
- 'prometheus-rules.yml'
28-
{{- end }}
2927
3028
alerting:
3129
alertmanagers:
@@ -39,49 +37,14 @@ data:
3937
static_configs:
4038
- targets: ['localhost:9090']
4139
42-
# Capture endpoints in the openfaas namespace with a scrape annotation
43-
# such as the gateway-provider service.
44-
- job_name: 'openfaas-endpoints'
45-
kubernetes_sd_configs:
46-
- role: endpoints
47-
namespaces:
48-
names:
49-
- {{ .Release.Namespace }}
50-
relabel_configs:
51-
- action: labelmap
52-
regex: __meta_kubernetes_service_label_(.+)
53-
- source_labels: [__meta_kubernetes_namespace]
54-
action: replace
55-
target_label: kubernetes_namespace
56-
- source_labels: [__meta_kubernetes_service_name]
57-
action: replace
58-
target_label: kubernetes_name
59-
60-
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
61-
separator: ;
62-
regex: ([^:]+)(?::\d+)?;(\d+)
63-
target_label: __address__
64-
replacement: $1:$2
65-
action: replace
66-
67-
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
68-
action: keep
69-
regex: true
70-
# - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_port]
71-
# action: keep
72-
# regex: true
73-
74-
- job_name: 'kubernetes-pods'
40+
- job_name: 'openfaas-pods'
7541
scrape_interval: 5s
7642
honor_labels: false
7743
kubernetes_sd_configs:
7844
- role: pod
7945
namespaces:
8046
names:
8147
- {{ .Release.Namespace }}
82-
{{- if ne $functionNs (.Release.Namespace | toString) }}
83-
- {{ $functionNs }}
84-
{{- end }}
8548
relabel_configs:
8649
- action: labelmap
8750
regex: __meta_kubernetes_pod_label_(.+)
@@ -99,68 +62,11 @@ data:
9962
regex: ([^:]+)(?::\d+)?;(\d+)
10063
replacement: $1:$2
10164
target_label: __address__
102-
- action: replace
103-
regex: (.+)
104-
source_labels:
105-
- __meta_kubernetes_pod_annotation_prometheus_io_path
106-
target_label: __metrics_path__
107-
108-
{{- if .Values.openfaasPro }}
109-
110-
- job_name: 'kubernetes-resource-metrics'
111-
scrape_interval: 10s
112-
scheme: https
113-
tls_config:
114-
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
115-
insecure_skip_verify: true
116-
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
117-
kubernetes_sd_configs:
118-
- role: node
119-
relabel_configs:
120-
- action: labelmap
121-
regex: __meta_kubernetes_node_label_(.+)
122-
- target_label: __address__
123-
replacement: kubernetes.default.svc:443
124-
- source_labels: [__meta_kubernetes_node_name]
125-
regex: (.+)
126-
target_label: __metrics_path__
127-
replacement: /api/v1/nodes/${1}/proxy/metrics/resource
128-
metric_relabel_configs:
129-
- source_labels: [__name__]
130-
regex: (pod)_(cpu|memory)_(.+)
131-
action: keep
132-
# Exclude container metrics
133-
- source_labels: [__name__]
134-
regex: container_(.+)
135-
action: drop
136-
- action: replace
137-
source_labels:
138-
- namespace
139-
regex: '(.*)'
140-
replacement: '$1'
141-
target_label: kubernetes_namespace
142-
# Output deployment name from Pod
143-
- action: replace
144-
source_labels:
145-
- pod
146-
regex: '^([0-9a-zA-Z-]+)+(-[0-9a-zA-Z]+-[0-9a-zA-Z]+)$'
147-
replacement: '$1'
148-
target_label: deployment_name
149-
# Output fully-qualified function name fn.ns
150-
- source_labels: [deployment_name, kubernetes_namespace]
151-
separator: ";"
152-
regex: '(.*);(.*)'
153-
replacement: '${1}.${2}'
154-
target_label: "function_name"
155-
{{- end }}
15665
15766
alert.rules.yml: |
15867
groups:
15968
- name: openfaas
16069
rules:
161-
- alert: service_down
162-
expr: up == 0
163-
{{- if eq .Values.openfaasPro false }}
16470
- alert: APIHighInvocationRate
16571
expr: sum(rate(gateway_function_invocation_total{code="200"}[10s])) BY (function_name) > 5
16672
for: 5s
@@ -170,35 +76,6 @@ data:
17076
annotations:
17177
description: High invocation total on "{{ "{{" }}$labels.function_name{{ "}}" }}"
17278
summary: High invocation total on "{{ "{{" }}$labels.function_name{{ "}}" }}"
173-
{{- end }}
174-
175-
{{- if .Values.openfaasPro }}
176-
177-
prometheus-rules.yml: |
178-
groups:
179-
- name: load
180-
rules:
181-
- record: job:function_current_load:sum
182-
expr: sum by (function_name) ( rate( gateway_function_invocation_total{}[30s] ) ) and avg by (function_name) (gateway_service_target_load{scaling_type="rps"}) > 1
183-
labels:
184-
scaling_type: rps
185-
186-
- record: job:function_current_load:sum
187-
expr: sum by (function_name) ( max_over_time( gateway_function_invocation_inflight[45s:5s])) and on (function_name) avg by(function_name) (gateway_service_target_load{scaling_type="capacity"}) > bool 1
188-
labels:
189-
scaling_type: capacity
190-
191-
- record: job:function_current_load:sum
192-
expr: sum(irate ( pod_cpu_usage_seconds_total{}[1m])*1000) by (function_name) * on (function_name) avg by (function_name) (gateway_service_target_load{scaling_type="cpu"} > bool 1 )
193-
labels:
194-
scaling_type: cpu
195-
196-
- name: recently_started_1m
197-
interval: 10s
198-
rules:
199-
- record: job:function_current_started:max_sum
200-
expr: max_over_time(sum by (function_name) (rate( gateway_function_invocation_started{}[1m]))[1m:5s]) > 0
201-
202-
{{- end }}
20379
20480
{{- end }}
81+
{{- end }}

chart/openfaas/templates/prometheus-dep.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,11 @@ spec:
8282
name: prometheus-config
8383
subPath: prometheus-rules.yml
8484
{{- end }}
85+
{{- if eq .Values.openfaasPro false }}
8586
- mountPath: /etc/prometheus/alert.rules.yml
8687
name: prometheus-config
8788
subPath: alert.rules.yml
89+
{{- end}}
8890
- mountPath: /prometheus/data
8991
name: prom-data
9092
volumes:
@@ -95,9 +97,11 @@ spec:
9597
- key: prometheus.yml
9698
path: prometheus.yml
9799
mode: 0644
100+
{{- if eq .Values.openfaasPro false }}
98101
- key: alert.rules.yml
99102
path: alert.rules.yml
100103
mode: 0644
104+
{{- end }}
101105
{{- if .Values.openfaasPro }}
102106
- key: prometheus-rules.yml
103107
path: prometheus-rules.yml
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
{{- $functionNs := default .Release.Namespace .Values.functionNamespace }}
2+
3+
# License: OpenFaaS Pro EULA
4+
# Any use, modification or copying without an OpenFaaS Pro license is prohibited
5+
# All rights reserved OpenFaaS Ltd 2023
6+
7+
{{- if .Values.prometheus.create }}
8+
{{- if .Values.openfaasPro }}
9+
---
10+
kind: ConfigMap
11+
apiVersion: v1
12+
metadata:
13+
labels:
14+
app: {{ template "openfaas.name" . }}
15+
chart: {{ .Chart.Name }}-{{ .Chart.Version }}
16+
component: prometheus-config
17+
heritage: {{ .Release.Service }}
18+
release: {{ .Release.Name }}
19+
name: prometheus-config
20+
namespace: {{ .Release.Namespace | quote }}
21+
data:
22+
prometheus.yml: |
23+
global:
24+
scrape_interval: 15s
25+
evaluation_interval: 15s
26+
external_labels:
27+
monitor: 'faas-monitor'
28+
29+
rule_files:
30+
- 'prometheus-rules.yml'
31+
32+
scrape_configs:
33+
- job_name: 'prometheus'
34+
scrape_interval: 10s
35+
static_configs:
36+
- targets: ['localhost:9090']
37+
38+
# Capture endpoints in the openfaas namespace with a scrape annotation
39+
# such as the gateway-provider service.
40+
- job_name: 'openfaas-endpoints'
41+
kubernetes_sd_configs:
42+
- role: endpoints
43+
namespaces:
44+
names:
45+
- {{ .Release.Namespace }}
46+
relabel_configs:
47+
- action: labelmap
48+
regex: __meta_kubernetes_service_label_(.+)
49+
- source_labels: [__meta_kubernetes_namespace]
50+
action: replace
51+
target_label: kubernetes_namespace
52+
- source_labels: [__meta_kubernetes_service_name]
53+
action: replace
54+
target_label: kubernetes_name
55+
56+
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
57+
separator: ;
58+
regex: ([^:]+)(?::\d+)?;(\d+)
59+
target_label: __address__
60+
replacement: $1:$2
61+
action: replace
62+
63+
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
64+
action: keep
65+
regex: true
66+
67+
- job_name: 'kubernetes-pods'
68+
scrape_interval: 5s
69+
honor_labels: false
70+
kubernetes_sd_configs:
71+
- role: pod
72+
namespaces:
73+
names:
74+
- {{ .Release.Namespace }}
75+
{{- if ne $functionNs (.Release.Namespace | toString) }}
76+
- {{ $functionNs }}
77+
{{- end }}
78+
relabel_configs:
79+
- action: labelmap
80+
regex: __meta_kubernetes_pod_label_(.+)
81+
- source_labels: [__meta_kubernetes_namespace]
82+
action: replace
83+
target_label: kubernetes_namespace
84+
- source_labels: [__meta_kubernetes_pod_name]
85+
action: replace
86+
target_label: kubernetes_pod_name
87+
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
88+
action: keep
89+
regex: true
90+
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
91+
action: replace
92+
regex: ([^:]+)(?::\d+)?;(\d+)
93+
replacement: $1:$2
94+
target_label: __address__
95+
- action: replace
96+
regex: (.+)
97+
source_labels:
98+
- __meta_kubernetes_pod_annotation_prometheus_io_path
99+
target_label: __metrics_path__
100+
101+
- job_name: 'kubernetes-resource-metrics'
102+
scrape_interval: 10s
103+
scheme: https
104+
tls_config:
105+
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
106+
insecure_skip_verify: true
107+
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
108+
kubernetes_sd_configs:
109+
- role: node
110+
relabel_configs:
111+
- action: labelmap
112+
regex: __meta_kubernetes_node_label_(.+)
113+
- target_label: __address__
114+
replacement: kubernetes.default.svc:443
115+
- source_labels: [__meta_kubernetes_node_name]
116+
regex: (.+)
117+
target_label: __metrics_path__
118+
replacement: /api/v1/nodes/${1}/proxy/metrics/resource
119+
metric_relabel_configs:
120+
- source_labels: [__name__]
121+
regex: (pod)_(cpu|memory)_(.+)
122+
action: keep
123+
# Exclude container metrics
124+
- source_labels: [__name__]
125+
regex: container_(.+)
126+
action: drop
127+
- action: replace
128+
source_labels:
129+
- namespace
130+
regex: '(.*)'
131+
replacement: '$1'
132+
target_label: kubernetes_namespace
133+
# Output deployment name from Pod
134+
- action: replace
135+
source_labels:
136+
- pod
137+
regex: '^([0-9a-zA-Z-]+)+(-[0-9a-zA-Z]+-[0-9a-zA-Z]+)$'
138+
replacement: '$1'
139+
target_label: deployment_name
140+
# Output fully-qualified function name fn.ns
141+
- source_labels: [deployment_name, kubernetes_namespace]
142+
separator: ";"
143+
regex: '(.*);(.*)'
144+
replacement: '${1}.${2}'
145+
target_label: "function_name"
146+
147+
prometheus-rules.yml: |
148+
groups:
149+
- name: load
150+
rules:
151+
- record: job:function_current_load:sum
152+
expr: sum by (function_name) ( rate( gateway_function_invocation_total{}[30s] ) ) and avg by (function_name) (gateway_service_target_load{scaling_type="rps"}) > 1
153+
labels:
154+
scaling_type: rps
155+
156+
- record: job:function_current_load:sum
157+
expr: sum by (function_name) ( max_over_time( gateway_function_invocation_inflight[45s:5s])) and on (function_name) avg by(function_name) (gateway_service_target_load{scaling_type="capacity"}) > bool 1
158+
labels:
159+
scaling_type: capacity
160+
161+
- record: job:function_current_load:sum
162+
expr: sum(irate ( pod_cpu_usage_seconds_total{}[1m])*1000) by (function_name) * on (function_name) avg by (function_name) (gateway_service_target_load{scaling_type="cpu"} > bool 1 )
163+
labels:
164+
scaling_type: cpu
165+
166+
- name: recently_started_1m
167+
interval: 10s
168+
rules:
169+
- record: job:function_current_started:max_sum
170+
expr: max_over_time(sum by (function_name) (rate( gateway_function_invocation_started{}[1m]))[1m:5s]) > 0
171+
172+
{{- end }}
173+
{{- end }}

0 commit comments

Comments
 (0)