Skip to content

Commit 468ecef

Browse files
authored
Added probe for wc-ingress configured via the blackbox exporter (#2755)
1 parent a64ddf1 commit 468ecef

File tree

12 files changed

+392
-1
lines changed

12 files changed

+392
-1
lines changed

config/common-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,7 @@ externalTrafficPolicy:
609609

610610
## Nginx ingress controller configuration
611611
ingressNginx:
612+
subDomain: ingress-nginx
612613
controller:
613614
allowSnippetAnnotations: false
614615
resources:

config/sc-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,6 +1207,7 @@ prometheusBlackboxExporter:
12071207
fluentd: true
12081208
thanosQuery: true
12091209
thanosReceiver: true
1210+
wc: true
12101211

12111212
welcomingDashboard:
12121213
# If you want to add extra text to the grafana/opensearch "welcoming dashboards"

config/wc-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,9 +309,9 @@ prometheusBlackboxExporter:
309309
gatekeeper: true
310310
falco: true
311311
sc: true
312+
wc: true
312313

313314
ingressNginx:
314-
subDomain: ingress-nginx
315315
controller:
316316
## Kubernetes service configuration.
317317
service:

helmfile.d/values/prometheus-blackbox-exporter-sc.yaml.gotmpl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,17 @@ config:
5353
preferred_ip_protocol: "ip4"
5454
tls_config:
5555
insecure_skip_verify: true
56+
http_404:
57+
prober: http
58+
timeout: 5s
59+
http:
60+
# the ingress controller is expected to answer 404 for this host, just checking that it can be reached
61+
valid_status_codes: [404]
62+
valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
63+
follow_redirects: true
64+
preferred_ip_protocol: "ip4"
65+
tls_config:
66+
insecure_skip_verify: {{ not .Values.global.verifyTls }}
5667
{{- if or (eq .Values.harbor.database.type "external") (eq .Values.harbor.redis.type "external") }}
5768
tcp_connect:
5869
prober: tcp
@@ -151,6 +162,13 @@ serviceMonitor:
151162
scrapeTimeout: 30s
152163
module: http_401
153164
{{- end }}
165+
{{- if .Values.prometheusBlackboxExporter.targets.wc}}
166+
- name: wc-ingress-probe
167+
url: https://{{ .Values.ingressNginx.subDomain }}.{{ .Values.global.baseDomain }}
168+
interval: 60s
169+
scrapeTimeout: 30s
170+
module: http_404
171+
{{- end }}
154172

155173
{{- with .Values.prometheusBlackboxExporter.hostAliases }}
156174
hostAliases:

helmfile.d/values/prometheus-blackbox-exporter-wc.yaml.gotmpl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,17 @@ config:
5050
preferred_ip_protocol: "ip4"
5151
tls_config:
5252
insecure_skip_verify: true
53+
http_404:
54+
prober: http
55+
timeout: 5s
56+
http:
57+
# the ingress controller is expected to answer 404 for this host, just checking that it can be reached
58+
valid_status_codes: [404]
59+
valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
60+
follow_redirects: true
61+
preferred_ip_protocol: "ip4"
62+
tls_config:
63+
insecure_skip_verify: {{ not .Values.global.verifyTls }}
5364
tcp_connect:
5465
prober: tcp
5566
service:
@@ -96,6 +107,14 @@ serviceMonitor:
96107
module: http_2xx
97108
{{- end }}
98109

110+
{{- if .Values.prometheusBlackboxExporter.targets.wc}}
111+
- name: wc-ingress-probe
112+
url: https://{{ .Values.ingressNginx.subDomain }}.{{ .Values.global.baseDomain }}
113+
interval: 60s
114+
scrapeTimeout: 30s
115+
module: http_404
116+
{{- end }}
117+
99118
{{- if .Values.prometheusBlackboxExporter.targets.sc}}
100119
{{- $scDomain := default .Values.global.baseDomain .Values.global.scDomain }}
101120
{{- $scOpsDomain := default .Values.global.opsDomain .Values.global.scOpsDomain }}

migration/template/prepare/00-template.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ source "${ROOT}/scripts/migration/lib.sh"
1919
# - yq_move <common|sc|wc> <source> <destination>
2020
# - yq_remove <common|sc|wc> <target>
2121
# - yq_add <common|sc|wc> <destination> <value>
22+
# - yq_move_to_file <common|sc|wc> <source> <common|sc|wc> <destination>
2223

2324
# Note: 00-template.sh will be skipped by the upgrade command
2425
log_info "no operation: this is a template"

migration/v0.50/README.md

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
# Upgrade to v0.50.x
2+
3+
> [!WARNING]
4+
> Upgrade is only supported from v0.49.x.
5+
6+
<!--
7+
Notice to developers on writing migration steps:
8+
9+
- Migration steps:
10+
- are written per minor version and placed in a subdirectory of the migration directory with the name `vX.Y/`,
11+
- are written to be idempotent and usable no matter which patch version you are upgrading from and to,
12+
- are documented in this document to be able to run them manually,
13+
- are divided into prepare and apply steps:
14+
- Prepare steps:
15+
- are placed in the `prepare/` directory,
16+
- may **only** modify the configuration of the environment,
17+
- may **not** modify the state of the environment,
18+
- are run in order of their names, so use two-digit prefixes.
19+
- Apply steps:
20+
- are placed in the `apply/` directory,
21+
- may **only** modify the state of the environment,
22+
- may **not** modify the configuration of the environment,
23+
- are run in order of their names, so use two-digit prefixes,
24+
- are run with the argument `execute` on upgrade and should return 1 on failure and 2 on successful internal rollback,
25+
- are rerun with the argument `rollback` on execute failure and should return 1 on failure.
26+
27+
For prepare the init step is given.
28+
For apply the bootstrap and the apply steps are given, it is expected that releases upgraded in custom steps are excluded from the apply step.
29+
30+
Upgrades of components that are dependent on each other should be done within the same snippet to easily manage the upgrade to a working state and to be able to rollback to a working state.
31+
32+
Steps should use the `scripts/migration/lib.sh` which will provide helper functions, see the file for available helper functions.
33+
This script expects the `ROOT` environment variable to be set pointing to the root of the repository.
34+
As with all scripts in this repository `CK8S_CONFIG_PATH` is expected to be set.
35+
-->
36+
37+
## Prerequisites
38+
39+
- [ ] Read through the changelog to check if there are any changes you need to be aware of. Read through the release notes, Platform Administrator notices, Application Developer notices, and Security notice.
40+
- [ ] Notify the users (if any) before the upgrade starts;
41+
- [ ] Check if there are any pending changes to the environment;
42+
- [ ] Check the state of the environment, pods, nodes and backup jobs:
43+
44+
```bash
45+
./bin/ck8s test sc|wc
46+
./bin/ck8s ops kubectl sc|wc get pods -A -o custom-columns=NAMESPACE:metadata.namespace,POD:metadata.name,READY-false:status.containerStatuses[*].ready,REASON:status.containerStatuses[*].state.terminated.reason | grep false | grep -v Completed
47+
./bin/ck8s ops kubectl sc|wc get nodes
48+
./bin/ck8s ops kubectl sc|wc get jobs -A
49+
./bin/ck8s ops helm sc|wc list -A --all
50+
./bin/ck8s ops velero sc|wc get backup
51+
```
52+
53+
- [ ] Silence the notifications for the alerts, e.g. you can use [Alertmanager silences](https://prometheus.io/docs/alerting/latest/alertmanager/#silences);
54+
55+
## Automatic method
56+
57+
1. Pull the latest changes and switch to the correct branch:
58+
59+
```bash
60+
git pull
61+
git switch -d v0.50.x
62+
```
63+
64+
1. Prepare upgrade - _non-disruptive_
65+
66+
> _Done before maintenance window._
67+
68+
```bash
69+
./bin/ck8s upgrade both v0.50 prepare
70+
71+
# check if the netpol IPs need to be updated
72+
./bin/ck8s update-ips both dry-run
73+
# if you agree with the changes apply
74+
./bin/ck8s update-ips both apply
75+
```
76+
77+
> **Note:**
78+
> It is possible to upgrade `wc` and `sc` clusters separately by replacing `both` when running the `upgrade` command, e.g. the following will only upgrade the workload cluster:
79+
80+
```bash
81+
./bin/ck8s upgrade wc v0.50 prepare
82+
./bin/ck8s upgrade wc v0.50 apply
83+
```
84+
85+
1. Apply upgrade - _disruptive_
86+
87+
> _Done during maintenance window._
88+
89+
```bash
90+
./bin/ck8s upgrade both v0.50 apply
91+
```
92+
93+
## Manual method
94+
95+
### Prepare upgrade - _non-disruptive_
96+
97+
> _Done before maintenance window._
98+
99+
1. Pull the latest changes and switch to the correct branch:
100+
101+
```bash
102+
git pull
103+
git switch -d v0.50.x
104+
```
105+
106+
1. Set whether the upgrade should be prepared for `both` clusters or for only one of `sc` or `wc`:
107+
108+
```bash
109+
export CK8S_CLUSTER=<wc|sc|both>
110+
```
111+
112+
1. Update apps configuration:
113+
114+
This will take a backup into `backups/` before modifying any files.
115+
116+
```bash
117+
./bin/ck8s init ${CK8S_CLUSTER}
118+
# or
119+
./migration/v0.50/prepare/50-init.sh
120+
121+
# check if the netpol IPs need to be updated
122+
./bin/ck8s update-ips ${CK8S_CLUSTER} dry-run
123+
# if you agree with the changes apply
124+
./bin/ck8s update-ips ${CK8S_CLUSTER} apply
125+
```
126+
127+
### Apply upgrade - _disruptive_
128+
129+
> _Done during maintenance window._
130+
131+
1. Set whether the upgrade should be applied for `both` clusters or for only one of `sc` or `wc`:
132+
133+
```bash
134+
export CK8S_CLUSTER=<wc|sc|both>
135+
```
136+
137+
1. Upgrade applications:
138+
139+
```bash
140+
./bin/ck8s apply {sc|wc}
141+
# or
142+
./migration/v0.50/apply/80-apply.sh execute
143+
```
144+
145+
## Postrequisites
146+
147+
- [ ] Check the state of the environment, pods and nodes:
148+
149+
```bash
150+
./bin/ck8s test sc|wc
151+
./bin/ck8s ops kubectl sc|wc get pods -A -o custom-columns=NAMESPACE:metadata.namespace,POD:metadata.name,READY-false:status.containerStatuses[*].ready,REASON:status.containerStatuses[*].state.terminated.reason | grep false | grep -v Completed
152+
./bin/ck8s ops kubectl sc|wc get nodes
153+
./bin/ck8s ops helm sc|wc list -A --all
154+
```
155+
156+
- [ ] Enable the notifications for the alerts;
157+
- [ ] Notify the users (if any) when the upgrade is complete;
158+
159+
> [!NOTE]
160+
> Additionally it is good to check:
161+
>
162+
> - if any alerts generated by the upgrade didn't close;
163+
> - if you can log in to Grafana, OpenSearch or Harbor;
164+
> - you can see fresh metrics and logs.
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#!/usr/bin/env bash
2+
3+
ROOT="$(readlink -f "$(dirname "${0}")/../../../")"
4+
5+
# shellcheck source=scripts/migration/lib.sh
6+
source "${ROOT}/scripts/migration/lib.sh"
7+
8+
# functions currently available in the library:
9+
# - logging:
10+
# - log_info(_no_newline) <message>
11+
# - log_warn(_no_newline) <message>
12+
# - log_error(_no_newline) <message>
13+
# - log_fatal <message> # this will call "exit 1"
14+
#
15+
# - kubectl
16+
# # Use kubectl with kubeconfig set
17+
# - kubectl_do <sc|wc> <kubectl args...>
18+
# # Perform kubectl delete, will not cause errors if the resource is missing
19+
# - kubectl_delete <sc|wc> <resource> <namespace> <name>
20+
#
21+
# - helm
22+
# # Use helm with kubeconfig set
23+
# - helm_do <sc|wc> <helm args...>
24+
# # Checks if a release is installed
25+
# - helm_installed <sc|wc> <namespace> <release>
26+
# # Uninstalls a release if it is installed
27+
# - helm_uninstall <sc|wc> <namespace> <release>
28+
#
29+
# - helmfile
30+
# # Use helmfile with kubeconfig set
31+
# - helmfile_do <sc|wc> <helmfile args...>
32+
# # For selector args all will be prefixed with "-l"
33+
# # List releases matching the selector
34+
# - helmfile_list <sc|wc> <selectors...>
35+
# # Apply releases matching the selector
36+
# - helmfile_apply <sc|wc> <selectors...>
37+
# # Check for changes on releases matching the selector
38+
# - helmfile_change <sc|wc> <selectors...>
39+
# # Destroy releases matching the selector
40+
# - helmfile_destroy <sc|wc> <selectors...>
41+
# # Replaces the releases matching the selector, performing destroy and apply on each release individually
42+
# - helmfile_replace <sc|wc> <selectors...>
43+
# # Upgrades the releases matching the selector, performing automatic rollback on failure set "CK8S_ROLLBACK=false" to disable
44+
# - helmfile_upgrade <sc|wc> <selectors...>
45+
46+
# Entry point of the migration step template; dispatches on the first argument.
#
# Arguments:
#   $1 - "execute" or "rollback"; a missing argument is treated as empty and
#        falls through to the usage branch.
# Globals:
#   CK8S_CLUSTER (read) - compared against "sc", "wc" and "both" to decide
#                         which cluster messages are logged.
# Behaviour:
#   execute  - logs that this is a no-op template, then logs one line per
#              selected cluster.
#   rollback - only logs a warning that rollback is not implemented.
#   other    - reports the usage via log_fatal (which, per the library notes
#              in this file, calls "exit 1").
run() {
47+
case "${1:-}" in
48+
execute)
49+
# Note: 00-template.sh will be skipped by the upgrade command
50+
log_info "no operation: this is a template"
51+
52+
# Anchored regex: true only when CK8S_CLUSTER is exactly "sc" or "both".
if [[ "${CK8S_CLUSTER}" =~ ^(sc|both)$ ]]; then
53+
log_info "operation on service cluster"
54+
fi
55+
# Anchored regex: true only when CK8S_CLUSTER is exactly "wc" or "both".
if [[ "${CK8S_CLUSTER}" =~ ^(wc|both)$ ]]; then
56+
log_info "operation on workload cluster"
57+
fi
58+
;;
59+
rollback)
60+
log_warn "rollback not implemented"
61+
62+
# Placeholder kept for step authors: uncomment and adapt per cluster.
# if [[ "${CK8S_CLUSTER}" =~ ^(sc|both)$ ]]; then
63+
# log_info "rollback operation on service cluster"
64+
# fi
65+
# if [[ "${CK8S_CLUSTER}" =~ ^(wc|both)$ ]]; then
66+
# log_info "rollback operation on workload cluster"
67+
# fi
68+
;;
69+
*)
70+
log_fatal "usage: \"${0}\" <execute|rollback>"
71+
;;
72+
esac
73+
}
74+
75+
run "${@}"

migration/v0.50/apply/80-apply.sh

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env bash
2+
3+
ROOT="$(readlink -f "$(dirname "${0}")/../../../")"
4+
5+
# shellcheck source=scripts/migration/lib.sh
6+
source "${ROOT}/scripts/migration/lib.sh"
7+
8+
# Add selector filters if covered by other snippets.
9+
# Example: "app!=something"
10+
declare -a skipped
11+
skipped=(
12+
)
13+
declare -a skipped_sc
14+
skipped_sc=(
15+
)
16+
declare -a skipped_wc
17+
skipped_wc=(
18+
)
19+
20+
# Entry point of the apply step; dispatches on the first argument.
#
# Arguments:
#   $1 - "execute" or "rollback"; a missing argument is treated as empty and
#        falls through to the usage branch.
# Globals:
#   CK8S_CLUSTER (read)  - compared against "sc", "wc" and "both" to decide
#                          which cluster(s) are upgraded.
#   skipped, skipped_sc, skipped_wc (read) - selector filters declared at the
#                          top of this script.
# Behaviour:
#   execute  - for each selected cluster, builds one selector string from the
#              common and cluster-specific filters and passes it to
#              helmfile_upgrade.
#   rollback - only logs a warning that rollback is not implemented.
#   other    - reports the usage via log_fatal (provided by the sourced lib.sh;
#              presumably exits — confirm there).
run() {
21+
case "${1:-}" in
22+
execute)
23+
local -a filters
24+
local selector
25+
26+
if [[ "${CK8S_CLUSTER}" =~ ^(sc|both)$ ]]; then
27+
# Common filters first, then the service-cluster-specific ones.
filters=("${skipped[@]}" "${skipped_sc[@]}")
28+
# Join all filters into one string; fall back to "app!=null" when the list is empty.
selector="${filters[*]:-"app!=null"}"
29+
# Replace every space with a comma so a single comma-separated selector is passed
# (assumes the [*] join above used a space, i.e. default IFS — TODO confirm lib.sh does not change IFS).
helmfile_upgrade sc "${selector// /,}"
30+
fi
31+
32+
if [[ "${CK8S_CLUSTER}" =~ ^(wc|both)$ ]]; then
33+
# Same construction as above, with the workload-cluster-specific filters.
filters=("${skipped[@]}" "${skipped_wc[@]}")
34+
selector="${filters[*]:-"app!=null"}"
35+
helmfile_upgrade wc "${selector// /,}"
36+
fi
37+
;;
38+
39+
rollback)
40+
log_warn "rollback not implemented"
41+
42+
# Placeholder kept for step authors: uncomment and adapt per cluster.
# if [[ "${CK8S_CLUSTER}" =~ ^(sc|both)$ ]]; then
43+
# log_info "rollback operation on service cluster"
44+
# fi
45+
# if [[ "${CK8S_CLUSTER}" =~ ^(wc|both)$ ]]; then
46+
# log_info "rollback operation on workload cluster"
47+
# fi
48+
;;
49+
50+
*)
51+
log_fatal "usage: \"${0}\" <execute|rollback>"
52+
;;
53+
esac
54+
}
55+
56+
run "${@}"

0 commit comments

Comments
 (0)