 #!/bin/bash

-set -e -u -o pipefail -x
+set -e -u -o pipefail -vx
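+# (-x traces each expanded command as it runs; adding -v also echoes input
+# lines as they are read, so comments and heredoc bodies appear in the log)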

 source scripts/config

 if ! grep -q "${kind_cluster_name}" <<< $(kind get clusters); then
   kind create cluster --name "${kind_cluster_name}"
 fi
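+# (the grep guard makes cluster creation idempotent across reruns; note the
+# unanchored match also hits cluster names that merely contain the target)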
|
10 | 10 |
|
11 |
| -mkdir -p .cache |
12 |
| -if [ ! -f .cache/kube-prometheus.yaml ]; then |
13 |
| - kustomize build https://github.com/prometheus-operator/kube-prometheus -o .cache/kube-prometheus.yaml |
+# Kustomize lacks support for caching remote bases
+# per https://github.com/kubernetes-sigs/kustomize/issues/1431
+mkdir -p .cache/kube-prometheus
+if [ ! -f .cache/kube-prometheus/kube-prometheus.yaml ]; then
+  kustomize build https://github.com/prometheus-operator/kube-prometheus -o .cache/kube-prometheus/kube-prometheus.yaml
 fi
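+# (the rendered manifest is cached indefinitely; delete
+# .cache/kube-prometheus/kube-prometheus.yaml to re-render from upstream)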
+if [ -f .cache/kube-prometheus/kustomization.yaml ]; then
+  rm .cache/kube-prometheus/kustomization.yaml
+fi
+(cd .cache/kube-prometheus && kustomize create && kustomize edit add resource kube-prometheus.yaml)
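+# (after this, .cache/kube-prometheus holds a minimal kustomization.yaml,
+# roughly:
+#   apiVersion: kustomize.config.k8s.io/v1beta1
+#   kind: Kustomization
+#   resources:
+#     - kube-prometheus.yaml
+# letting other kustomizations consume the cached render as a local base;
+# removing any stale kustomization.yaml first matters because
+# `kustomize create` refuses to overwrite an existing one)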

-# because kubectl apply still doesn't know how to wait for CRDs
-# before applying the rest...
-yq '.|select(.kind == "CustomResourceDefinition")' .cache/kube-prometheus.yaml | "${kubectl[@]}" apply --server-side -f -
-
-"${kubectl[@]}" wait --for=condition=established --timeout=60s crd --all
-
-yq '
-  .
-  |select(
-    (.kind != "CustomResourceDefinition")
-    and (.kind != "AlertManager")
-  )' .cache/kube-prometheus.yaml |\
-"${kubectl[@]}" apply --server-side --force-conflicts -f -
-
-# Scale down to 1 replica and don't deploy alertmanager, add resources,
-# enable some feature flags, and set a low GOMEMLIMIT
-"${kubectl[@]}" patch -n monitoring prometheus/k8s \
-  --type merge \
-  --patch-file /dev/stdin <<'__END__'
-apiVersion: monitoring.coreos.com/v1
-kind: Prometheus
-spec:
-  alerting:
-    alertmanagers: []
-  replicas: 1
-  # args to pass to the prometheus container, see kubectl explain prometheus.spec.additionalArgs
-  additionalArgs:
-    - name: web.enable-admin-api
-    - name: web.enable-remote-write-receiver
-    # https://prometheus.io/docs/prometheus/latest/feature_flags/#memory-snapshot-on-shutdown
-    # pointless when we aren't using a PV, but should move to it
-    # for BA anyway
-    - name: enable-feature
-      value: memory-snapshot-on-shutdown
-    # https://prometheus.io/docs/prometheus/latest/feature_flags/#extra-scrape-metrics
-    - name: enable-feature
-      value: extra-scrape-metrics
-    # https://prometheus.io/docs/prometheus/latest/feature_flags/#per-step-stats
-    - name: enable-feature
-      value: promql-per-step-stats
-    # https://prometheus.io/docs/prometheus/latest/feature_flags/#auto-gomemlimit
-    - name: enable-feature
-      value: auto-gomemlimit
-    - name: auto-gomemlimit.ratio
-      # an unusually low GOMEMLIMIT is set here deliberately. We're using
-      # Prometheus to monitor Prometheus and don't really want to have to have 2
-      # separate instances, so one can be allowed to OOM. Plus we get better data
-      # when we can still scrape it even if it's over-limits. So we're going to
-      # set a really aggressive GOMEMLIMIT that makes it GC hard when it exceeds
-      # 25% of its container limit, giving it tons of headroom to balloon into
-      # without OOMing.
-      value: "0.25"
-    # https://prometheus.io/docs/prometheus/latest/feature_flags/#auto-gomaxprocs
-    - name: enable-feature
-      value: auto-gomaxprocs
-    # https://prometheus.io/docs/prometheus/latest/feature_flags/#created-timestamps-zero-injection
-    - name: enable-feature
-      value: created-timestamp-zero-ingestion
-  # this is strategically merged by the operator with the default spec, see kubectl explain prometheus.spec.containers
-  containers:
-    - name: config-reloader
-      securityContext:
-        runAsNonRoot: true
-        runAsUser: 1000
-        allowPrivilegeEscalation: false
-        privileged: false
-        readOnlyRootFilesystem: true
-        capabilities:
-          drop:
-            - ALL
-    - name: prometheus
-      securityContext:
-        runAsNonRoot: true
-        runAsUser: 1000
-        allowPrivilegeEscalation: false
-        privileged: false
-        readOnlyRootFilesystem: true
-        capabilities:
-          drop:
-            - ALL
-      resources:
-        limits:
-          cpu: 1000m
-          # See the GOMEMLIMIT
-          memory: 2000Mi
-        requests:
-          cpu: 500m
-          memory: 2000Mi
-__END__
-
-
-"${kubectl[@]}" wait --for=condition=available --timeout=300s deployment --all -n monitoring
-"${kubectl[@]}" wait --for=condition=ready --timeout=300s pod --all -n monitoring
+kustomize build kubernetes/kube-prometheus | kapp deploy -a kube-prometheus -f - -y
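+# (kapp replaces the CRD-apply-then-wait choreography removed above: it
+# orders CRDs ahead of the resources that use them and waits for the app to
+# reconcile before exiting; -a names the tracked app, -y skips the
+# confirmation prompt. kubernetes/kube-prometheus is presumably the local
+# overlay that consumes the cached base built above)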

 # vim: et ts=2 sw=2 sts=2 ft=bash ai