diff --git a/charts/Makefile b/charts/Makefile index 39844abbf..56ca7f82b 100644 --- a/charts/Makefile +++ b/charts/Makefile @@ -4,49 +4,62 @@ REPO_BASE_DIR := $(shell git rev-parse --show-toplevel) include ${REPO_BASE_DIR}/scripts/common.Makefile include $(REPO_CONFIG_LOCATION) +# +# Vars +# + CONFIG_DIR := $(shell dirname $(REPO_CONFIG_LOCATION)) CHART_DIRS := $(wildcard $(REPO_BASE_DIR)/charts/*/) +HELMFILE_EXTRA_ARGS ?= + +HELMFILE := helmfile $(HELMFILE_EXTRA_ARGS) + +# +# Help Targets +# + .PHONY: .check-helmfile-installed .check-helmfile-installed: ## Checks if helmfile is installed @if ! command -v helmfile >/dev/null 2>&1; then \ echo "'helmfile' is not installed. Install it to continue ...";\ fi +# +# Artifacts +# + helmfile.yaml: simcore-charts/helmfile.yaml ## Copies the helmfile.yaml to the charts directory cp $(CONFIG_DIR)/$@ $(REPO_BASE_DIR)/charts/helmfile.yaml simcore-charts/helmfile.yaml: ## Copies the simcore helmfile to the charts directory cp $(CONFIG_DIR)/helmfile.simcore.yaml $(REPO_BASE_DIR)/charts/$@ +# +# Targets +# + .PHONY: helmfile-lint helmfile-lint: .check-helmfile-installed helmfile.yaml ## Lints the helmfile set -a; source $(REPO_CONFIG_LOCATION); set +a; \ - helmfile lint + $(HELMFILE) lint .PHONY: helmfile-apply helmfile-apply: .check-helmfile-installed helmfile.yaml ## Applies the helmfile configuration set -a; source $(REPO_CONFIG_LOCATION); set +a; \ - helmfile -f $(REPO_BASE_DIR)/charts/helmfile.yaml apply + $(HELMFILE) -f $(REPO_BASE_DIR)/charts/helmfile.yaml apply .PHONY: helmfile-sync helmfile-sync: .check-helmfile-installed helmfile.yaml ## Syncs the helmfile configuration (use `helmfile-apply` to deploy the app) set -a; source $(REPO_CONFIG_LOCATION); set +a; \ - helmfile -f $(REPO_BASE_DIR)/charts/helmfile.yaml sync + $(HELMFILE) -f $(REPO_BASE_DIR)/charts/helmfile.yaml sync .PHONY: helmfile-diff helmfile-diff: .check-helmfile-installed helmfile.yaml ## Shows the differences that would be applied by helmfile @set -a; 
source $(REPO_CONFIG_LOCATION); set +a; \ - helmfile -f $(REPO_BASE_DIR)/charts/helmfile.yaml diff + $(HELMFILE) -f $(REPO_BASE_DIR)/charts/helmfile.yaml diff .PHONY: helmfile-delete helmfile-delete: .check-helmfile-installed helmfile.yaml ## Deletes the helmfile configuration @set -a; source $(REPO_CONFIG_LOCATION); set +a; \ - helmfile -f $(REPO_BASE_DIR)/charts/helmfile.yaml delete - -.PHONY: up -up: helmfile-apply ## Start the stack - -.PHONY: leave -leave: ## Leaves kind cluster - kind delete clusters kind + $(HELMFILE) -f $(REPO_BASE_DIR)/charts/helmfile.yaml delete diff --git a/charts/cert-manager/templates/networkpolicy.yaml b/charts/cert-manager/templates/networkpolicy.yaml index 7d9c347cd..4025f5f2d 100644 --- a/charts/cert-manager/templates/networkpolicy.yaml +++ b/charts/cert-manager/templates/networkpolicy.yaml @@ -31,7 +31,7 @@ spec: - 172.16.0.0/12 - 192.168.0.0/16 ports: - - 6443 + - {{ .Values.kubeApiServerPort }} # 6. TCP: cert-manager (controller) -> DNS API endpoints (for ACME DNS01) - action: Allow protocol: TCP diff --git a/charts/cert-manager/values.common.yaml.gotmpl b/charts/cert-manager/values.common.yaml.gotmpl index ba7971087..8ad08456c 100644 --- a/charts/cert-manager/values.common.yaml.gotmpl +++ b/charts/cert-manager/values.common.yaml.gotmpl @@ -1,3 +1,5 @@ +kubeApiServerPort: {{ .Values.kubeApiServerPort }} + cert-manager: crds: enabled: true diff --git a/charts/kube-prometheus-stack/README.md b/charts/kube-prometheus-stack/README.md new file mode 100644 index 000000000..ebeda7474 --- /dev/null +++ b/charts/kube-prometheus-stack/README.md @@ -0,0 +1,19 @@ + +## High Availability + +Prometheus Server +* Issue asking how to configure it in `kube-prometheus-stack` https://github.com/prometheus-community/helm-charts/issues/6184 +* Prometheus Operator Documentation https://github.com/prometheus-operator/prometheus-operator/blob/v0.85.0/Documentation/platform/high-availability.md#prometheus + +Prometheus Operator +* Not needed. 
See https://github.com/prometheus-operator/prometheus-operator/issues/2491 + +## FAQ + +How to expose workload metrics +* Use ServiceMonitor, PodMonitor or running exporters. See https://github.com/prometheus-community/helm-charts/blob/kube-prometheus-stack-77.12.0/charts/kube-prometheus-stack/README.md#prometheusioscrape +* Make sure the network policies of prometheus and the workload allow all necessary ingress and egress + - prometheus shall be able to egress for metrics and workload should allow ingress for metrics + +Pod Monitor vs Service Monitor: +* https://github.com/prometheus-operator/prometheus-operator/issues/3119 diff --git a/charts/kube-prometheus-stack/namespaces.yaml b/charts/kube-prometheus-stack/namespaces.yaml new file mode 100644 index 000000000..c4da2db8d --- /dev/null +++ b/charts/kube-prometheus-stack/namespaces.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: monitoring + labels: + pod-security.kubernetes.io/enforce: restricted + +--- + +apiVersion: v1 +kind: Namespace +metadata: + name: monitoring-privileged + labels: + pod-security.kubernetes.io/enforce: privileged diff --git a/charts/kube-prometheus-stack/values.ebs-storage.yaml.gotmpl b/charts/kube-prometheus-stack/values.ebs-storage.yaml.gotmpl new file mode 100644 index 000000000..20ad4d375 --- /dev/null +++ b/charts/kube-prometheus-stack/values.ebs-storage.yaml.gotmpl @@ -0,0 +1,9 @@ +prometheus: + prometheusSpec: + storageSpec: + volumeClaimTemplate: + spec: + resources: + requests: + storage: 100Gi + storageClassName: "{{ .Values.ebsStorageClassName }}" diff --git a/charts/kube-prometheus-stack/values.kind-local-storage.yaml.gotmpl b/charts/kube-prometheus-stack/values.kind-local-storage.yaml.gotmpl new file mode 100644 index 000000000..5c25882e2 --- /dev/null +++ b/charts/kube-prometheus-stack/values.kind-local-storage.yaml.gotmpl @@ -0,0 +1,9 @@ +prometheus: + prometheusSpec: + storageSpec: + volumeClaimTemplate: + spec: + resources: + requests: + storage: 10Gi + 
storageClassName: "{{ .Values.kindDefaultStorageClassName }}" diff --git a/charts/kube-prometheus-stack/values.topolvm-storage.yaml.gotmpl b/charts/kube-prometheus-stack/values.topolvm-storage.yaml.gotmpl new file mode 100644 index 000000000..893e2f6cd --- /dev/null +++ b/charts/kube-prometheus-stack/values.topolvm-storage.yaml.gotmpl @@ -0,0 +1,9 @@ +prometheus: + prometheusSpec: + storageSpec: + volumeClaimTemplate: + spec: + resources: + requests: + storage: 100Gi + storageClassName: "{{ .Values.topolvmStorageClassName }}" diff --git a/charts/kube-prometheus-stack/values.yaml.gotmpl b/charts/kube-prometheus-stack/values.yaml.gotmpl new file mode 100644 index 000000000..867f18321 --- /dev/null +++ b/charts/kube-prometheus-stack/values.yaml.gotmpl @@ -0,0 +1,166 @@ +alertmanager: + enabled: false + +defaultRules: + create: false + +grafana: + enabled: false + +kubeApiServer: + enabled: false + +# container metrics (cpu / memory) +kubelet: + enabled: true + +kubeControllerManager: + enabled: false + +coreDns: + enabled: false + +kubeEtcd: + enabled: false + +kubeScheduler: + enabled: false + +kubeDns: + enabled: false + +kubeProxy: + enabled: false + +kubeStateMetrics: + enabled: false + +nodeExporter: + enabled: true + +prometheus-node-exporter: + namespaceOverride: "{{ .Release.Namespace }}-privileged" + +thanosRuler: + enabled: false + +prometheusOperator: + enabled: true + + networkPolicy: + enabled: true + flavor: kubernetes + + resources: + limits: + cpu: 1 + memory: 1Gi + requests: + cpu: 0.1 + memory: 256Mi + + nodeSelector: + ops: "true" + + tls: + internalPort: &prometheusOperatorInternalPort 10250 + +prometheus: + enabled: true + + service: + port: &prometheusServicePort 9090 + + networkPolicy: + enabled: true + flavor: kubernetes + + ingress: + - from: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: traefik + podSelector: + matchLabels: + app.kubernetes.io/name: traefik + ports: + - port: *prometheusServicePort + protocol: TCP + 
egress: + - ports: + # generic port for metrics + - port: 9100 + protocol: TCP + - ports: + # prometheus operator + - port: *prometheusOperatorInternalPort + protocol: TCP + - ports: + # kube api server + - port: {{ .Values.kubeApiServerPort }} + protocol: TCP + + # enable once object storage needed + thanosService: + enabled: false + + ingress: + enabled: true + ingressClassName: "" + annotations: + namespace: "{{ .Release.Namespace }}" + cert-manager.io/cluster-issuer: "cert-issuer" + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.middlewares: traefik-traefik-basic-auth@kubernetescrd # namespace + middleware name + tls: + - secretName: monitoring-tls + hosts: + - {{ requiredEnv "K8S_MONITORING_FQDN" }} + hosts: + - {{ requiredEnv "K8S_MONITORING_FQDN" }} + paths: + - &pathprefix /prometheus + pathType: Prefix + + prometheusSpec: + # Use 2+ for HA + replicas: 1 + + # Done for HA + # Needs to differentiate prometheus instances with the same setup + # https://github.com/prometheus-operator/prometheus-operator/blob/v0.85.0/Documentation/platform/high-availability.md#prometheus + # External Labels do not show up in metrics. 
See https://github.com/prometheus-operator/prometheus-operator/issues/2918#issuecomment-567009499 + replicaExternalLabelName: "prometheus_replica" + + # Done for HA + # Enforce replicas running on different nodes + # Otherwise it does not make sense from HA perspective + podAntiAffinity: "hard" + + routePrefix: *pathprefix + + retention: 90d + + retentionSize: 100GiB + + scrapeInterval: 30s + scrapeTimeout: 10s + + nodeSelector: + ops: "true" + + # https://github.com/prometheus-community/helm-charts/blob/kube-prometheus-stack-77.12.0/charts/kube-prometheus-stack/README.md#prometheusioscrape + podMonitorSelectorNilUsesHelmValues: false + serviceMonitorSelectorNilUsesHelmValues: false + + resources: + requests: + memory: 2Gi + cpu: 1 + limits: + memory: 4Gi + cpu: 2 + + persistentVolumeClaimRetentionPolicy: + whenDeleted: Retain + whenScaled: Retain diff --git a/charts/portainer/templates/networkpolicy.yaml b/charts/portainer/templates/networkpolicy.yaml index 6b21b5102..ce8566439 100644 --- a/charts/portainer/templates/networkpolicy.yaml +++ b/charts/portainer/templates/networkpolicy.yaml @@ -13,7 +13,7 @@ spec: # connect to the Kubernetes API server destination: ports: - - 6443 + - {{ .Values.kubeApiServerPort }} nets: - 10.0.0.0/8 - 172.16.0.0/12 diff --git a/charts/portainer/values.yaml.gotmpl b/charts/portainer/values.yaml.gotmpl index 303d084ee..aa8be5a05 100644 --- a/charts/portainer/values.yaml.gotmpl +++ b/charts/portainer/values.yaml.gotmpl @@ -1,5 +1,7 @@ servicePort: &servicePort 9000 +kubeApiServerPort: {{ .Values.kubeApiServerPort }} + portainer: replicaCount: 1 diff --git a/charts/traefik/values.common.yaml.gotmpl b/charts/traefik/values.common.yaml.gotmpl index a40c8cc77..bb00686ee 100644 --- a/charts/traefik/values.common.yaml.gotmpl +++ b/charts/traefik/values.common.yaml.gotmpl @@ -35,3 +35,10 @@ affinity: # https://github.com/traefik/traefik-helm-chart/blob/v28.2.0/traefik/ app.kubernetes.io/name: '{{`{{ template "traefik.name" . 
}}`}}' app.kubernetes.io/instance: '{{ .Release.Name }}' topologyKey: kubernetes.io/hostname + +metrics: + prometheus: + service: + enabled: true + serviceMonitor: + enabled: true