diff --git a/addons/loki/scripts/check-index-gateway-ring.sh b/addons/loki/scripts/check-index-gateway-ring.sh
new file mode 100755
--- /dev/null
+++ b/addons/loki/scripts/check-index-gateway-ring.sh
@@ -0,0 +1,47 @@
+#!/bin/sh
+# check-index-gateway-ring.sh
+#
+# Exit 0 when the local Loki HTTP endpoint answers /ready and the index
+# gateway ring page contains at least one ACTIVE entry; exit 1 otherwise.
+# This script is used in startupProbe and readinessProbe.
+# Uses curl from the tools volume (copied there by the init container).
+#
+# Environment:
+#   SERVER_HTTP_PORT  local Loki HTTP port (default: 3100)
+#   CURL_MAX_TIME     per-request timeout in seconds (default: 1); two
+#                     requests run per probe, so 2 * CURL_MAX_TIME must
+#                     stay below the probe's timeoutSeconds
+
+LOCAL_PORT="${SERVER_HTTP_PORT:-3100}"
+CURL="/kb-tools/curl"
+CURL_MAX_TIME="${CURL_MAX_TIME:-1}"
+
+# Check if curl is available
+if [ ! -x "$CURL" ]; then
+  echo "curl not found at $CURL"
+  exit 1
+fi
+
+# Check if Loki service is ready
+if ! "$CURL" -sf --max-time "$CURL_MAX_TIME" "http://localhost:${LOCAL_PORT}/ready" > /dev/null 2>&1; then
+  echo "Loki service not ready"
+  exit 1
+fi
+
+# Check index gateway ring for ACTIVE instances
+RING_HTML=$("$CURL" -sf --max-time "$CURL_MAX_TIME" "http://localhost:${LOCAL_PORT}/indexgateway/ring" 2>/dev/null || echo "")
+if [ -z "$RING_HTML" ]; then
+  echo "Cannot access index gateway ring endpoint"
+  exit 1
+fi
+
+# Count ACTIVE occurrences in the HTML. The pipeline's exit status is wc's,
+# so a zero-match grep needs no fallback: wc simply prints 0.
+ACTIVE_COUNT=$(printf '%s\n' "$RING_HTML" | grep -o 'ACTIVE' | wc -l)
+if [ "$ACTIVE_COUNT" -eq 0 ]; then
+  echo "Index gateway ring is empty, no ACTIVE instances found"
+  exit 1
+fi
+
+echo "Index gateway ring is ready with $ACTIVE_COUNT ACTIVE instance(s)"
+exit 0
diff --git a/addons/loki/scripts/wait-index-gateway-ring.sh b/addons/loki/scripts/wait-index-gateway-ring.sh
new file mode 100755
--- /dev/null
+++ b/addons/loki/scripts/wait-index-gateway-ring.sh
@@ -0,0 +1,54 @@
+#!/bin/sh
+# wait-index-gateway-ring.sh
+#
+# Wait for at least one index gateway instance to be ACTIVE in the ring.
+# This script is used as an init container for read/write components.
+#
+# Environment:
+#   KB_CLUSTER_NAME   (required) backend service is "<KB_CLUSTER_NAME>-backend"
+#   KB_NAMESPACE      (required) namespace part of the backend service DNS name
+#   CLUSTER_DOMAIN    (required) domain suffix of the backend service DNS name
+#   SERVER_HTTP_PORT  backend HTTP port (default: 3100)
+#   MAX_WAIT          seconds of polling sleep before giving up (default: 300)
+#   CURL_MAX_TIME     per-request timeout in seconds (default: 5)
+
+# No pipefail: not every /bin/sh implements it, and with it a zero-match
+# grep would fail the counting pipeline below.
+set -eu
+
+: "${KB_CLUSTER_NAME:?KB_CLUSTER_NAME must be set}"
+: "${KB_NAMESPACE:?KB_NAMESPACE must be set}"
+: "${CLUSTER_DOMAIN:?CLUSTER_DOMAIN must be set}"
+
+BACKEND_SVC="${KB_CLUSTER_NAME}-backend"
+BACKEND_PORT="${SERVER_HTTP_PORT:-3100}"
+BACKEND_URL="http://${BACKEND_SVC}.${KB_NAMESPACE}.svc.${CLUSTER_DOMAIN}:${BACKEND_PORT}"
+MAX_WAIT="${MAX_WAIT:-300}" # 5 minutes default
+CURL_MAX_TIME="${CURL_MAX_TIME:-5}"
+POLL_INTERVAL=5
+ELAPSED=0
+
+echo "Waiting for index gateway ring to be ready..."
+echo "Backend service: ${BACKEND_SVC}.${KB_NAMESPACE}.svc.${CLUSTER_DOMAIN}:${BACKEND_PORT}"
+echo "Max wait time: ${MAX_WAIT} seconds"
+
+while [ "$ELAPSED" -lt "$MAX_WAIT" ]; do
+  # Check if backend service is accessible
+  if curl -sf --max-time "$CURL_MAX_TIME" "${BACKEND_URL}/ready" > /dev/null 2>&1; then
+    # Check ring for ACTIVE instances (parse HTML)
+    RING_HTML=$(curl -sf --max-time "$CURL_MAX_TIME" "${BACKEND_URL}/indexgateway/ring" 2>/dev/null || echo "")
+    if [ -n "$RING_HTML" ]; then
+      ACTIVE_COUNT=$(printf '%s\n' "$RING_HTML" | grep -o 'ACTIVE' | wc -l)
+      if [ "$ACTIVE_COUNT" -gt 0 ]; then
+        echo "Index gateway ring is ready with $ACTIVE_COUNT ACTIVE instance(s)"
+        exit 0
+      fi
+    fi
+  fi
+  echo "Waiting for index gateway ring... ($ELAPSED/$MAX_WAIT seconds)"
+  sleep "$POLL_INTERVAL"
+  ELAPSED=$((ELAPSED + POLL_INTERVAL))
+done
+
+echo "Timeout waiting for index gateway ring after $MAX_WAIT seconds"
+exit 1
diff --git a/addons/loki/templates/_helpers.tpl b/addons/loki/templates/_helpers.tpl
index 66917afa7..66b676816 100644
--- a/addons/loki/templates/_helpers.tpl
+++ b/addons/loki/templates/_helpers.tpl
@@ -100,6 +100,7 @@ Docker image name
 {{- if .Values.enterprise.enabled -}}{{- include "loki.enterpriseImage" . -}}{{- else -}}{{- include "loki.lokiImage" . -}}{{- end -}}
 {{- end -}}
 
+
 {{/*
 write fullname
 */}}
@@ -275,6 +276,23 @@ Define loki write component definition regular expression name prefix
 ^loki-write-
 {{- end -}}
 
+{{/*
+Define loki scripts configMap template name
+*/}}
+{{- define "loki.scriptsTemplate" -}}
+loki-scripts-{{ .Chart.Version }}
+{{- end -}}
+
+{{/*
+Generate loki scripts configmap
+*/}}
+{{- define "loki.extend.scripts" -}}
+{{- range $path, $_ := $.Files.Glob "scripts/**" }}
+{{ $path | base }}: |-
+{{- $.Files.Get $path | nindent 2 }}
+{{- end }}
+{{- end }}
+
 {{/*
 object storage serviceRef declarations
 */}}
diff --git a/addons/loki/templates/cmpd-read.yaml b/addons/loki/templates/cmpd-read.yaml
--- a/addons/loki/templates/cmpd-read.yaml
+++ b/addons/loki/templates/cmpd-read.yaml
@@ -37,6 +37,26 @@ spec:
             topologyKey: kubernetes.io/hostname
     securityContext:
       fsGroup: 10001
+    initContainers:
+      - name: wait-index-gateway
+        imagePullPolicy: {{ .Values.images.curl.pullPolicy }}
+        command:
+          - /bin/sh
+          - -c
+          - |
+            # Copy curl to tools volume for use in probes; abort if the copy fails
+            cp /bin/curl /kb-tools/curl || exit 1
+
+            # Execute wait script
+            /kb-scripts/wait-index-gateway-ring.sh
+        env:
+          - name: MAX_WAIT
+            value: "300"
+        volumeMounts:
+          - name: scripts
+            mountPath: /kb-scripts
+          - name: tools
+            mountPath: /kb-tools
     containers:
       - name: read
         imagePullPolicy: {{ .Values.images.pullPolicy }}
@@ -69,18 +89,26 @@ spec:
           - containerPort: {{ .Values.server.httpMemberlistPort }}
             name: http-memberlist
             protocol: TCP
+        startupProbe:
+          exec:
+            command:
+              - /kb-scripts/check-index-gateway-ring.sh
+          initialDelaySeconds: 10
+          periodSeconds: 5
+          timeoutSeconds: 3
+          successThreshold: 1
+          failureThreshold: 60
         readinessProbe:
-          failureThreshold: 3
-          httpGet:
-            path: /ready
-            port: http-metrics
-            scheme: HTTP
+          exec:
+            command:
+              - /kb-scripts/check-index-gateway-ring.sh
           initialDelaySeconds: 15
           periodSeconds: 10
+          timeoutSeconds: 3
           successThreshold: 1
-          timeoutSeconds: 1
+          failureThreshold: 3
         volumeMounts:
-          - mountPath: /etc/loki/config 
+          - mountPath: /etc/loki/config
             name: config
           - mountPath: /etc/loki/runtime-config
             name: runtime-config
@@ -88,6 +116,13 @@ spec:
             name: tmp
           - mountPath: /var/loki
             name: data
+          - mountPath: /kb-scripts
+            name: scripts
+          - mountPath: /kb-tools
+            name: tools
+    volumes:
+      - emptyDir: {}
+        name: tools
   configs:
     - name: loki-config
       template: loki-tpl
@@ -99,6 +134,12 @@ spec:
       volumeName: runtime-config
       namespace: {{ .Release.Namespace }}
       restartOnFileChange: true
+  scripts:
+    - name: loki-scripts
+      template: {{ include "loki.scriptsTemplate" . }}
+      namespace: {{ .Release.Namespace }}
+      volumeName: scripts
+      defaultMode: 0555
   vars:
     - name: SERVER_HTTP_PORT
       value: {{ .Values.server.httpMetricsPort | quote }}
diff --git a/addons/loki/templates/cmpd-write.yaml b/addons/loki/templates/cmpd-write.yaml
--- a/addons/loki/templates/cmpd-write.yaml
+++ b/addons/loki/templates/cmpd-write.yaml
@@ -37,6 +37,26 @@ spec:
             topologyKey: kubernetes.io/hostname
     securityContext:
       fsGroup: 10001
+    initContainers:
+      - name: wait-index-gateway
+        imagePullPolicy: {{ .Values.images.curl.pullPolicy }}
+        command:
+          - /bin/sh
+          - -c
+          - |
+            # Copy curl to tools volume for use in probes; abort if the copy fails
+            cp /bin/curl /kb-tools/curl || exit 1
+
+            # Execute wait script
+            /kb-scripts/wait-index-gateway-ring.sh
+        env:
+          - name: MAX_WAIT
+            value: "300"
+        volumeMounts:
+          - name: scripts
+            mountPath: /kb-scripts
+          - name: tools
+            mountPath: /kb-tools
     containers:
       - name: write
         imagePullPolicy: {{ .Values.images.pullPolicy }}
@@ -66,18 +86,26 @@ spec:
           - containerPort: {{ .Values.server.httpMemberlistPort }}
             name: http-memberlist
             protocol: TCP
+        startupProbe:
+          exec:
+            command:
+              - /kb-scripts/check-index-gateway-ring.sh
+          initialDelaySeconds: 10
+          periodSeconds: 5
+          timeoutSeconds: 3
+          successThreshold: 1
+          failureThreshold: 60
         readinessProbe:
-          failureThreshold: 3
-          httpGet:
-            path: /ready
-            port: http-metrics
-            scheme: HTTP
+          exec:
+            command:
+              - /kb-scripts/check-index-gateway-ring.sh
           initialDelaySeconds: 15
           periodSeconds: 10
+          timeoutSeconds: 3
           successThreshold: 1
-          timeoutSeconds: 1
+          failureThreshold: 3
         volumeMounts:
-          - mountPath: /etc/loki/config 
+          - mountPath: /etc/loki/config
             name: config
           - mountPath: /etc/loki/runtime-config
             name: runtime-config
@@ -85,6 +113,13 @@ spec:
             name: tmp
           - mountPath: /var/loki
             name: data
+          - mountPath: /kb-scripts
+            name: scripts
+          - mountPath: /kb-tools
+            name: tools
+    volumes:
+      - emptyDir: {}
+        name: tools
   configs:
     - name: loki-config
       template: loki-tpl
@@ -96,6 +131,12 @@ spec:
       volumeName: runtime-config
       namespace: {{ .Release.Namespace }}
       restartOnFileChange: true
+  scripts:
+    - name: loki-scripts
+      template: {{ include "loki.scriptsTemplate" . }}
+      namespace: {{ .Release.Namespace }}
+      volumeName: scripts
+      defaultMode: 0555
   vars:
     - name: SERVER_HTTP_PORT
       value: {{ .Values.server.httpMetricsPort | quote }}
diff --git a/addons/loki/templates/cmpv.yaml b/addons/loki/templates/cmpv.yaml
index b8ff86f48..d869864a8 100644
--- a/addons/loki/templates/cmpv.yaml
+++ b/addons/loki/templates/cmpv.yaml
@@ -29,10 +29,12 @@ spec:
       serviceVersion: 1.0.0
       images:
         write: {{ .Values.images.registry | default "docker.io" }}/{{ .Values.images.repository }}:{{ .Values.images.tag }}
+        wait-index-gateway: {{ .Values.images.registry | default "docker.io" }}/{{ .Values.images.curl.repository }}:{{ .Values.images.curl.tag }}
     - name: read-1.0.0
       serviceVersion: 1.0.0
       images:
         read: {{ .Values.images.registry | default "docker.io" }}/{{ .Values.images.repository }}:{{ .Values.images.tag }}
+        wait-index-gateway: {{ .Values.images.registry | default "docker.io" }}/{{ .Values.images.curl.repository }}:{{ .Values.images.curl.tag }}
     - name: backend-1.0.0
       serviceVersion: 1.0.0
       images:
diff --git a/addons/loki/templates/scripts.yaml b/addons/loki/templates/scripts.yaml
new file mode 100644
index 000000000..4bcad142e
--- /dev/null
+++ b/addons/loki/templates/scripts.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "loki.scriptsTemplate" . }}
+  labels:
+    {{- include "loki.labels" . | nindent 4 }}
+  annotations:
+    {{- include "loki.annotations" . | nindent 4 }}
+data:
+  {{- with include "loki.extend.scripts" . }}
+  {{- . | nindent 2 }}
+  {{- end }}
diff --git a/addons/loki/values.yaml b/addons/loki/values.yaml
index a13ea5698..600e33118 100644
--- a/addons/loki/values.yaml
+++ b/addons/loki/values.yaml
@@ -11,6 +11,11 @@ images:
     tag: 1.24-alpine
     repository: nginxinc/nginx-unprivileged
     pullPolicy: IfNotPresent
+  # Curl image for init container
+  curl:
+    repository: apecloud/curl-jq
+    tag: 0.1.0
+    pullPolicy: IfNotPresent
 
 nameOverride: ""
 fullnameOverride: ""