Add index-gateway ring checks for the Loki read/write components.

Review notes (text before the first "diff --git" header is ignored by patch tools):
- wait-index-gateway-ring.sh: dropped "pipefail" (not guaranteed under #!/bin/sh,
  and it made the ACTIVE counter evaluate to two lines when nothing matched),
  added explicit checks for required env vars, per-request curl timeouts, and
  quoted the loop test operands.
- cmpd-read.yaml / cmpd-write.yaml: the init container now aborts when copying
  curl fails, and the CLUSTER_DOMAIN var is quoted like its sibling vars.
- NOTE(review): this patch was recovered from a copy whose line breaks and
  indentation had been collapsed. Leading whitespace of YAML lines is
  reconstructed by convention and must be verified against the target files.
  The whitespace-only change to "- mountPath: /etc/loki/config" could not be
  recovered, so its removed/added lines are shown identically.
- NOTE(review): "index" lines are kept from the original patch; the post-image
  blob ids of the files modified above no longer match their content.

diff --git a/addons/loki/scripts/check-index-gateway-ring.sh b/addons/loki/scripts/check-index-gateway-ring.sh
new file mode 100755
index 000000000..32c087a94
--- /dev/null
+++ b/addons/loki/scripts/check-index-gateway-ring.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+# check-index-gateway-ring.sh
+# Check if index gateway ring has ACTIVE instances
+# This script is used in startupProbe and readinessProbe
+# Uses curl from tools volume (copied by initContainer)
+
+LOCAL_PORT="${SERVER_HTTP_PORT:-3100}"
+CURL="/kb-tools/curl"
+
+# Check if curl is available
+if [ ! -x "$CURL" ]; then
+  echo "curl not found at $CURL"
+  exit 1
+fi
+
+# Check if Loki service is ready
+if ! "$CURL" -sf "http://localhost:${LOCAL_PORT}/ready" > /dev/null 2>&1; then
+  echo "Loki service not ready"
+  exit 1
+fi
+
+# Check index gateway ring for ACTIVE instances
+RING_HTML=$("$CURL" -sf "http://localhost:${LOCAL_PORT}/indexgateway/ring" 2>/dev/null || echo "")
+if [ -z "$RING_HTML" ]; then
+  echo "Cannot access index gateway ring endpoint"
+  exit 1
+fi
+
+# Check HTML for ACTIVE status instances
+ACTIVE_COUNT=$(echo "$RING_HTML" | grep -o 'ACTIVE' | wc -l || echo "0")
+if [ "$ACTIVE_COUNT" -eq "0" ]; then
+  echo "Index gateway ring is empty, no ACTIVE instances found"
+  exit 1
+fi
+
+echo "Index gateway ring is ready with $ACTIVE_COUNT ACTIVE instance(s)"
+exit 0
diff --git a/addons/loki/scripts/wait-index-gateway-ring.sh b/addons/loki/scripts/wait-index-gateway-ring.sh
new file mode 100755
index 000000000..55e3e3765
--- /dev/null
+++ b/addons/loki/scripts/wait-index-gateway-ring.sh
@@ -0,0 +1,51 @@
+#!/bin/sh
+# wait-index-gateway-ring.sh
+# Wait for at least one index gateway instance to be ACTIVE in the ring
+# This script is used as an init container for read/write components
+#
+# Required env: KB_CLUSTER_NAME, KB_NAMESPACE, CLUSTER_DOMAIN
+# Optional env: SERVER_HTTP_PORT (default 3100), MAX_WAIT (seconds, default 300)
+
+# Only -e and -u: "pipefail" is not guaranteed under #!/bin/sh, and with it
+# a no-match grep in the counting pipeline below would be reported as failure.
+set -eu
+
+# Fail fast with a clear message instead of polling an unresolvable URL
+: "${KB_CLUSTER_NAME:?KB_CLUSTER_NAME must be set}"
+: "${KB_NAMESPACE:?KB_NAMESPACE must be set}"
+: "${CLUSTER_DOMAIN:?CLUSTER_DOMAIN must be set}"
+
+BACKEND_SVC="${KB_CLUSTER_NAME}-backend"
+BACKEND_PORT="${SERVER_HTTP_PORT:-3100}"
+BACKEND_HOST="${BACKEND_SVC}.${KB_NAMESPACE}.svc.${CLUSTER_DOMAIN}:${BACKEND_PORT}"
+MAX_WAIT="${MAX_WAIT:-300}"  # 5 minutes default
+POLL_INTERVAL=5
+CURL_TIMEOUT=5  # per-request cap so a hung connection cannot stall the loop
+ELAPSED=0
+
+echo "Waiting for index gateway ring to be ready..."
+echo "Backend service: ${BACKEND_HOST}"
+echo "Max wait time: ${MAX_WAIT} seconds"
+
+while [ "$ELAPSED" -lt "$MAX_WAIT" ]; do
+  # Check if backend service is accessible
+  if curl -sf --max-time "$CURL_TIMEOUT" "http://${BACKEND_HOST}/ready" > /dev/null 2>&1; then
+    # Check ring for ACTIVE instances (parse HTML)
+    RING_HTML=$(curl -sf --max-time "$CURL_TIMEOUT" "http://${BACKEND_HOST}/indexgateway/ring" 2>/dev/null || echo "")
+    if [ -n "$RING_HTML" ]; then
+      # wc always prints exactly one number, so no "|| echo 0" fallback is
+      # needed; such a fallback would append a second line to the value.
+      ACTIVE_COUNT=$(printf '%s\n' "$RING_HTML" | grep -o 'ACTIVE' | wc -l)
+      if [ "$ACTIVE_COUNT" -gt 0 ]; then
+        echo "Index gateway ring is ready with $ACTIVE_COUNT ACTIVE instance(s)"
+        exit 0
+      fi
+    fi
+  fi
+  echo "Waiting for index gateway ring... ($ELAPSED/$MAX_WAIT seconds)"
+  sleep "$POLL_INTERVAL"
+  ELAPSED=$((ELAPSED + POLL_INTERVAL))
+done
+
+echo "Timeout waiting for index gateway ring after $MAX_WAIT seconds"
+exit 1
diff --git a/addons/loki/templates/_helpers.tpl b/addons/loki/templates/_helpers.tpl
index b0fb8adf6..5dbf31e3c 100644
--- a/addons/loki/templates/_helpers.tpl
+++ b/addons/loki/templates/_helpers.tpl
@@ -108,6 +108,7 @@ Docker image name
 {{- if .Values.enterprise.enabled -}}{{- include "loki.enterpriseImage" . -}}{{- else -}}{{- include "loki.lokiImage" . -}}{{- end -}}
 {{- end -}}
 
+
 {{/*
 write fullname
 */}}
@@ -226,6 +227,16 @@ query-scheduler fullname
 {{ include "loki.fullname" . }}-query-scheduler
 {{- end }}
 
+{{/*
+Generate loki scripts configmap
+*/}}
+{{- define "loki.extend.scripts" -}}
+{{- range $path, $_ := $.Files.Glob "scripts/**" }}
+{{ $path | base }}: |-
+{{- $.Files.Get $path | nindent 2 }}
+{{- end }}
+{{- end }}
+
 {{/*
 object storage serviceRef declarations
 */}}
diff --git a/addons/loki/templates/cmpd-read.yaml b/addons/loki/templates/cmpd-read.yaml
index 9964b6614..4d151171c 100644
--- a/addons/loki/templates/cmpd-read.yaml
+++ b/addons/loki/templates/cmpd-read.yaml
@@ -33,20 +33,28 @@ spec:
               matchLabels:
                 app.kubernetes.io/component: read
             topologyKey: kubernetes.io/hostname
+    securityContext:
+      fsGroup: 10001
     initContainers:
-      - name: init
-        imagePullPolicy: {{ .Values.images.pullPolicy }}
-        image: {{ .Values.images.registry | default "docker.io" }}/{{ .Values.images.repository }}:{{ .Values.images.tag }}
-        securityContext:
-          runAsUser: 0
-          privileged: true
+      - name: wait-index-gateway
+        imagePullPolicy: {{ .Values.images.curl.pullPolicy }}
         command:
           - /bin/sh
           - -c
-          - chown loki:loki /var/loki
+          - |
+            # Copy curl to tools volume for use in probes; abort if the copy fails
+            cp /bin/curl /kb-tools/curl || exit 1
+
+            # Execute wait script
+            /kb-scripts/wait-index-gateway-ring.sh
+        env:
+          - name: MAX_WAIT
+            value: "300"
         volumeMounts:
-          - mountPath: /var/loki
-            name: data
+          - name: scripts
+            mountPath: /kb-scripts
+          - name: tools
+            mountPath: /kb-tools
     containers:
       - name: read
         imagePullPolicy: {{ .Values.images.pullPolicy }}
@@ -79,18 +87,26 @@ spec:
           - containerPort: {{ .Values.server.httpMemberlistPort }}
             name: http-memberlist
             protocol: TCP
+        startupProbe:
+          exec:
+            command:
+              - /kb-scripts/check-index-gateway-ring.sh
+          initialDelaySeconds: 10
+          periodSeconds: 5
+          timeoutSeconds: 3
+          successThreshold: 1
+          failureThreshold: 60
         readinessProbe:
-          failureThreshold: 3
-          httpGet:
-            path: /ready
-            port: http-metrics
-            scheme: HTTP
-          initialDelaySeconds: 30
+          exec:
+            command:
+              - /kb-scripts/check-index-gateway-ring.sh
+          initialDelaySeconds: 15
           periodSeconds: 10
+          timeoutSeconds: 3
           successThreshold: 1
-          timeoutSeconds: 1
+          failureThreshold: 3
         volumeMounts:
-          - mountPath: /etc/loki/config
+          - mountPath: /etc/loki/config
             name: config
           - mountPath: /etc/loki/runtime-config
             name: runtime-config
@@ -98,6 +114,13 @@ spec:
             name: tmp
           - mountPath: /var/loki
             name: data
+          - mountPath: /kb-scripts
+            name: scripts
+          - mountPath: /kb-tools
+            name: tools
+    volumes:
+      - emptyDir: {}
+        name: tools
   configs:
     - name: loki-config
       templateRef: loki-tpl
@@ -108,7 +131,15 @@ spec:
       templateRef: loki-runtime-tpl
       volumeName: runtime-config
       namespace: {{ .Release.Namespace }}
+  scripts:
+    - name: loki-scripts
+      templateRef: loki-scripts
+      namespace: {{ .Release.Namespace }}
+      volumeName: scripts
+      defaultMode: 0555
   vars:
+    - name: CLUSTER_DOMAIN
+      value: {{ .Values.global.clusterDomain | quote }}
     - name: SERVER_HTTP_PORT
       value: {{ .Values.server.httpMetricsPort | quote }}
     - name: SERVER_GRPC_PORT
diff --git a/addons/loki/templates/cmpd-write.yaml b/addons/loki/templates/cmpd-write.yaml
index 047503878..e94323249 100644
--- a/addons/loki/templates/cmpd-write.yaml
+++ b/addons/loki/templates/cmpd-write.yaml
@@ -33,20 +33,28 @@ spec:
               matchLabels:
                 app.kubernetes.io/component: write
             topologyKey: kubernetes.io/hostname
+    securityContext:
+      fsGroup: 10001
     initContainers:
-      - name: init
-        imagePullPolicy: {{ .Values.images.pullPolicy }}
-        image: {{ .Values.images.registry | default "docker.io" }}/{{ .Values.images.repository }}:{{ .Values.images.tag }}
-        securityContext:
-          runAsUser: 0
-          privileged: true
+      - name: wait-index-gateway
+        imagePullPolicy: {{ .Values.images.curl.pullPolicy }}
         command:
           - /bin/sh
           - -c
-          - chown loki:loki /var/loki
+          - |
+            # Copy curl to tools volume for use in probes; abort if the copy fails
+            cp /bin/curl /kb-tools/curl || exit 1
+
+            # Execute wait script
+            /kb-scripts/wait-index-gateway-ring.sh
+        env:
+          - name: MAX_WAIT
+            value: "300"
         volumeMounts:
-          - mountPath: /var/loki
-            name: data
+          - name: scripts
+            mountPath: /kb-scripts
+          - name: tools
+            mountPath: /kb-tools
     containers:
       - name: write
         imagePullPolicy: {{ .Values.images.pullPolicy }}
@@ -76,18 +84,26 @@ spec:
           - containerPort: {{ .Values.server.httpMemberlistPort }}
             name: http-memberlist
             protocol: TCP
+        startupProbe:
+          exec:
+            command:
+              - /kb-scripts/check-index-gateway-ring.sh
+          initialDelaySeconds: 10
+          periodSeconds: 5
+          timeoutSeconds: 3
+          successThreshold: 1
+          failureThreshold: 60
         readinessProbe:
-          failureThreshold: 3
-          httpGet:
-            path: /ready
-            port: http-metrics
-            scheme: HTTP
-          initialDelaySeconds: 30
+          exec:
+            command:
+              - /kb-scripts/check-index-gateway-ring.sh
+          initialDelaySeconds: 15
           periodSeconds: 10
+          timeoutSeconds: 3
           successThreshold: 1
-          timeoutSeconds: 1
+          failureThreshold: 3
         volumeMounts:
-          - mountPath: /etc/loki/config
+          - mountPath: /etc/loki/config
             name: config
           - mountPath: /etc/loki/runtime-config
             name: runtime-config
@@ -95,6 +111,13 @@ spec:
             name: tmp
           - mountPath: /var/loki
             name: data
+          - mountPath: /kb-scripts
+            name: scripts
+          - mountPath: /kb-tools
+            name: tools
+    volumes:
+      - emptyDir: {}
+        name: tools
   configs:
     - name: loki-config
       templateRef: loki-tpl
@@ -105,7 +128,15 @@ spec:
       templateRef: loki-runtime-tpl
       volumeName: runtime-config
       namespace: {{ .Release.Namespace }}
+  scripts:
+    - name: loki-scripts
+      templateRef: loki-scripts
+      namespace: {{ .Release.Namespace }}
+      volumeName: scripts
+      defaultMode: 0555
   vars:
+    - name: CLUSTER_DOMAIN
+      value: {{ .Values.global.clusterDomain | quote }}
     - name: SERVER_HTTP_PORT
       value: {{ .Values.server.httpMetricsPort | quote }}
     - name: SERVER_GRPC_PORT
diff --git a/addons/loki/templates/cmpv.yaml b/addons/loki/templates/cmpv.yaml
index 31bdb9b85..d3ad81a7a 100644
--- a/addons/loki/templates/cmpv.yaml
+++ b/addons/loki/templates/cmpv.yaml
@@ -27,10 +27,12 @@ spec:
       serviceVersion: 1.0.0
       images:
         write: {{ .Values.images.registry | default "docker.io" }}/{{ .Values.images.repository }}:{{ .Values.images.tag }}
+        wait-index-gateway: {{ .Values.images.registry | default "docker.io" }}/{{ .Values.images.curl.repository }}:{{ .Values.images.curl.tag }}
     - name: read-1.0.0
       serviceVersion: 1.0.0
       images:
         read: {{ .Values.images.registry | default "docker.io" }}/{{ .Values.images.repository }}:{{ .Values.images.tag }}
+        wait-index-gateway: {{ .Values.images.registry | default "docker.io" }}/{{ .Values.images.curl.repository }}:{{ .Values.images.curl.tag }}
     - name: backend-1.0.0
       serviceVersion: 1.0.0
       images:
diff --git a/addons/loki/templates/scripts.yaml b/addons/loki/templates/scripts.yaml
new file mode 100644
index 000000000..a7a37ed02
--- /dev/null
+++ b/addons/loki/templates/scripts.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: loki-scripts
+  labels:
+    {{- include "loki.labels" . | nindent 4 }}
+data:
+  {{- with include "loki.extend.scripts" . }}
+  {{- . | nindent 2 }}
+  {{- end }}
diff --git a/addons/loki/values.yaml b/addons/loki/values.yaml
index e47bfbfdd..a3738760f 100644
--- a/addons/loki/values.yaml
+++ b/addons/loki/values.yaml
@@ -11,6 +11,11 @@ images:
     tag: 1.24-alpine
     repository: nginxinc/nginx-unprivileged
     pullPolicy: IfNotPresent
+  # Curl image for init container
+  curl:
+    repository: apecloud/curl-jq
+    tag: 0.1.0
+    pullPolicy: IfNotPresent
 
 nameOverride: ""
 fullnameOverride: ""