From 6426c94728a064a41712577f465cc3e3044aa661 Mon Sep 17 00:00:00 2001
From: "Chandan Kumar (raukadah)"
Date: Fri, 9 May 2025 16:44:36 +0530
Subject: [PATCH] [DNM] replace redis with valkey

Note: this patch is added for testing purposes only.

Signed-off-by: Chandan Kumar (raukadah)
---
Review notes (commentary only, ignored by git-am):
 - sentinel.conf.in: the per-master directives name the master "redis" so
   they match the "sentinel monitor redis ..." line appended by
   start_sentinel.sh and the "sentinel master redis" queries in the start
   scripts.
 - pkg/redis/volumes.go: the ConfigMap key for the main server config is
   "valkey.conf.in", matching the template file name and the other renamed
   keys.
 - check_valkey_endpoints.sh: restored the argument list in the header
   comment.
 - The blob hashes on the "index" lines of the three files above were not
   regenerated.

 pkg/redis/statefulset.go                      |  6 +-
 pkg/redis/volumes.go                          | 24 ++---
 .../valkey/bin/check_valkey_endpoints.sh      | 38 ++++++++
 templates/valkey/bin/common.sh                | 90 +++++++++++++++++++
 templates/valkey/bin/start_sentinel.sh        | 28 ++++++
 .../valkey/bin/start_valkey_replication.sh    | 26 ++++++
 templates/valkey/bin/valkey_probe.sh          | 19 ++++
 templates/valkey/config/config-sentinel.json  | 34 +++++++
 templates/valkey/config/config.json           | 33 +++++++
 templates/valkey/config/sentinel-tls.conf.in  |  9 ++
 templates/valkey/config/sentinel.conf.in      |  8 ++
 templates/valkey/config/valkey-tls.conf.in    |  9 ++
 templates/valkey/config/valkey.conf.in        | 37 ++++++++
 13 files changed, 346 insertions(+), 15 deletions(-)
 create mode 100755 templates/valkey/bin/check_valkey_endpoints.sh
 create mode 100644 templates/valkey/bin/common.sh
 create mode 100755 templates/valkey/bin/start_sentinel.sh
 create mode 100755 templates/valkey/bin/start_valkey_replication.sh
 create mode 100755 templates/valkey/bin/valkey_probe.sh
 create mode 100644 templates/valkey/config/config-sentinel.json
 create mode 100644 templates/valkey/config/config.json
 create mode 100644 templates/valkey/config/sentinel-tls.conf.in
 create mode 100644 templates/valkey/config/sentinel.conf.in
 create mode 100644 templates/valkey/config/valkey-tls.conf.in
 create mode 100644 templates/valkey/config/valkey.conf.in

diff --git a/pkg/redis/statefulset.go b/pkg/redis/statefulset.go
index b873d2f3..1408334a 100644
--- a/pkg/redis/statefulset.go
+++ b/pkg/redis/statefulset.go
@@ -99,7 +99,7 @@ func StatefulSet(
 				ServiceAccountName: r.RbacResourceName(),
 				Containers: []corev1.Container{{
 					Image:        r.Spec.ContainerImage,
-					Command:      []string{"/var/lib/operator-scripts/start_redis_replication.sh"},
+					Command:      []string{"/var/lib/operator-scripts/start_valkey_replication.sh"},
 					Name:         "redis",
 					Env:          commonEnvVars,
 					VolumeMounts: getRedisVolumeMounts(r),
@@ -110,14 +110,14 @@ func StatefulSet(
 					LivenessProbe: &corev1.Probe{
 						ProbeHandler: corev1.ProbeHandler{
 							Exec: &corev1.ExecAction{
-								Command: []string{"/var/lib/operator-scripts/redis_probe.sh", "liveness"},
+								Command: []string{"/var/lib/operator-scripts/valkey_probe.sh", "liveness"},
 							},
 						},
 					},
 					ReadinessProbe: &corev1.Probe{
 						ProbeHandler: corev1.ProbeHandler{
 							Exec: &corev1.ExecAction{
-								Command: []string{"/var/lib/operator-scripts/redis_probe.sh", "readiness"},
+								Command: []string{"/var/lib/operator-scripts/valkey_probe.sh", "readiness"},
 							},
 						},
 					},
diff --git a/pkg/redis/volumes.go b/pkg/redis/volumes.go
index 101535b4..a7ca36fb 100644
--- a/pkg/redis/volumes.go
+++ b/pkg/redis/volumes.go
@@ -17,22 +17,22 @@ func getVolumes(r *redisv1.Redis) []corev1.Volume {
 	configDataFiles := []corev1.KeyToPath{
 		{
 			Key:  "sentinel.conf.in",
-			Path: "var/lib/redis/sentinel.conf.in",
+			Path: "var/lib/valkey/sentinel.conf.in",
 		},
 		{
-			Key:  "redis.conf.in",
-			Path: "var/lib/redis/redis.conf.in",
+			Key:  "valkey.conf.in",
+			Path: "var/lib/valkey/valkey.conf.in",
 		},
 	}
 	if r.Spec.TLS.Enabled() {
 		configDataFiles = append(configDataFiles, []corev1.KeyToPath{
 			{
-				Key:  "redis-tls.conf.in",
-				Path: "var/lib/redis/redis-tls.conf.in",
+				Key:  "valkey-tls.conf.in",
+				Path: "var/lib/valkey/valkey-tls.conf.in",
 			},
 			{
 				Key:  "sentinel-tls.conf.in",
-				Path: "var/lib/redis/sentinel-tls.conf.in",
+				Path: "var/lib/valkey/sentinel-tls.conf.in",
 			},
 		}...)
 	}
@@ -96,20 +96,20 @@ func getVolumes(r *redisv1.Redis) []corev1.Volume {
 					},
 					Items: []corev1.KeyToPath{
 						{
-							Key:  "start_redis_replication.sh",
-							Path: "start_redis_replication.sh",
+							Key:  "start_valkey_replication.sh",
+							Path: "start_valkey_replication.sh",
 						},
 						{
 							Key:  "start_sentinel.sh",
 							Path: "start_sentinel.sh",
 						},
 						{
-							Key:  "redis_probe.sh",
-							Path: "redis_probe.sh",
+							Key:  "valkey_probe.sh",
+							Path: "valkey_probe.sh",
 						},
 						{
-							Key:  "check_redis_endpoints.sh",
-							Path: "check_redis_endpoints.sh",
+							Key:  "check_valkey_endpoints.sh",
+							Path: "check_valkey_endpoints.sh",
 						},
 						{
 							Key:  "common.sh",
diff --git a/templates/valkey/bin/check_valkey_endpoints.sh b/templates/valkey/bin/check_valkey_endpoints.sh
new file mode 100755
index 00000000..dc4eb671
--- /dev/null
+++ b/templates/valkey/bin/check_valkey_endpoints.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. /var/lib/operator-scripts/common.sh
+
+# When the master changed because of a failover, redis notifies this
+# script with the following arguments:
+# <master-name> <role> <state> <from-ip> <from-port> <to-ip> <to-port>
+
+log "$0 called with arguments: $*"
+
+CLUSTER_NAME=$1
+POD_ROLE=$2
+STATE=$3
+OLD_MASTER=$4
+NEW_MASTER=$6
+
+OLD_POD=$(echo $OLD_MASTER | cut -d. -f1)
+NEW_POD=$(echo $NEW_MASTER | cut -d. -f1)
+
+if [ "$POD_ROLE" = "leader" ]; then
+    log "Preparing the endpoint for the failover ${OLD_POD} -> ${NEW_POD}"
+
+    log "Removing ${OLD_POD} from the Redis service's endpoint"
+    remove_pod_label $OLD_POD redis~1master
+    if [ $? != 0 ]; then
+        log_error "Could not remove service endpoint. Aborting"
+        exit 1
+    fi
+
+    log "Setting ${NEW_POD} as the new endpoint for the Redis service"
+    set_pod_label $NEW_POD redis~1master
+    if [ $? != 0 ]; then
+        log_error "Could not add service endpoint. Aborting"
+        exit 1
+    fi
+else
+    log "No action taken since we were an observer during the failover"
+fi
diff --git a/templates/valkey/bin/common.sh b/templates/valkey/bin/common.sh
new file mode 100644
index 00000000..04aee3d2
--- /dev/null
+++ b/templates/valkey/bin/common.sh
@@ -0,0 +1,90 @@
+# Environment variable common to all scripts
+APISERVER=https://kubernetes.default.svc
+SERVICEACCOUNT=/var/run/secrets/kubernetes.io/serviceaccount
+NAMESPACE=$(cat ${SERVICEACCOUNT}/namespace)
+TOKEN=$(cat ${SERVICEACCOUNT}/token)
+CACERT=${SERVICEACCOUNT}/ca.crt
+
+TIMEOUT=3
+
+POD_NAME=$HOSTNAME
+POD_FQDN=$HOSTNAME.$SVC_FQDN
+
+if test -d /var/lib/config-data/tls; then
+    REDIS_CLI_CMD="valkey-cli --tls"
+    REDIS_CONFIG=/var/lib/valkey/valkey-tls.conf
+    SENTINEL_CONFIG=/var/lib/valkey/sentinel-tls.conf
+else
+    REDIS_CLI_CMD=valkey-cli
+    REDIS_CONFIG=/var/lib/valkey/valkey.conf
+    SENTINEL_CONFIG=/var/lib/valkey/sentinel.conf
+fi
+
+function log() {
+    echo "$(date +%F_%H_%M_%S) $*"
+}
+
+function log_error() {
+    echo "$(date +%F_%H_%M_%S) ERROR: $*"
+}
+
+function generate_configs() {
+    # Copying config files except template files
+    tar -C /var/lib/config-data --exclude '..*' --exclude '*.in' -h -c default | tar -C /var/lib/config-data/generated -x --strip=1
+    # Generating config files from templates
+    cd /var/lib/config-data/default
+    for cfg in $(find -L * -name '*.conf.in'); do
+        log "Generating config file from template $PWD/${cfg}"
+        sed -e "s/{ POD_FQDN }/${POD_FQDN}/" "${cfg}" > "/var/lib/config-data/generated/${cfg%.in}"
+    done
+}
+
+function is_bootstrap_pod() {
+    echo "$1" | grep -qe '-0$'
+}
+
+function extract() {
+    local var="$1"
+    local output="$2"
+    # parse curl vars as well as kube api error fields
+    echo "$output" | awk -F'[:,]' "/\"?${var}\"?:/ {print \$2; exit}"
+}
+
+function configure_pod_label() {
+    local pod="$1"
+    local patch="$2"
+    local success="$3"
+    local curlvars="\nexitcode:%{exitcode}\nerrormsg:%{errormsg}\nhttpcode:%{response_code}\n"
+
+    response=$(curl -s -w "${curlvars}" --cacert ${CACERT} --header "Content-Type:application/json-patch+json" --header "Authorization: Bearer ${TOKEN}" --request PATCH --data "$patch" ${APISERVER}/api/v1/namespaces/${NAMESPACE}/pods/${pod})
+
+    exitcode=$(extract exitcode "$response")
+    if [ $exitcode -ne 0 ]; then
+        errormsg=$(extract errormsg "$response")
+        log_error "Error when running curl: ${errormsg} (${exitcode})"
+        return 1
+    fi
+
+    httpcode=$(extract httpcode "$response")
+    if echo "${httpcode}" | grep -v -E "^${success}$"; then
+        message=$(extract message "$response")
+        log_error "Error when calling API server: ${message} (${httpcode})"
+        return 1
+    fi
+}
+
+function remove_pod_label() {
+    local pod="$1"
+    local label="$2"
+    local patch="[{\"op\": \"remove\", \"path\": \"/metadata/labels/${label}\"}]"
+    # 200: OK, 422: not found
+    configure_pod_label $pod "$patch" "(200|422)"
+}
+
+function set_pod_label() {
+    local pod="$1"
+    local label="$2"
+    local patch="[{\"op\": \"add\", \"path\": \"/metadata/labels/${label}\", \"value\": \"true\"}]"
+    # 200: OK
+    configure_pod_label $pod "$patch" "200"
+}
diff --git a/templates/valkey/bin/start_sentinel.sh b/templates/valkey/bin/start_sentinel.sh
new file mode 100755
index 00000000..f771a185
--- /dev/null
+++ b/templates/valkey/bin/start_sentinel.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. /var/lib/operator-scripts/common.sh
+
+generate_configs
+sudo -E kolla_set_configs
+
+# 1. check if a redis cluster is already running by contacting sentinel
+output=$(timeout ${TIMEOUT} $REDIS_CLI_CMD -h ${SVC_FQDN} -p 26379 sentinel master redis)
+if [ $? -eq 0 ]; then
+    master=$(echo "$output" | awk '/^ip$/ {getline; print $0; exit}')
+    # TODO skip if no master was found
+    log "Connecting to the existing sentinel cluster (master: $master)"
+    echo "sentinel monitor redis ${master} 6379 ${SENTINEL_QUORUM}" >> $SENTINEL_CONFIG
+    exec valkey-sentinel $SENTINEL_CONFIG
+fi
+
+# 2. else let the pod's redis server bootstrap a new cluster and monitor it
+# (assume we should be the first redis pod)
+if is_bootstrap_pod $POD_NAME; then
+    log "Bootstrapping a new sentinel cluster"
+    echo "sentinel monitor redis ${POD_FQDN} 6379 ${SENTINEL_QUORUM}" >> $SENTINEL_CONFIG
+    exec valkey-sentinel $SENTINEL_CONFIG
+fi
+
+# 3. else this is an error, exit and let the pod restart and try again
+echo "Could not connect to a sentinel cluster"
+exit 1
diff --git a/templates/valkey/bin/start_valkey_replication.sh b/templates/valkey/bin/start_valkey_replication.sh
new file mode 100755
index 00000000..2f5ef762
--- /dev/null
+++ b/templates/valkey/bin/start_valkey_replication.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. /var/lib/operator-scripts/common.sh
+
+generate_configs
+sudo -E kolla_set_configs
+
+# 1. check if a redis cluster is already running by contacting sentinel
+output=$(timeout ${TIMEOUT} $REDIS_CLI_CMD -h ${SVC_FQDN} -p 26379 sentinel master redis)
+if [ $? -eq 0 ]; then
+    master=$(echo "$output" | awk '/^ip$/ {getline; print $0; exit}')
+    # TODO skip if no master was found
+    log "Connecting to the existing Redis cluster (master: ${master})"
+    exec valkey-server $REDIS_CONFIG --protected-mode no --replicaof "$master" 6379
+fi
+
+# 2. else bootstrap a new cluster (assume we should be the first redis pod)
+if is_bootstrap_pod $POD_NAME; then
+    log "Bootstrapping a new Redis cluster from ${POD_NAME}"
+    set_pod_label $POD_NAME redis~1master
+    exec valkey-server $REDIS_CONFIG --protected-mode no
+fi
+
+# 3. else this is an error, exit and let the pod restart and try again
+echo "Could not connect to a redis cluster"
+exit 1
diff --git a/templates/valkey/bin/valkey_probe.sh b/templates/valkey/bin/valkey_probe.sh
new file mode 100755
index 00000000..94390351
--- /dev/null
+++ b/templates/valkey/bin/valkey_probe.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+set -eux
+
+. /var/lib/operator-scripts/common.sh
+
+case "$1" in
+    readiness)
+        # ready if we're the master or if we're a slave connected to the current master
+        output=$($REDIS_CLI_CMD info replication | tr -d '\r')
+        declare -A state
+        while IFS=: read -r key value; do state[$key]=$value; done < <(echo "$output")
+        [[ "${state[role]}" == "master" ]] || [[ "${state[role]}" == "slave" && "${state[master_link_status]}" == "up" ]]
+        ;;
+    liveness)
+        $REDIS_CLI_CMD -e ping >/dev/null;;
+    *)
+        echo "Invalid probe option '$1'"
+        exit 1;;
+esac
diff --git a/templates/valkey/config/config-sentinel.json b/templates/valkey/config/config-sentinel.json
new file mode 100644
index 00000000..3b3c3744
--- /dev/null
+++ b/templates/valkey/config/config-sentinel.json
@@ -0,0 +1,34 @@
+{
+    "command": "valkey-sentinel /var/lib/valkey/sentinel.conf",
+    "config_files": [
+        {
+            "dest": "/",
+            "merge": true,
+            "preserve_properties": true,
+            "optional": true,
+            "source": "/var/lib/config-data/generated/*"
+        },
+        {
+            "source": "/var/lib/config-data/tls/private/valkey.key",
+            "dest": "/etc/pki/tls/private/valkey.key",
+            "owner": "valkey",
+            "perm": "0600",
+            "optional": true
+        },
+        {
+            "source": "/var/lib/config-data/tls/certs/valkey.crt",
+            "dest": "/etc/pki/tls/certs/valkey.crt",
+            "owner": "valkey",
+            "perm": "0755",
+            "optional": true
+        }
+    ],
+    "permissions": [
+        {
+            "owner": "valkey:valkey",
+            "perm": "0755",
+            "path": "/var/lib/valkey",
+            "recursive": true
+        }
+    ]
+}
diff --git a/templates/valkey/config/config.json b/templates/valkey/config/config.json
new file mode 100644
index 00000000..d2d09d5d
--- /dev/null
+++ b/templates/valkey/config/config.json
@@ -0,0 +1,33 @@
+{
+    "command": "valkey-server /var/lib/valkey/valkey.conf",
+    "config_files": [
+        {
+            "dest": "/",
+            "merge": true,
+            "preserve_properties": true,
+            "optional": true,
+            "source": "/var/lib/config-data/generated/*"
+        },
+        {
+            "source": "/var/lib/config-data/tls/private/valkey.key",
+            "dest": "/etc/pki/tls/private/valkey.key",
+            "owner": "valkey",
+            "perm": "0600",
+            "optional": true
+        },
+        {
+            "source": "/var/lib/config-data/tls/certs/valkey.crt",
+            "dest": "/etc/pki/tls/certs/valkey.crt",
+            "owner": "valkey",
+            "perm": "0755",
+            "optional": true
+        }
+    ],
+    "permissions": [
+        {
+            "owner": "valkey:valkey",
+            "path": "/var/lib/valkey",
+            "recursive": true
+        }
+    ]
+}
diff --git a/templates/valkey/config/sentinel-tls.conf.in b/templates/valkey/config/sentinel-tls.conf.in
new file mode 100644
index 00000000..71a94951
--- /dev/null
+++ b/templates/valkey/config/sentinel-tls.conf.in
@@ -0,0 +1,9 @@
+include /var/lib/valkey/sentinel.conf
+
+port 0
+tls-port 26379
+tls-cert-file /etc/pki/tls/certs/valkey.crt
+tls-key-file /etc/pki/tls/private/valkey.key
+tls-ca-cert-file /etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem
+tls-replication yes
+tls-auth-clients optional
diff --git a/templates/valkey/config/sentinel.conf.in b/templates/valkey/config/sentinel.conf.in
new file mode 100644
index 00000000..6a223dee
--- /dev/null
+++ b/templates/valkey/config/sentinel.conf.in
@@ -0,0 +1,8 @@
+dir /var/lib/valkey
+bind { POD_FQDN } localhost
+sentinel resolve-hostnames yes
+sentinel announce-hostnames yes
+sentinel announce-ip { POD_FQDN }
+sentinel down-after-milliseconds redis 10000
+sentinel failover-timeout redis 20000
+sentinel client-reconfig-script redis /var/lib/operator-scripts/check_valkey_endpoints.sh
diff --git a/templates/valkey/config/valkey-tls.conf.in b/templates/valkey/config/valkey-tls.conf.in
new file mode 100644
index 00000000..82882553
--- /dev/null
+++ b/templates/valkey/config/valkey-tls.conf.in
@@ -0,0 +1,9 @@
+include /var/lib/valkey/valkey.conf
+
+port 0
+tls-port 6379
+tls-cert-file /etc/pki/tls/certs/valkey.crt
+tls-key-file /etc/pki/tls/private/valkey.key
+tls-ca-cert-file /etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem
+tls-replication yes
+tls-auth-clients optional
diff --git a/templates/valkey/config/valkey.conf.in b/templates/valkey/config/valkey.conf.in
new file mode 100644
index 00000000..bf7f88f0
--- /dev/null
+++ b/templates/valkey/config/valkey.conf.in
@@ -0,0 +1,37 @@
+dir /var/lib/valkey
+bind { POD_FQDN } localhost
+replica-announce-ip { POD_FQDN }
+port 6379
+tcp-backlog 511
+timeout 0
+tcp-keepalive 300
+daemonize no
+loglevel notice
+logfile ""
+databases 16
+always-show-logo no
+set-proc-title yes
+proc-title-template "{title} {listen-addr} {server-mode}"
+stop-writes-on-bgsave-error yes
+rdbcompression yes
+rdbchecksum yes
+dbfilename dump.rdb
+rdb-del-sync-files no
+replica-serve-stale-data yes
+replica-read-only yes
+repl-diskless-sync no
+repl-diskless-sync-delay 5
+repl-diskless-load disabled
+repl-disable-tcp-nodelay no
+replica-priority 100
+acllog-max-len 128
+lazyfree-lazy-eviction no
+lazyfree-lazy-expire no
+lazyfree-lazy-server-del no
+replica-lazy-flush no
+lazyfree-lazy-user-del no
+lazyfree-lazy-user-flush no
+oom-score-adj no
+oom-score-adj-values 0 200 800
+disable-thp yes
+appendonly no