Skip to content

Commit afd6636

Browse files
authored
fix: tls connection broken in replication with sentinel (#1472)
* feat: support tls in replication with sentinel Signed-off-by: yangw <[email protected]> * fix lint Signed-off-by: yangw <[email protected]> * fix lint Signed-off-by: yangw <[email protected]> * fix monitor Signed-off-by: yangw <[email protected]> * tls flag Signed-off-by: yangw <[email protected]> --------- Signed-off-by: yangw <[email protected]>
1 parent a2f7243 commit afd6636

File tree

20 files changed

+372
-94
lines changed

20 files changed

+372
-94
lines changed

.github/workflows/ci.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,11 +238,14 @@ jobs:
238238

239239
# NOTE: This is a workaround for the issue where the default storage class does not support volume expansion.
240240
# Since we don't require PVC resizing (unlike physical disks), we can simply ensure that the requested PVC size is met.
241-
- name: Set allowVolumeExpansion to true
241+
- name: Setup k8s Kind Cluster
242242
run: |
243243
DEFAULT_SC=$(kubectl get storageclass -o=jsonpath='{.items[?(@.metadata.annotations.storageclass\.kubernetes\.io/is-default-class=="true")].metadata.name}')
244244
kubectl patch storageclass $DEFAULT_SC -p '{"allowVolumeExpansion": true}'
245245
246+
# install cert-manager
247+
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.18.2/cert-manager.yaml
248+
246249
- name: Load Docker image into Kind
247250
run: |
248251
kubectl cluster-info --context kind-kind

.yamllint.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ extends: default
22

33
rules:
44
line-length:
5-
max: 200
5+
max: 300
66
level: warning
77
allow-non-breakable-words: true
88
allow-non-breakable-inline-mappings: true

internal/agent/bootstrap/sentinel/config.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ func GenerateConfig() error {
108108
cfg.Append("tls-key-file", redisTLSCertKey)
109109
cfg.Append("tls-ca-cert-file", redisTLSCAKey)
110110
cfg.Append("tls-auth-clients", "optional")
111+
// Sentinel should use tls for replication connection.
112+
cfg.Append("tls-replication", "yes")
111113
} else {
112114
fmt.Println("Running sentinel without TLS mode")
113115
}

internal/controller/common/redis/check.go

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,7 @@ func (c *checker) GetMasterFromReplication(ctx context.Context, rr *rr.RedisRepl
7171

7272
var masterPods []corev1.Pod
7373
for _, pod := range pods.Items {
74-
connInfo := &redis.ConnectionInfo{
75-
IP: pod.Status.PodIP,
76-
Port: "6379",
77-
Password: password,
78-
}
74+
connInfo := createConnectionInfo(ctx, pod, password, rr.Spec.TLS, c.k8s, rr.Namespace, "6379")
7975
isMaster, err := c.redis.Connect(connInfo).IsMaster(ctx)
8076
if err != nil {
8177
return corev1.Pod{}, err
@@ -87,11 +83,7 @@ func (c *checker) GetMasterFromReplication(ctx context.Context, rr *rr.RedisRepl
8783

8884
var realMasterPod corev1.Pod
8985
for _, pod := range masterPods {
90-
connInfo := &redis.ConnectionInfo{
91-
IP: pod.Status.PodIP,
92-
Port: "6379",
93-
Password: password,
94-
}
86+
connInfo := createConnectionInfo(ctx, pod, password, rr.Spec.TLS, c.k8s, rr.Namespace, "6379")
9587
count, err := c.redis.Connect(connInfo).GetAttachedReplicaCount(ctx)
9688
if err != nil {
9789
continue

internal/controller/common/redis/heal.go

Lines changed: 71 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@ package redis
22

33
import (
44
"context"
5+
"crypto/tls"
6+
"crypto/x509"
57
"fmt"
68
"strings"
79

810
commonapi "github.com/OT-CONTAINER-KIT/redis-operator/api/common/v1beta2"
911
rsvb2 "github.com/OT-CONTAINER-KIT/redis-operator/api/redissentinel/v1beta2"
1012
"github.com/OT-CONTAINER-KIT/redis-operator/internal/controller/common"
1113
"github.com/OT-CONTAINER-KIT/redis-operator/internal/service/redis"
14+
"github.com/OT-CONTAINER-KIT/redis-operator/internal/util/cryptutil"
1215
v1 "k8s.io/api/core/v1"
1316
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1417
"k8s.io/apimachinery/pkg/types"
@@ -22,7 +25,7 @@ type Healer interface {
2225
SentinelReset(ctx context.Context, rs *rsvb2.RedisSentinel) error
2326

2427
// UpdatePodRoleLabel connect to all redis pods and update pod role label `redis-role` to `master` or `slave` according to their role.
25-
UpdateRedisRoleLabel(ctx context.Context, ns string, labels map[string]string, secret *commonapi.ExistingPasswordSecret) error
28+
UpdateRedisRoleLabel(ctx context.Context, ns string, labels map[string]string, secret *commonapi.ExistingPasswordSecret, tlsConfig *commonapi.TLSConfig) error
2629
}
2730

2831
type healer struct {
@@ -37,7 +40,7 @@ func NewHealer(clientset kubernetes.Interface) Healer {
3740
}
3841
}
3942

40-
func (h *healer) UpdateRedisRoleLabel(ctx context.Context, ns string, labels map[string]string, secret *commonapi.ExistingPasswordSecret) error {
43+
func (h *healer) UpdateRedisRoleLabel(ctx context.Context, ns string, labels map[string]string, secret *commonapi.ExistingPasswordSecret, tlsConfig *commonapi.TLSConfig) error {
4144
selector := make([]string, 0, len(labels))
4245
for key, value := range labels {
4346
selector = append(selector, fmt.Sprintf("%s=%s", key, value))
@@ -53,11 +56,7 @@ func (h *healer) UpdateRedisRoleLabel(ctx context.Context, ns string, labels map
5356
return err
5457
}
5558
for _, pod := range pods.Items {
56-
connInfo := &redis.ConnectionInfo{
57-
IP: pod.Status.PodIP,
58-
Port: "6379",
59-
Password: password,
60-
}
59+
connInfo := createConnectionInfo(ctx, pod, password, tlsConfig, h.k8s, ns, "6379")
6160
isMaster, err := h.redis.Connect(connInfo).IsMaster(ctx)
6261
if err != nil {
6362
return err
@@ -98,11 +97,8 @@ func (h *healer) SentinelReset(ctx context.Context, rs *rsvb2.RedisSentinel) err
9897
}
9998

10099
for _, pod := range pods.Items {
101-
connInfo := &redis.ConnectionInfo{
102-
IP: pod.Status.PodIP,
103-
Port: "26379",
104-
Password: sentinelPass,
105-
}
100+
connInfo := createConnectionInfo(ctx, pod, sentinelPass, rs.Spec.TLS, h.k8s, rs.Namespace, "26379")
101+
106102
err = h.redis.Connect(connInfo).SentinelReset(ctx, rs.Spec.RedisSentinelConfig.MasterGroupName)
107103
if err != nil {
108104
return err
@@ -135,13 +131,10 @@ func (h *healer) SentinelMonitor(ctx context.Context, rs *rsvb2.RedisSentinel, m
135131
}
136132

137133
for _, pod := range pods.Items {
138-
connInfo := &redis.ConnectionInfo{
139-
IP: pod.Status.PodIP,
140-
Port: "26379",
141-
Password: sentinelPass,
142-
}
134+
connInfo := createConnectionInfo(ctx, pod, sentinelPass, rs.Spec.TLS, h.k8s, rs.Namespace, "26379")
135+
143136
masterConnInfo := &redis.ConnectionInfo{
144-
IP: master,
137+
Host: master,
145138
Port: "6379",
146139
Password: masterPass,
147140
}
@@ -177,3 +170,63 @@ func (h *healer) getSentinelPods(ctx context.Context, rs *rsvb2.RedisSentinel) (
177170
}
178171
return pods, nil
179172
}
173+
174+
// getRedisTLSConfig creates a TLS configuration for Redis connections
175+
func getRedisTLSConfig(ctx context.Context, client kubernetes.Interface, namespace, tlsSecretName string) *tls.Config {
176+
// This is a wrapper to access the k8sutils internal function
177+
// We'll implement a simplified version here for now
178+
secret, err := client.CoreV1().Secrets(namespace).Get(ctx, tlsSecretName, metav1.GetOptions{})
179+
if err != nil {
180+
log.FromContext(ctx).Error(err, "Failed in getting TLS secret", "secretName", tlsSecretName, "namespace", namespace)
181+
return nil
182+
}
183+
184+
tlsClientCert, certExists := secret.Data["tls.crt"]
185+
tlsClientKey, keyExists := secret.Data["tls.key"]
186+
tlsCACert, caExists := secret.Data["ca.crt"]
187+
188+
if !certExists || !keyExists || !caExists {
189+
log.FromContext(ctx).Error(fmt.Errorf("TLS secret missing required keys"), "TLS secret is missing required keys", "secretName", tlsSecretName)
190+
return nil
191+
}
192+
193+
cert, err := tls.X509KeyPair(tlsClientCert, tlsClientKey)
194+
if err != nil {
195+
log.FromContext(ctx).Error(err, "Failed to load TLS key pair", "secretName", tlsSecretName)
196+
return nil
197+
}
198+
199+
caCertPool := x509.NewCertPool()
200+
caCertPool.AppendCertsFromPEM(tlsCACert)
201+
202+
return &tls.Config{
203+
Certificates: []tls.Certificate{cert},
204+
RootCAs: caCertPool,
205+
MinVersion: tls.VersionTLS12,
206+
InsecureSkipVerify: true,
207+
VerifyPeerCertificate: func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
208+
_, _, err := cryptutil.VerifyCertificateExceptServerName(rawCerts, &tls.Config{RootCAs: caCertPool})
209+
return err
210+
},
211+
}
212+
}
213+
214+
// createConnectionInfo creates a Redis connection info with TLS support
215+
func createConnectionInfo(ctx context.Context, pod v1.Pod, password string, tlsConfig *commonapi.TLSConfig, k8sClient kubernetes.Interface, namespace, port string) *redis.ConnectionInfo {
216+
connInfo := &redis.ConnectionInfo{
217+
Host: pod.Status.PodIP,
218+
Port: port,
219+
Password: password,
220+
}
221+
222+
// Configure TLS if enabled
223+
if tlsConfig != nil && tlsConfig.Secret.SecretName != "" {
224+
serviceName := common.GetHeadlessServiceNameFromPodName(pod.Name)
225+
connInfo.Host = fmt.Sprintf("%s.%s.%s.svc.cluster.local", pod.Name, serviceName, namespace)
226+
// Get TLS configuration
227+
tlsCfg := getRedisTLSConfig(ctx, k8sClient, namespace, tlsConfig.Secret.SecretName)
228+
connInfo.TLSConfig = tlsCfg
229+
}
230+
231+
return connInfo
232+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package common
2+
3+
import (
4+
"strconv"
5+
"strings"
6+
)
7+
8+
// GetHeadlessServiceNameFromPodName trims the trailing ordinal (e.g. "-0", "-1") from a pod name to
9+
// derive the headless service name. If the pod name does not contain a trailing
10+
// ordinal segment the original name is returned unchanged.
11+
//
12+
// Examples for different Redis modes:
13+
// - RedisReplication: "redis-replication-0" -> "redis-replication-headless"
14+
// - RedisCluster Leader: "redis-cluster-leader-0" -> "redis-cluster-leader-headless"
15+
// - RedisCluster Follower: "redis-cluster-follower-1" -> "redis-cluster-follower-headless"
16+
// - RedisSentinel: "redis-sentinel-sentinel-0" -> "redis-sentinel-sentinel-headless"
17+
func GetHeadlessServiceNameFromPodName(podName string) string {
18+
// Find the last dash in the pod name. If there is none, return the whole name.
19+
idx := strings.LastIndex(podName, "-")
20+
if idx == -1 {
21+
return podName
22+
}
23+
// Check whether the suffix after the last dash is a number (the StatefulSet
24+
// ordinal). If it is, trim it to get the service name; otherwise return the
25+
// original pod name.
26+
if _, err := strconv.Atoi(podName[idx+1:]); err == nil {
27+
return podName[:idx] + "-headless"
28+
}
29+
return podName
30+
}

internal/controller/rediscluster/rediscluster_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
287287

288288
for _, fakeRole := range []string{"leader", "follower"} {
289289
labels := common.GetRedisLabels(instance.GetName()+"-"+fakeRole, common.SetupTypeCluster, fakeRole, instance.GetLabels())
290-
if err = r.Healer.UpdateRedisRoleLabel(ctx, instance.GetNamespace(), labels, instance.Spec.KubernetesConfig.ExistingPasswordSecret); err != nil {
290+
if err = r.Healer.UpdateRedisRoleLabel(ctx, instance.GetNamespace(), labels, instance.Spec.KubernetesConfig.ExistingPasswordSecret, instance.Spec.TLS); err != nil {
291291
return intctrlutil.RequeueE(ctx, err, "")
292292
}
293293
}

internal/controller/redisreplication/redisreplication_controller.go

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,14 @@ func (r *Reconciler) reconcileRedis(ctx context.Context, instance *rrvb2.RedisRe
141141
defer cancel()
142142

143143
var realMaster string
144-
masterNodes := k8sutils.GetRedisNodesByRole(ctx, r.K8sClient, instance, "master")
145-
slaveNodes := k8sutils.GetRedisNodesByRole(ctx, r.K8sClient, instance, "slave")
144+
masterNodes, err := k8sutils.GetRedisNodesByRole(ctx, r.K8sClient, instance, "master")
145+
if err != nil {
146+
return intctrlutil.RequeueE(ctx, err, "")
147+
}
148+
slaveNodes, err := k8sutils.GetRedisNodesByRole(ctx, r.K8sClient, instance, "slave")
149+
if err != nil {
150+
return intctrlutil.RequeueE(ctx, err, "")
151+
}
146152
if len(masterNodes) > 1 {
147153
log.FromContext(ctx).Info("Creating redis replication by executing replication creation commands")
148154

@@ -171,17 +177,23 @@ func (r *Reconciler) reconcileStatus(ctx context.Context, instance *rrvb2.RedisR
171177
var err error
172178
var realMaster string
173179

174-
masterNodes := k8sutils.GetRedisNodesByRole(ctx, r.K8sClient, instance, "master")
180+
masterNodes, err := k8sutils.GetRedisNodesByRole(ctx, r.K8sClient, instance, "master")
181+
if err != nil {
182+
return intctrlutil.RequeueE(ctx, err, "")
183+
}
175184
realMaster = k8sutils.GetRedisReplicationRealMaster(ctx, r.K8sClient, instance, masterNodes)
176185
if err = r.UpdateRedisReplicationMaster(ctx, instance, realMaster); err != nil {
177186
return intctrlutil.RequeueE(ctx, err, "")
178187
}
179188
labels := common.GetRedisLabels(instance.GetName(), common.SetupTypeReplication, "replication", instance.GetLabels())
180-
if err = r.Healer.UpdateRedisRoleLabel(ctx, instance.GetNamespace(), labels, instance.Spec.KubernetesConfig.ExistingPasswordSecret); err != nil {
189+
if err = r.Healer.UpdateRedisRoleLabel(ctx, instance.GetNamespace(), labels, instance.Spec.KubernetesConfig.ExistingPasswordSecret, instance.Spec.TLS); err != nil {
181190
return intctrlutil.RequeueE(ctx, err, "")
182191
}
183192

184-
slaveNodes := k8sutils.GetRedisNodesByRole(ctx, r.K8sClient, instance, "slave")
193+
slaveNodes, err := k8sutils.GetRedisNodesByRole(ctx, r.K8sClient, instance, "slave")
194+
if err != nil {
195+
return intctrlutil.RequeueE(ctx, err, "")
196+
}
185197
if realMaster != "" {
186198
monitoring.RedisReplicationConnectedSlavesTotal.WithLabelValues(instance.Namespace, instance.Name).Set(float64(len(slaveNodes)))
187199
} else {

internal/controller/redissentinel/redissentinel_controller.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,9 @@ func (r *RedisSentinelReconciler) Reconcile(ctx context.Context, req ctrl.Reques
5454
reconcilers := []reconciler{
5555
{typ: "finalizer", rec: r.reconcileFinalizer},
5656
{typ: "replication", rec: r.reconcileReplication},
57-
{typ: "sentinel", rec: r.reconcileSentinel},
5857
{typ: "pdb", rec: r.reconcilePDB},
5958
{typ: "service", rec: r.reconcileService},
59+
{typ: "sentinel", rec: r.reconcileSentinel},
6060
}
6161

6262
for _, reconciler := range reconcilers {
@@ -133,7 +133,7 @@ func (r *RedisSentinelReconciler) reconcileSentinel(ctx context.Context, instanc
133133
return intctrlutil.RequeueE(ctx, err, "")
134134
} else {
135135
if instance.Spec.RedisSentinelConfig.ResolveHostnames == "yes" {
136-
monitorAddr = fmt.Sprintf("%s.%s-headless.%s.svc", master.Name, rr.Name, rr.Namespace)
136+
monitorAddr = fmt.Sprintf("%s.%s.%s.svc.cluster.local", master.Name, common.GetHeadlessServiceNameFromPodName(master.Name), rr.Namespace)
137137
} else {
138138
monitorAddr = master.Status.PodIP
139139
}

internal/k8sutils/redis-sentinel.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,11 @@ func getRedisReplicationMasterPod(ctx context.Context, client kubernetes.Interfa
331331
log.FromContext(ctx).V(1).Info("Successfully got RedisReplication", "replication name", replicationName, "namespace", replicationNamespace)
332332
}
333333

334-
masterPods := GetRedisNodesByRole(ctx, client, &replicationInstance, "master")
334+
masterPods, err := GetRedisNodesByRole(ctx, client, &replicationInstance, "master")
335+
if err != nil {
336+
log.FromContext(ctx).Error(err, "Failed to get RedisReplication master pods", "replication name", replicationName, "namespace", replicationNamespace)
337+
return emptyRedisInfo
338+
}
335339
if len(masterPods) == 0 {
336340
log.FromContext(ctx).Error(errors.New("no master pods found"), "")
337341
return emptyRedisInfo

0 commit comments

Comments
 (0)