Skip to content

Commit 33914d5

Browse files
authored
Merge pull request #2043 from rabbitmq/issue-2042
Use Pod name instead of IP for quorum check
2 parents 279c13e + 01bff02 commit 33914d5

File tree

5 files changed

+81
-62
lines changed

5 files changed

+81
-62
lines changed

bin/kubectl-rabbitmq-plugin/commands_test.go

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,36 @@ func TestMain(m *testing.M) {
6666
}
6767

6868
// Check if cluster operator is ready before running tests
69-
if err := checkClusterOperatorReady(); err != nil {
70-
os.Remove(pluginBinaryPath)
71-
log.Fatalf("Failed to run integration tests: %v", err)
69+
// Retry for up to 2 minutes to handle CI startup delays
70+
log.Println("Waiting for cluster operator to be ready...")
71+
72+
checkReady := func() bool {
73+
if err := checkClusterOperatorReady(); err != nil {
74+
log.Printf("Cluster operator not ready yet: %v", err)
75+
return false
76+
}
77+
return true
78+
}
79+
80+
// TestMain doesn't have access to *testing.T; therefore, we can't use require.Eventually from testify 😞
81+
timeout := time.After(2 * time.Minute)
82+
ticker := time.NewTicker(2 * time.Second)
83+
defer ticker.Stop()
84+
85+
for {
86+
select {
87+
case <-timeout:
88+
os.Remove(pluginBinaryPath)
89+
log.Fatal("Timed out waiting for cluster operator to be ready after 2 minutes")
90+
case <-ticker.C:
91+
if checkReady() {
92+
log.Println("Cluster operator is ready!")
93+
goto ready
94+
}
95+
}
7296
}
7397

98+
ready:
7499
code := m.Run()
75100

76101
os.Remove(pluginBinaryPath)

controllers/reconcile_status.go

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -103,11 +103,11 @@ func (r *RabbitmqClusterReconciler) getPodEndpoints(ctx context.Context, rmq *ra
103103
}
104104

105105
// checkNodeQuorumStatus checks quorum status for a specific node/pod
106-
func (r *RabbitmqClusterReconciler) checkNodeQuorumStatus(ctx context.Context, rmq *rabbitmqv1beta1.RabbitmqCluster, podIP string, podName string) nodeQuorumCheck {
106+
func (r *RabbitmqClusterReconciler) checkNodeQuorumStatus(ctx context.Context, rmq *rabbitmqv1beta1.RabbitmqCluster, podName string) nodeQuorumCheck {
107107
logger := ctrl.LoggerFrom(ctx)
108108

109109
// Get client for this specific pod
110-
rabbitClient, err := rabbitmqclient.GetRabbitmqClientForPod(ctx, r.APIReader, rmq, podIP)
110+
rabbitClient, err := rabbitmqclient.GetRabbitmqClientForPod(ctx, r.APIReader, rmq, podName)
111111
if err != nil {
112112
logger.V(1).Info("Failed to get client for pod", "pod", podName, "error", err)
113113
return nodeQuorumCheck{
@@ -173,25 +173,12 @@ func (r *RabbitmqClusterReconciler) checkQuorumStatus(ctx context.Context, rmq *
173173
podName = endpoint.TargetRef.Name
174174
}
175175

176-
// Get pod IP
177-
if len(endpoint.Addresses) == 0 {
178-
logger.V(1).Info("Endpoint has no addresses", "podName", podName)
179-
resultsChan <- nodeQuorumCheck{
180-
podName: podName,
181-
status: "unavailable",
182-
err: fmt.Errorf("no addresses for endpoint"),
183-
}
184-
continue
185-
}
186-
187-
podIP := endpoint.Addresses[0]
188-
189176
wg.Add(1)
190-
go func(ip, name string) {
177+
go func(name string) {
191178
defer wg.Done()
192-
result := r.checkNodeQuorumStatus(ctx, rmq, ip, name)
179+
result := r.checkNodeQuorumStatus(ctx, rmq, name)
193180
resultsChan <- result
194-
}(podIP, podName)
181+
}(podName)
195182
}
196183

197184
// Wait for all checks to complete

docs/examples/tls/certificate.yaml

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,14 @@ metadata:
1313
namespace: examples
1414
spec:
1515
dnsNames:
16-
- "*.tls-nodes.examples.svc.cluster.local"
17-
- "*.tls.examples.svc.cluster.local"
18-
- "*.tls.examples.svc"
19-
- "*.examples.pod"
20-
- "*.examples.pod.cluster.local"
16+
- "tls-server-0.tls-nodes.examples.svc.cluster.local"
17+
- "tls-server-1.tls-nodes.examples.svc.cluster.local"
18+
- "tls-server-2.tls-nodes.examples.svc.cluster.local"
19+
- "tls-server-0.tls-nodes.examples.svc"
20+
- "tls-server-1.tls-nodes.examples.svc"
21+
- "tls-server-2.tls-nodes.examples.svc"
22+
- "tls.examples.svc.cluster.local"
23+
- "tls.examples.svc"
2124
secretName: tls-secret
2225
issuerRef:
2326
kind: Issuer

internal/rabbitmqclient/client.go

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ import (
1616
"crypto/x509"
1717
"fmt"
1818
"net/http"
19-
"strings"
2019

2120
rabbithole "github.com/michaelklishin/rabbit-hole/v2"
2221
rabbitmqv1beta1 "github.com/rabbitmq/cluster-operator/v2/api/v1beta1"
@@ -33,10 +32,10 @@ type ClientInfo struct {
3332
Transport *http.Transport
3433
}
3534

36-
// GetRabbitmqClientForPod creates a rabbithole client for a specific pod using its IP address.
37-
// It fetches credentials from the default user secret and connects directly to the pod IP.
38-
func GetRabbitmqClientForPod(ctx context.Context, k8sClient client.Reader, rmq *rabbitmqv1beta1.RabbitmqCluster, podIP string) (*rabbithole.Client, error) {
39-
info, err := GetClientInfoForPod(ctx, k8sClient, rmq, podIP)
35+
// GetRabbitmqClientForPod creates a rabbithole client for a specific pod using its stable DNS name.
36+
// It fetches credentials from the default user secret and connects directly to the pod via its stable DNS.
37+
func GetRabbitmqClientForPod(ctx context.Context, k8sClient client.Reader, rmq *rabbitmqv1beta1.RabbitmqCluster, podName string) (*rabbithole.Client, error) {
38+
info, err := GetClientInfoForPod(ctx, k8sClient, rmq, podName)
4039
if err != nil {
4140
return nil, err
4241
}
@@ -57,9 +56,9 @@ func GetRabbitmqClientForPod(ctx context.Context, k8sClient client.Reader, rmq *
5756
return rabbitmqClient, nil
5857
}
5958

60-
// GetClientInfoForPod creates ClientInfo for a specific pod IP.
59+
// GetClientInfoForPod creates ClientInfo for a specific pod using its stable DNS name.
6160
// This is useful for checking individual pods instead of going through the service.
62-
func GetClientInfoForPod(ctx context.Context, k8sClient client.Reader, rmq *rabbitmqv1beta1.RabbitmqCluster, podIP string) (*ClientInfo, error) {
61+
func GetClientInfoForPod(ctx context.Context, k8sClient client.Reader, rmq *rabbitmqv1beta1.RabbitmqCluster, podName string) (*ClientInfo, error) {
6362
// Fetch the default user secret
6463
secretName := rmq.ChildResourceName("default-user")
6564
secret := &corev1.Secret{}
@@ -82,13 +81,17 @@ func GetClientInfoForPod(ctx context.Context, k8sClient client.Reader, rmq *rabb
8281
return nil, fmt.Errorf("password not found in secret %s", secretName)
8382
}
8483

85-
// Build management API URL using pod IP
84+
// Build management API URL using pod stable DNS name
8685
var port int
8786
var scheme string
8887

8988
// Create HTTP transport
9089
var transport *http.Transport
9190

91+
// Construct the headless service name and pod FQDN
92+
headlessServiceName := rmq.ChildResourceName("nodes")
93+
podFQDN := fmt.Sprintf("%s.%s.%s.svc", podName, headlessServiceName, rmq.Namespace)
94+
9295
// Use TLS only if there's no other alternative
9396
if rmq.Spec.TLS.DisableNonTLSListeners {
9497
port = 15671
@@ -123,9 +126,9 @@ func GetClientInfoForPod(ctx context.Context, k8sClient client.Reader, rmq *rabb
123126
tlsConfig.RootCAs = certPool
124127
}
125128

126-
// https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pods
127-
podIpDnsName := strings.ReplaceAll(podIP, ".", "-")
128-
tlsConfig.ServerName = fmt.Sprintf("%s.%s.pod", podIpDnsName, rmq.Namespace)
129+
// Use the pod's stable DNS name for TLS ServerName
130+
// This matches the SAN entries users typically configure in their certificates
131+
tlsConfig.ServerName = podFQDN
129132

130133
transport = &http.Transport{
131134
TLSClientConfig: tlsConfig,
@@ -135,8 +138,8 @@ func GetClientInfoForPod(ctx context.Context, k8sClient client.Reader, rmq *rabb
135138
scheme = "http"
136139
}
137140

138-
// Use pod IP directly instead of service name
139-
baseURL := fmt.Sprintf("%s://%s:%d", scheme, podIP, port)
141+
// Use pod stable DNS name
142+
baseURL := fmt.Sprintf("%s://%s:%d", scheme, podFQDN, port)
140143

141144
return &ClientInfo{
142145
BaseURL: baseURL,

internal/rabbitmqclient/client_test.go

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -71,43 +71,44 @@ var _ = Describe("RabbitMQ Client", func() {
7171
})
7272

7373
It("returns client info with correct credentials", func() {
74-
podIP := "10.0.0.1"
75-
info, err := GetClientInfoForPod(ctx, k8sClient, rmq, podIP)
74+
podName := "test-cluster-server-0"
75+
info, err := GetClientInfoForPod(ctx, k8sClient, rmq, podName)
7676
Expect(err).NotTo(HaveOccurred())
7777
Expect(info).NotTo(BeNil())
7878
Expect(info.Username).To(Equal("test-user"))
7979
Expect(info.Password).To(Equal("test-password"))
8080
})
8181

82-
It("returns the correct base URL for non-TLS with pod IP", func() {
83-
podIP := "10.0.0.1"
84-
info, err := GetClientInfoForPod(ctx, k8sClient, rmq, podIP)
82+
It("returns the correct base URL for non-TLS with pod DNS name", func() {
83+
podName := "test-cluster-server-0"
84+
info, err := GetClientInfoForPod(ctx, k8sClient, rmq, podName)
8585
Expect(err).NotTo(HaveOccurred())
86-
Expect(info.BaseURL).To(Equal("http://10.0.0.1:15672"))
86+
Expect(info.BaseURL).To(Equal("http://test-cluster-server-0.test-cluster-nodes.test-namespace.svc:15672"))
8787
})
8888

89-
It("returns the correct base URL for TLS with pod IP", func() {
89+
It("returns the correct base URL for TLS with pod DNS name", func() {
9090
rmq.Spec.TLS.SecretName = "tls-secret"
9191
rmq.Spec.TLS.DisableNonTLSListeners = true
92-
podIP := "10.0.0.2"
93-
info, err := GetClientInfoForPod(ctx, k8sClient, rmq, podIP)
92+
podName := "test-cluster-server-1"
93+
info, err := GetClientInfoForPod(ctx, k8sClient, rmq, podName)
9494
Expect(err).NotTo(HaveOccurred())
95-
Expect(info.BaseURL).To(Equal("https://10.0.0.2:15671"))
95+
Expect(info.BaseURL).To(Equal("https://test-cluster-server-1.test-cluster-nodes.test-namespace.svc:15671"))
9696
})
9797

98-
It("returns an HTTP transport for TLS", func() {
98+
It("returns an HTTP transport for TLS with correct ServerName", func() {
9999
rmq.Spec.TLS.SecretName = "tls-secret"
100100
rmq.Spec.TLS.DisableNonTLSListeners = true
101-
podIP := "10.0.0.1"
102-
info, err := GetClientInfoForPod(ctx, k8sClient, rmq, podIP)
101+
podName := "test-cluster-server-0"
102+
info, err := GetClientInfoForPod(ctx, k8sClient, rmq, podName)
103103
Expect(err).NotTo(HaveOccurred())
104104
Expect(info.Transport).NotTo(BeNil())
105105
Expect(info.Transport.TLSClientConfig).NotTo(BeNil())
106+
Expect(info.Transport.TLSClientConfig.ServerName).To(Equal("test-cluster-server-0.test-cluster-nodes.test-namespace.svc"))
106107
})
107108

108109
It("returns nil transport for non-TLS", func() {
109-
podIP := "10.0.0.1"
110-
info, err := GetClientInfoForPod(ctx, k8sClient, rmq, podIP)
110+
podName := "test-cluster-server-0"
111+
info, err := GetClientInfoForPod(ctx, k8sClient, rmq, podName)
111112
Expect(err).NotTo(HaveOccurred())
112113
Expect(info.Transport).To(BeNil())
113114
})
@@ -122,8 +123,8 @@ var _ = Describe("RabbitMQ Client", func() {
122123
})
123124

124125
It("returns an error", func() {
125-
podIP := "10.0.0.1"
126-
_, err := GetClientInfoForPod(ctx, k8sClient, rmq, podIP)
126+
podName := "test-cluster-server-0"
127+
_, err := GetClientInfoForPod(ctx, k8sClient, rmq, podName)
127128
Expect(err).To(HaveOccurred())
128129
Expect(err.Error()).To(ContainSubstring("failed to get default user secret"))
129130
})
@@ -141,8 +142,8 @@ var _ = Describe("RabbitMQ Client", func() {
141142

142143
Context("when TLS is disabled", func() {
143144
It("returns a non-TLS client", func() {
144-
podIP := "10.0.0.1"
145-
client, err := GetRabbitmqClientForPod(ctx, k8sClient, rmq, podIP)
145+
podName := "test-cluster-server-0"
146+
client, err := GetRabbitmqClientForPod(ctx, k8sClient, rmq, podName)
146147
Expect(err).NotTo(HaveOccurred())
147148
Expect(client).NotTo(BeNil())
148149
})
@@ -152,8 +153,8 @@ var _ = Describe("RabbitMQ Client", func() {
152153
It("returns a TLS client", func() {
153154
rmq.Spec.TLS.SecretName = "tls-secret"
154155
rmq.Spec.TLS.DisableNonTLSListeners = true
155-
podIP := "10.0.0.1"
156-
client, err := GetRabbitmqClientForPod(ctx, k8sClient, rmq, podIP)
156+
podName := "test-cluster-server-0"
157+
client, err := GetRabbitmqClientForPod(ctx, k8sClient, rmq, podName)
157158
Expect(err).NotTo(HaveOccurred())
158159
Expect(client).NotTo(BeNil())
159160
})
@@ -168,8 +169,8 @@ var _ = Describe("RabbitMQ Client", func() {
168169
})
169170

170171
It("returns an error", func() {
171-
podIP := "10.0.0.1"
172-
_, err := GetRabbitmqClientForPod(ctx, k8sClient, rmq, podIP)
172+
podName := "test-cluster-server-0"
173+
_, err := GetRabbitmqClientForPod(ctx, k8sClient, rmq, podName)
173174
Expect(err).To(HaveOccurred())
174175
Expect(err.Error()).To(ContainSubstring("failed to get default user secret"))
175176
})

0 commit comments

Comments
 (0)