Commit 53552b9

Julien-Ben authored and fealebenpae committed
CLOUDP-200130 Do not assume same order for members in Kubeconfig (#4110)
# Summary

Fixes a bug where we incorrectly assumed consistent ordering between users and tokens in Kubeconfig. [CLOUDP-200130](https://jira.mongodb.org/browse/CLOUDP-200130)

## Changes

- Fix: do not use `token := kubeConfig.Users[n].User.Token` to retrieve the user token. This is the change that actually addresses the bug in the ticket.
- Refactoring of `WatchMemberClusterHealth` to initialize the cache in a separate function.
- Unit tests for the logic that extracts credentials from the config.

## Proof of Work

New unit tests.

## Checklist

- [x] Have you linked a jira ticket and/or is the ticket in the title?
- [x] Have you checked whether your jira ticket required DOCSP changes?
- [x] Have you checked for release_note changes?
1 parent 7021ed4 · commit 53552b9
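For context on the bug itself: a kubeconfig context references its cluster and user by name, so the order of entries in `users` is unrelated to the order of `contexts`, and indexing `Users[n]` with the context index only works by coincidence. A minimal, self-contained sketch of the difference, using simplified stand-in types rather than the operator's `multicluster` package:

package main

import "fmt"

// userItem and contextItem are simplified stand-ins for kubeconfig entries.
type userItem struct {
	Name  string
	Token string
}

type contextItem struct {
	Cluster string
	User    string
}

func main() {
	// Contexts and users deliberately listed in different orders.
	contexts := []contextItem{
		{Cluster: "cluster1", User: "user1"},
		{Cluster: "cluster2", User: "user2"},
	}
	users := []userItem{
		{Name: "user2", Token: "token2"},
		{Name: "user1", Token: "token1"},
	}

	for i, c := range contexts {
		// Buggy: assumes users[i] belongs to contexts[i].
		byIndex := users[i].Token

		// Correct: resolve the user the context references by name.
		byName := ""
		for _, u := range users {
			if u.Name == c.User {
				byName = u.Token
				break
			}
		}
		fmt.Printf("%s: by index=%q, by name=%q\n", c.Cluster, byIndex, byName)
	}
}

With the inputs above, the index-based lookup pairs cluster1 with user2's token, while the name-based lookup resolves the token the context actually references.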

File tree: 3 files changed (+258 / -45 lines)

RELEASE_NOTES.md

Lines changed: 1 addition & 2 deletions
@@ -4,8 +4,7 @@

 ## Bug Fixes
 * Fixes the bug when status of `MongoDBUser` was being set to `Updated` prematurely. For example, new users were not immediately usable following `MongoDBUser` creation despite the operator reporting `Updated` state.
-
-<!-- Past releases -->
+* Fixed a bug causing cluster health check issues when ordering of users and tokens differed in Kubeconfig.

 # MongoDB Enterprise Kubernetes Operator 1.31.0

pkg/multicluster/memberwatch/memberwatch.go

Lines changed: 64 additions & 43 deletions
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/base64"
 	"encoding/json"
+	"fmt"
 	"math"
 	"time"

@@ -26,55 +27,75 @@ type MemberClusterHealthChecker struct {
 	Cache map[string]*MemberHeathCheck
 }

-// WatchMemberClusterHealth watches member clusters healthcheck. If a cluster fails healthcheck it re-enqueues the
-// MongoDBMultiCluster resources. It is spun up in the mongodb multi reconciler as a go-routine, and is executed every 10 seconds.
-func (m *MemberClusterHealthChecker) WatchMemberClusterHealth(ctx context.Context, log *zap.SugaredLogger, watchChannel chan event.GenericEvent, centralClient kubernetesClient.Client, clustersMap map[string]cluster.Cluster) {
-	// check if the local cache is populated if not let's do that
-	if len(m.Cache) == 0 {
-		// load the kubeconfig file contents from disk
-		kubeConfigFile, err := multicluster.NewKubeConfigFile(multicluster.GetKubeConfigPath())
-		if err != nil {
-			log.Errorf("Failed to read KubeConfig file err: %s", err)
-			// we can't populate the client so just bail out here
-			return
-		}
+type ClusterCredentials struct {
+	Server               string
+	CertificateAuthority []byte
+	Token                string
+}

-		kubeConfig, err := kubeConfigFile.LoadKubeConfigFile()
-		if err != nil {
-			log.Errorf("Failed to load the kubeconfig file content err: %s", err)
-			return
-		}
+func getClusterCredentials(clustersMap map[string]cluster.Cluster,
+	kubeConfig multicluster.KubeConfigFile,
+	kubeContext multicluster.KubeConfigContextItem,
+) (*ClusterCredentials, error) {
+	clusterName := kubeContext.Context.Cluster
+	if _, ok := clustersMap[clusterName]; !ok {
+		return nil, fmt.Errorf("cluster %s not found in clustersMap", clusterName)
+	}

-		for n := range kubeConfig.Contexts {
-			context := kubeConfig.Contexts[n]
-			clusterName := context.Context.Cluster
-			if _, ok := clustersMap[clusterName]; !ok {
-				continue
-			}
+	kubeCluster := getClusterFromContext(clusterName, kubeConfig.Clusters)
+	if kubeCluster == nil {
+		return nil, fmt.Errorf("failed to get cluster with clustername: %s, doesn't exists in Kubeconfig clusters", clusterName)
+	}

-			// fetch the cluster from the "clusters" with the given clusterName from the context
-			cluster := getClusterFromContext(clusterName, kubeConfig.Clusters)
-			if cluster == nil {
-				log.Errorf("Failed to get cluster with clustername: %s, doesn't exists in Kubeconfig clusters", clusterName)
-				continue
-			}
+	certificateAuthority, err := base64.StdEncoding.DecodeString(kubeCluster.CertificateAuthority)
+	if err != nil {
+		return nil, fmt.Errorf("failed to decode certificate for cluster: %s, err: %s", clusterName, err)
+	}

-			server := cluster.Server
-			certificateAuthority, err := base64.StdEncoding.DecodeString(cluster.CertificateAuthority)
-			if err != nil {
-				log.Errorf("Failed to decode certificate for cluster: %s, err: %s", clusterName, err)
-				continue
-			}
+	user := getUserFromContext(kubeContext.Context.User, kubeConfig.Users)
+	if user == nil {
+		return nil, fmt.Errorf("failed to get user with name: %s, doesn't exists in Kubeconfig users", kubeContext.Context.User)
+	}

-			// fetch the user from the "users" against the given user from the context
-			user := getUserFromContext(context.Context.User, kubeConfig.Users)
-			if user == nil {
-				log.Errorf("Failed to get user with clustername: %s, doesn't exists in Kubeconfig users", clusterName)
-				continue
-			}
-			token := kubeConfig.Users[n].User.Token
-			m.Cache[clusterName] = NewMemberHealthCheck(server, certificateAuthority, token, log)
+	return &ClusterCredentials{
+		Server:               kubeCluster.Server,
+		CertificateAuthority: certificateAuthority,
+		Token:                user.Token,
+	}, nil
+}
+
+func (m *MemberClusterHealthChecker) populateCache(clustersMap map[string]cluster.Cluster, log *zap.SugaredLogger) {
+	kubeConfigFile, err := multicluster.NewKubeConfigFile(multicluster.GetKubeConfigPath())
+	if err != nil {
+		log.Errorf("Failed to read KubeConfig file err: %s", err)
+		// we can't populate the client so just bail out here
+		return
+	}
+
+	kubeConfig, err := kubeConfigFile.LoadKubeConfigFile()
+	if err != nil {
+		log.Errorf("Failed to load the kubeconfig file content err: %s", err)
+		return
+	}
+
+	for n := range kubeConfig.Contexts {
+		kubeContext := kubeConfig.Contexts[n]
+		clusterName := kubeContext.Context.Cluster
+		credentials, err := getClusterCredentials(clustersMap, kubeConfig, kubeContext)
+		if err != nil {
+			log.Errorf("Skipping cluster %s: %v", clusterName, err)
+			continue
 		}
+		m.Cache[clusterName] = NewMemberHealthCheck(credentials.Server, credentials.CertificateAuthority, credentials.Token, log)
+	}
+}
+
+// WatchMemberClusterHealth watches member clusters healthcheck. If a cluster fails healthcheck it re-enqueues the
+// MongoDBMultiCluster resources. It is spun up in the mongodb multi reconciler as a go-routine, and is executed every 10 seconds.
+func (m *MemberClusterHealthChecker) WatchMemberClusterHealth(ctx context.Context, log *zap.SugaredLogger, watchChannel chan event.GenericEvent, centralClient kubernetesClient.Client, clustersMap map[string]cluster.Cluster) {
+	// check if the local cache is populated if not let's do that
+	if len(m.Cache) == 0 {
+		m.populateCache(clustersMap, log)
 	}

 	for {
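Note that the lookup helpers `getClusterFromContext` and `getUserFromContext` pre-date this commit and are not part of the diff. Judging from the call sites above and the new `TestGetUserFromContext` cases below (nil for a missing name, first match wins for duplicates), `getUserFromContext` presumably amounts to a linear scan by name, roughly like this sketch (illustrative only, written against the `multicluster` package's `KubeConfigUserItem`/`KubeConfigUser` types; not the repository's actual code):

// Illustrative sketch only; the real helper is defined elsewhere in the package.
// Returns the first user whose Name matches, or nil when no user matches.
func getUserFromContext(userName string, users []multicluster.KubeConfigUserItem) *multicluster.KubeConfigUser {
	for i := range users {
		if users[i].Name == userName {
			return &users[i].User
		}
	}
	return nil
}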

pkg/multicluster/memberwatch/memberwatch_test.go

Lines changed: 193 additions & 0 deletions
@@ -1,12 +1,16 @@
 package memberwatch

 import (
+	"encoding/base64"
 	"encoding/json"
 	"testing"

 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"sigs.k8s.io/controller-runtime/pkg/cluster"

 	"github.com/10gen/ops-manager-kubernetes/api/v1/mdb"
+	mc "github.com/10gen/ops-manager-kubernetes/pkg/multicluster"
 	"github.com/10gen/ops-manager-kubernetes/pkg/multicluster/failedcluster"
 )

@@ -147,3 +151,192 @@ func TestShouldAddFailedClusterAnnotation(t *testing.T) {
 		assert.Equal(t, shouldAddFailedClusterAnnotation(tt.annotations, tt.clusterName), tt.out)
 	}
 }
+
+func TestGetClusterCredentials(t *testing.T) {
+	validCertContent := "valid-cert"
+	validCert := base64.StdEncoding.EncodeToString([]byte(validCertContent))
+	invalidCert := "invalid-base64!!!"
+	clusterName := "cluster1"
+	userToken := "abc123"
+	mockUserItemList := []mc.KubeConfigUserItem{
+		{Name: "user1", User: mc.KubeConfigUser{Token: userToken}},
+	}
+	mockKubeContext := mc.KubeConfigContextItem{
+		Name: "context1",
+		Context: mc.KubeConfigContext{
+			Cluster: clusterName,
+			User:    "user1",
+		},
+	}
+	kubeconfigServerURL := "https://example.com"
+	mockKubeConfig := mc.KubeConfigFile{
+		Clusters: []mc.KubeConfigClusterItem{
+			{
+				Name: clusterName,
+				Cluster: mc.KubeConfigCluster{
+					Server:               kubeconfigServerURL,
+					CertificateAuthority: validCert,
+				},
+			},
+		},
+		Users: mockUserItemList,
+	}
+
+	tests := []struct {
+		name           string
+		clustersMap    map[string]cluster.Cluster // Using as a set; the value is not used.
+		kubeConfig     mc.KubeConfigFile
+		kubeContext    mc.KubeConfigContextItem
+		wantErr        bool
+		errContains    string
+		expectedServer string
+		expectedToken  string
+		expectedCA     []byte
+	}{
+		{
+			name:        "Cluster not in clustersMap",
+			clustersMap: map[string]cluster.Cluster{}, // Empty map; cluster1 is missing.
+			kubeConfig:  mockKubeConfig,
+			kubeContext: mockKubeContext,
+			wantErr:     true,
+			errContains: "cluster cluster1 not found in clustersMap",
+		},
+		{
+			name: "Cluster missing in kubeConfig.Clusters",
+			clustersMap: map[string]cluster.Cluster{
+				clusterName: nil,
+			},
+			kubeConfig: mc.KubeConfigFile{
+				Clusters: []mc.KubeConfigClusterItem{}, // No cluster defined.
+				Users:    mockUserItemList,
+			},
+			kubeContext: mockKubeContext,
+			wantErr:     true,
+			errContains: "failed to get cluster with clustername: cluster1",
+		},
+		{
+			name: "Invalid certificate authority",
+			clustersMap: map[string]cluster.Cluster{
+				clusterName: nil,
+			},
+			kubeConfig: mc.KubeConfigFile{
+				Clusters: []mc.KubeConfigClusterItem{
+					{
+						Name: clusterName,
+						Cluster: mc.KubeConfigCluster{
+							Server:               kubeconfigServerURL,
+							CertificateAuthority: invalidCert, // The kubeConfig has an invalid CA
+						},
+					},
+				},
+				Users: mockUserItemList,
+			},
+			kubeContext: mockKubeContext,
+			wantErr:     true,
+			errContains: "failed to decode certificate for cluster: cluster1",
+		},
+		{
+			name: "User not found",
+			clustersMap: map[string]cluster.Cluster{
+				clusterName: nil,
+			},
+			kubeConfig: mc.KubeConfigFile{
+				Clusters: []mc.KubeConfigClusterItem{
+					{
+						Name: clusterName,
+						Cluster: mc.KubeConfigCluster{
+							Server:               kubeconfigServerURL,
+							CertificateAuthority: validCert,
+						},
+					},
+				},
+				Users: []mc.KubeConfigUserItem{}, // No users defined.
+			},
+			kubeContext: mc.KubeConfigContextItem{
+				Name: "context1",
+				Context: mc.KubeConfigContext{
+					Cluster: clusterName,
+					User:    "user1", // User is not present.
+				},
+			},
+			wantErr:     true,
+			errContains: "failed to get user with name: user1",
+		},
+		{
+			name: "Successful extraction",
+			clustersMap: map[string]cluster.Cluster{
+				clusterName: nil,
+			},
+			kubeConfig:     mockKubeConfig,
+			kubeContext:    mockKubeContext,
+			wantErr:        false,
+			expectedServer: kubeconfigServerURL,
+			expectedToken:  userToken,
+			expectedCA:     []byte(validCertContent),
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			creds, err := getClusterCredentials(tc.clustersMap, tc.kubeConfig, tc.kubeContext)
+			if tc.wantErr {
+				assert.ErrorContains(t, err, tc.errContains)
+			} else {
+				require.NoError(t, err)
+				assert.Equal(t, tc.expectedServer, creds.Server)
+				assert.Equal(t, tc.expectedToken, creds.Token)
+				assert.Equal(t, tc.expectedCA, creds.CertificateAuthority)
+			}
+		})
+	}
+}
+
+func TestGetUserFromContext(t *testing.T) {
+	tests := []struct {
+		name         string
+		userName     string
+		users        []mc.KubeConfigUserItem
+		expectedUser *mc.KubeConfigUser
+	}{
+		{
+			name:     "User exists",
+			userName: "alice",
+			users: []mc.KubeConfigUserItem{
+				{Name: "alice", User: mc.KubeConfigUser{Token: "alice-token"}},
+				{Name: "bob", User: mc.KubeConfigUser{Token: "bob-token"}},
+			},
+			expectedUser: &mc.KubeConfigUser{Token: "alice-token"},
+		},
+		{
+			name:     "User does not exist",
+			userName: "charlie",
+			users: []mc.KubeConfigUserItem{
+				{Name: "alice", User: mc.KubeConfigUser{Token: "alice-token"}},
+				{Name: "bob", User: mc.KubeConfigUser{Token: "bob-token"}},
+			},
+			expectedUser: nil,
+		},
+		{
+			name:         "Empty users slice",
+			userName:     "alice",
+			users:        []mc.KubeConfigUserItem{},
+			expectedUser: nil,
+		},
+		{
+			name:     "Multiple users with same name, returns first match",
+			userName: "duplicated",
+			users: []mc.KubeConfigUserItem{
+				{Name: "duplicated", User: mc.KubeConfigUser{Token: "first-token"}},
+				{Name: "duplicated", User: mc.KubeConfigUser{Token: "second-token"}},
+			},
+			expectedUser: &mc.KubeConfigUser{Token: "first-token"},
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			user := getUserFromContext(tc.userName, tc.users)
+			assert.Equal(t, tc.expectedUser, user)
+		})
+	}
+}
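Assuming a standard Go toolchain and access to the module's dependencies, these tests should be runnable locally with `go test ./pkg/multicluster/memberwatch/...`.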
