Skip to content

Commit 18582c7

Browse files
committed
Merge branch 'master' into maciejk/ar-image-staging
# Conflicts: # scripts/release/tests/build_info_test.py
2 parents 92bbd95 + f0df6f4 commit 18582c7

File tree

55 files changed

+328
-82
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+328
-82
lines changed

.evergreen-functions.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,7 @@ functions:
336336
- command: shell.exec
337337
type: setup
338338
params:
339+
continue_on_err: true
339340
shell: bash
340341
working_dir: src/github.com/mongodb/mongodb-kubernetes
341342
script: |
@@ -420,6 +421,7 @@ functions:
420421
upload_e2e_logs:
421422
- command: s3.put
422423
params:
424+
continue_on_err: true
423425
aws_key: ${enterprise_aws_access_key_id}
424426
aws_secret: ${enterprise_aws_secret_access_key}
425427
local_files_include_filter:

.evergreen-periodic-builds.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,15 @@ variables:
1919
- func: switch_context
2020

2121
tasks:
22+
- name: periodic_teardown_aws
23+
commands:
24+
- func: cleanup_aws
25+
26+
- name: periodic_teardown_cloudqa
27+
commands:
28+
- func: teardown_cloud_qa_all
29+
30+
task_groups:
2231
- name: periodic_teardown_task_group
2332
<<: *setup_group
2433
tasks:

.evergreen.yml

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -117,9 +117,9 @@ variables:
117117
- func: setup_cloud_qa
118118
teardown_task_can_fail_task: true
119119
teardown_task:
120+
- func: teardown_cloud_qa
120121
- func: upload_e2e_logs
121122
- func: teardown_kubernetes_environment
122-
- func: teardown_cloud_qa
123123

124124
- &setup_and_teardown_task
125125
setup_task_can_fail_task: true
@@ -156,6 +156,25 @@ variables:
156156
- name: build_agent_images_ubi
157157
variant: init_test_run
158158

159+
- &base_om7_dependency_with_race
160+
depends_on:
161+
- name: build_om_images
162+
variant: build_om70_images
163+
- name: build_operator_race_ubi
164+
variant: init_test_run
165+
- name: build_init_database_image_ubi
166+
variant: init_test_run
167+
- name: build_database_image_ubi
168+
variant: init_test_run
169+
- name: build_test_image
170+
variant: init_test_run
171+
- name: build_init_appdb_images_ubi
172+
variant: init_test_run
173+
- name: build_init_om_images_ubi
174+
variant: init_test_run
175+
- name: build_agent_images_ubi
176+
variant: init_test_run
177+
159178
- &base_om8_dependency
160179
depends_on:
161180
- name: build_om_images
@@ -439,18 +458,23 @@ tasks:
439458
- func: setup_building_host
440459
- func: pipeline
441460
vars:
442-
skip_tags: ubuntu,release
443-
distro: ubi
444461
image_name: operator
445462

463+
- name: build_operator_race_ubi
464+
commands:
465+
- func: clone
466+
- func: setup_building_host
467+
- func: pipeline
468+
vars:
469+
image_name: operator-race
470+
446471
- name: build_init_om_images_ubi
447472
commands:
448473
- func: clone
449474
- func: setup_building_host
450475
- func: pipeline
451476
vars:
452477
image_name: init-ops-manager
453-
skip_tags: ubuntu,release
454478

455479
- name: build_init_appdb_images_ubi
456480
commands:
@@ -459,7 +483,6 @@ tasks:
459483
- func: pipeline
460484
vars:
461485
image_name: init-appdb
462-
skip_tags: ubuntu,release
463486

464487
- name: build_agent_images_ubi
465488
commands:
@@ -468,7 +491,6 @@ tasks:
468491
- func: pipeline
469492
vars:
470493
image_name: agent
471-
skip_tags: ubuntu,release
472494

473495
- name: build_init_database_image_ubi
474496
commands:
@@ -477,7 +499,6 @@ tasks:
477499
- func: pipeline
478500
vars:
479501
image_name: init-database
480-
skip_tags: ubuntu,release
481502

482503
- name: build_database_image_ubi
483504
commands:
@@ -486,7 +507,6 @@ tasks:
486507
- func: pipeline
487508
vars:
488509
image_name: database
489-
skip_tags: ubuntu,release
490510

491511
- name: build_readiness_probe_image
492512
commands:
@@ -495,7 +515,6 @@ tasks:
495515
- func: pipeline
496516
vars:
497517
image_name: readiness-probe
498-
skip_tags: ubuntu,release
499518

500519
- name: build_upgrade_hook_image
501520
commands:
@@ -504,7 +523,6 @@ tasks:
504523
- func: pipeline
505524
vars:
506525
image_name: upgrade-hook
507-
skip_tags: ubuntu,release
508526

509527
- name: prepare_aws
510528
priority: 59
@@ -1434,7 +1452,7 @@ buildvariants:
14341452
tags: [ "e2e_test_suite" ]
14351453
run_on:
14361454
- ubuntu1804-xlarge
1437-
<<: *base_om7_dependency
1455+
<<: *base_om7_dependency_with_race
14381456
tasks:
14391457
- name: e2e_operator_race_with_telemetry_task_group
14401458

@@ -1653,6 +1671,7 @@ buildvariants:
16531671
- ubuntu2204-small
16541672
tasks:
16551673
- name: build_operator_ubi
1674+
- name: build_operator_race_ubi
16561675
- name: build_test_image
16571676
- name: build_mco_test_image
16581677
- name: build_init_appdb_images_ubi

build_info.json

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,22 @@
2323
]
2424
}
2525
},
26+
"operator-race": {
27+
"dockerfile-path": "docker/mongodb-kubernetes-operator/Dockerfile.atomic",
28+
"patch": {
29+
"repository": "268558157000.dkr.ecr.us-east-1.amazonaws.com/dev/mongodb-kubernetes",
30+
"platforms": [
31+
"linux/amd64"
32+
]
33+
},
34+
"staging": {
35+
"sign": true,
36+
"repository": "268558157000.dkr.ecr.us-east-1.amazonaws.com/staging/mongodb-kubernetes",
37+
"platforms": [
38+
"linux/amd64"
39+
]
40+
}
41+
},
2642
"init-database": {
2743
"dockerfile-path": "docker/mongodb-kubernetes-init-database/Dockerfile.atomic",
2844
"patch": {
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
title: Fixing auth transition edge-cases
3+
kind: fix
4+
date: 2025-08-08
5+
---
6+
7+
* Fixed an issue where the readiness probe reported the node as ready even when its authentication mechanism was not in sync with the other nodes, potentially causing premature restarts.

controllers/om/automation_status.go

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import (
1515
"github.com/mongodb/mongodb-kubernetes/pkg/util/stringutil"
1616
)
1717

18-
const automationAgentKubeUpgradePlan = "ChangeVersionKube"
18+
const automationAgentKubeUpgradeMove = "ChangeVersionKube"
1919

2020
// AutomationStatus represents the status of automation agents registered with Ops Manager
2121
type AutomationStatus struct {
@@ -85,12 +85,25 @@ func checkAutomationStatusIsGoal(as *AutomationStatus, relevantProcesses []strin
8585

8686
goalsNotAchievedMap := map[string]int{}
8787
goalsAchievedMap := map[string]int{}
88+
authTransitionsInProgress := map[string]string{}
89+
8890
for _, p := range as.Processes {
8991
if !stringutil.Contains(relevantProcesses, p.Name) {
9092
continue
9193
}
9294
if p.LastGoalVersionAchieved == as.GoalVersion {
9395
goalsAchievedMap[p.Name] = p.LastGoalVersionAchieved
96+
97+
// Check if authentication transitions are in the current plan.
98+
// If a process has reached goal version but still has auth-related moves in plan,
99+
// it means authentication transition is likely in progress.
100+
// The plan contains non-completed move names from the API.
101+
for _, move := range p.Plan {
102+
if isAuthenticationTransitionMove(move) {
103+
authTransitionsInProgress[p.Name] = move
104+
break
105+
}
106+
}
94107
} else {
95108
goalsNotAchievedMap[p.Name] = p.LastGoalVersionAchieved
96109
}
@@ -103,6 +116,18 @@ func checkAutomationStatusIsGoal(as *AutomationStatus, relevantProcesses []strin
103116
goalsAchievedMsgList := slices.Collect(maps.Keys(goalsAchievedMap))
104117
sort.Strings(goalsAchievedMsgList)
105118

119+
// Check if any authentication transitions are in progress
120+
if len(authTransitionsInProgress) > 0 {
121+
var authTransitionMsgList []string
122+
for processName, step := range authTransitionsInProgress {
123+
authTransitionMsgList = append(authTransitionMsgList, fmt.Sprintf("%s:%s", processName, step))
124+
}
125+
log.Infow("Authentication transitions still in progress, waiting for completion",
126+
"processes", authTransitionMsgList)
127+
return false, fmt.Sprintf("authentication transitions in progress for %d processes: %s",
128+
len(authTransitionsInProgress), authTransitionMsgList)
129+
}
130+
106131
if len(goalsNotAchievedMap) > 0 {
107132
return false, fmt.Sprintf("%d processes waiting to reach automation config goal state (version=%d): %s, %d processes reached goal state: %s",
108133
len(goalsNotAchievedMap), as.GoalVersion, goalsNotAchievedMsgList, len(goalsAchievedMsgList), goalsAchievedMsgList)
@@ -113,17 +138,29 @@ func checkAutomationStatusIsGoal(as *AutomationStatus, relevantProcesses []strin
113138
}
114139
}
115140

141+
// isAuthenticationTransitionMove reports whether move names one of the plan
// moves that belong to an authentication transition: "UpdateAuth" or
// "WaitAuthUpdate". The match is exact and case-sensitive; any other value,
// including the empty string, yields false.
func isAuthenticationTransitionMove(move string) bool {
	// A switch over the two constant names avoids building a map on every call.
	switch move {
	case "UpdateAuth", "WaitAuthUpdate":
		return true
	default:
		return false
	}
}
152+
116153
func areAnyAgentsInKubeUpgradeMode(as *AutomationStatus, relevantProcesses []string, log *zap.SugaredLogger) bool {
117154
for _, p := range as.Processes {
118155
if !stringutil.Contains(relevantProcesses, p.Name) {
119156
continue
120157
}
121-
for _, plan := range p.Plan {
158+
for _, move := range p.Plan {
122159
// This means the following:
123160
// - the cluster is in static architecture
124161
// - the agents are in a dedicated upgrade process, waiting for their binaries to be replaced by kubernetes
125162
// - this can only happen if the statefulset is ready, therefore we are returning ready here
126-
if plan == automationAgentKubeUpgradePlan {
163+
if move == automationAgentKubeUpgradeMove {
127164
log.Debug("cluster is in changeVersionKube mode, returning the agent is ready.")
128165
return true
129166
}

0 commit comments

Comments
 (0)