13
13
# See the License for the specific language governing permissions and
14
14
# limitations under the License.
15
15
16
-
17
16
set -o errexit
18
17
set -o nounset
19
18
set -o pipefail
@@ -26,17 +25,21 @@ set -o pipefail
26
25
# Example 1: hack/run-e2e.sh (run e2e with default config)
27
26
# Example 2: export KARMADA_APISERVER_KUBECONFIG=<KUBECONFIG PATH> hack/run-e2e.sh (run e2e with your KUBECONFIG)
28
27
28
+ # Cluster name definitions
29
+ MEMBER_CLUSTER_1_NAME=${MEMBER_CLUSTER_1_NAME:- " member1" }
30
+ MEMBER_CLUSTER_2_NAME=${MEMBER_CLUSTER_2_NAME:- " member2" }
31
+ PULL_MODE_CLUSTER_NAME=${PULL_MODE_CLUSTER_NAME:- " member3" }
32
+
29
33
KUBECONFIG_PATH=${KUBECONFIG_PATH:- " ${HOME} /.kube" }
30
34
KARMADA_APISERVER_KUBECONFIG=${KARMADA_APISERVER_KUBECONFIG:- " $KUBECONFIG_PATH /karmada.config" }
31
- PULL_BASED_CLUSTERS=${PULL_BASED_CLUSTERS:- " member3 :$KUBECONFIG_PATH /members.config" }
35
+ PULL_BASED_CLUSTERS=${PULL_BASED_CLUSTERS:- " ${PULL_MODE_CLUSTER_NAME} :$KUBECONFIG_PATH /members.config" }
32
36
33
37
# KARMADA_RUNNING_ON_KIND indicates whether the tests run against a Karmada instance that is installed on a kind cluster.
34
38
# Defaults to true.
35
39
# For kind cluster, the kind related logs will be collected after the testing.
36
40
KARMADA_RUNNING_ON_KIND=${KARMADA_RUNNING_ON_KIND:- true}
37
41
38
42
KARMADA_HOST_CLUSTER_NAME=${KARMADA_HOST_CLUSTER_NAME:- " karmada-host" }
39
- KARMADA_PULL_CLUSTER_NAME=${KARMADA_PULL_CLUSTER_NAME:- " member3" }
40
43
41
44
ARTIFACTS_PATH=${ARTIFACTS_PATH:- " ${HOME} /karmada-e2e-logs" }
42
45
mkdir -p " $ARTIFACTS_PATH "
@@ -65,9 +68,9 @@ if [ "$KARMADA_RUNNING_ON_KIND" = true ]; then
65
68
mkdir -p " $ARTIFACTS_PATH /$KARMADA_HOST_CLUSTER_NAME "
66
69
kind export logs --name=" $KARMADA_HOST_CLUSTER_NAME " " $ARTIFACTS_PATH /$KARMADA_HOST_CLUSTER_NAME "
67
70
68
- echo " Collecting $KARMADA_PULL_CLUSTER_NAME logs..."
69
- mkdir -p " $ARTIFACTS_PATH /KARMADA_PULL_CLUSTER_NAME "
70
- kind export logs --name=" $KARMADA_PULL_CLUSTER_NAME " " $ARTIFACTS_PATH /$KARMADA_PULL_CLUSTER_NAME "
71
+ echo " Collecting $PULL_MODE_CLUSTER_NAME logs..."
72
+ mkdir -p " $ARTIFACTS_PATH /$PULL_MODE_CLUSTER_NAME "
73
+ kind export logs --name=" $PULL_MODE_CLUSTER_NAME " " $ARTIFACTS_PATH /$PULL_MODE_CLUSTER_NAME "
71
74
fi
72
75
73
76
echo " Collected logs at $ARTIFACTS_PATH :"
@@ -76,4 +79,78 @@ ls -al "$ARTIFACTS_PATH"
76
79
# Post-run e2e step: delete the extra components.
77
80
" ${REPO_ROOT} " /hack/post-run-e2e.sh
78
81
82
# If the E2E tests failed, exit right away with their exit code; the component
# restart check below is only performed after a successful run.
# TESTING_RESULT is quoted so that an empty or malformed value yields a clear
# test error instead of being word-split into a different command line.
if [ "$TESTING_RESULT" -ne 0 ]; then
  echo "E2E test failed with exit code $TESTING_RESULT, skipping component restart check."
  exit "$TESTING_RESULT"
fi

# Check whether any Karmada component has restarted. A restart is taken to mean
# that an OOM or panic occurred (e.g. due to memory modification) and needs to
# be investigated.
echo "E2E run successfully."
echo "Checking if Karmada components have restarted..."
92
+
93
+ # Function to check pod restart count for a given component
94
#######################################
# Check whether any pod of a Karmada component has restarted.
# Globals:
#   KARMADA_HOST_CLUSTER_NAME (read) - kubectl context used for the query
# Arguments:
#   $1 - label selector matching the component's pods
#   $2 - component name, used only in log messages
# Outputs:
#   Progress, warnings and errors on stdout
# Returns:
#   0 if no restart was detected (also when no pods are found or the pods
#     could not be queried, in which case a warning is printed)
#   1 at the first pod whose containers have restarted
#######################################
check_component_restart() {
  local component_label=$1
  local component_name=$2
  # Loop variables are local so they do not leak into the caller's scope.
  local pod_info pod_name restart_counts restart_count count
  # One or more whitespace-separated non-negative integers.
  local -r counts_re='^[[:space:]]*[0-9]+([[:space:]]+[0-9]+)*[[:space:]]*$'

  echo "Checking ${component_name} pods..."

  # Fetch "<pod name>:<restart counts>" for every matching pod in one call.
  # The template prints the restart count of every container of the pod
  # (space separated), or a single 0 when containerStatuses is missing.
  # The explicit `if !` separates a failed query from an empty result.
  if ! pod_info=$(kubectl --context="${KARMADA_HOST_CLUSTER_NAME}" get pod -n karmada-system -l "${component_label}" \
    -o go-template='{{range .items}}{{.metadata.name}}:{{if .status.containerStatuses}}{{range .status.containerStatuses}}{{.restartCount}} {{end}}{{else}}0{{end}}{{"\n"}}{{end}}' 2>/dev/null); then
    echo "Warning: Unable to query ${component_name} pods, skipping..."
    return 0
  fi

  if [ -z "$pod_info" ]; then
    echo "No pods found for ${component_name}, skipping..."
    return 0
  fi

  while IFS=: read -r pod_name restart_counts; do
    # Skip empty lines
    if [ -z "$pod_name" ]; then
      continue
    fi

    # Sum the restart counts of all containers; fall back to 0 when the value
    # is empty or not numeric.
    restart_count=0
    if [[ "$restart_counts" =~ $counts_re ]]; then
      # Unquoted on purpose: the value was validated to hold only digits and
      # whitespace, so word splitting is safe. 10# forces base 10.
      for count in $restart_counts; do
        restart_count=$((restart_count + 10#$count))
      done
    else
      echo "Warning: Unable to get restart count for pod $pod_name, assuming 0"
    fi

    if [ "$restart_count" -gt 0 ]; then
      echo "ERROR: ${component_name} pod $pod_name has restarted $restart_count times."
      echo "This indicates OOM or panic occurred and needs to be investigated."
      return 1 # Stop at the first restarted pod
    fi
    echo "${component_name} pod $pod_name: no restarts"
  done <<< "$pod_info"

  return 0
}
133
+
134
# Components whose pods are checked for restarts. Each entry has the form
# "<label selector>:<component name>".
components=(
  "app=karmada-controller-manager:karmada-controller-manager"
  "app=karmada-descheduler:karmada-descheduler"
  "app=karmada-metrics-adapter:karmada-metrics-adapter"
  "app=karmada-scheduler:karmada-scheduler"
  "app=karmada-search:karmada-search"
  "app=karmada-scheduler-estimator-${MEMBER_CLUSTER_1_NAME}:karmada-scheduler-estimator-${MEMBER_CLUSTER_1_NAME}"
  "app=karmada-scheduler-estimator-${MEMBER_CLUSTER_2_NAME}:karmada-scheduler-estimator-${MEMBER_CLUSTER_2_NAME}"
  "app=karmada-scheduler-estimator-${PULL_MODE_CLUSTER_NAME}:karmada-scheduler-estimator-${PULL_MODE_CLUSTER_NAME}"
)

# Walk the list in order and abort the script at the first component that
# reports a restart.
for component in "${components[@]}"; do
  # Split the entry at the first colon into selector and display name.
  IFS=':' read -r label name <<< "$component"

  # A passing check moves on to the next component; anything else is fatal.
  check_component_restart "$label" "$name" && continue

  echo "COMPONENT RESTART CHECK FAILED: Component $name has restarted, stopping further checks."
  exit 1
done

echo "All component restart checks passed."
79
156
exit $TESTING_RESULT
0 commit comments