Skip to content

Commit 7148248

Browse files
authored
Merge pull request #6541 from XiShanYongYe-Chang/kube-cache-mutation-detector
enable mutation detection in e2e
2 parents 6013c0b + 69bf770 commit 7148248

11 files changed

+126
-23
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ jobs:
144144
- name: setup e2e test environment
145145
run: |
146146
export CLUSTER_VERSION=kindest/node:${{ matrix.k8s }}
147+
export KUBE_CACHE_MUTATION_DETECTOR=true
147148
hack/local-up-karmada.sh
148149
- name: run e2e
149150
run: |

artifacts/deploy/karmada-controller-manager.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ spec:
3131
valueFrom:
3232
fieldRef:
3333
fieldPath: status.podIP
34+
- name: KUBE_CACHE_MUTATION_DETECTOR
35+
value: "{{KUBE_CACHE_MUTATION_DETECTOR}}"
3436
command:
3537
- /bin/karmada-controller-manager
3638
- --kubeconfig=/etc/karmada/config/karmada.config

artifacts/deploy/karmada-descheduler.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ spec:
3131
valueFrom:
3232
fieldRef:
3333
fieldPath: status.podIP
34+
- name: KUBE_CACHE_MUTATION_DETECTOR
35+
value: "{{KUBE_CACHE_MUTATION_DETECTOR}}"
3436
command:
3537
- /bin/karmada-descheduler
3638
- --kubeconfig=/etc/karmada/config/karmada.config

artifacts/deploy/karmada-metrics-adapter.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ spec:
3232
valueFrom:
3333
fieldRef:
3434
fieldPath: status.podIP
35+
- name: KUBE_CACHE_MUTATION_DETECTOR
36+
value: "{{KUBE_CACHE_MUTATION_DETECTOR}}"
3537
command:
3638
- /bin/karmada-metrics-adapter
3739
- --kubeconfig=/etc/karmada/config/karmada.config

artifacts/deploy/karmada-scheduler-estimator.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ spec:
3131
valueFrom:
3232
fieldRef:
3333
fieldPath: status.podIP
34+
- name: KUBE_CACHE_MUTATION_DETECTOR
35+
value: "{{KUBE_CACHE_MUTATION_DETECTOR}}"
3436
command:
3537
- /bin/karmada-scheduler-estimator
3638
- --kubeconfig=/etc/{{member_cluster_name}}-kubeconfig

artifacts/deploy/karmada-scheduler.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ spec:
4444
valueFrom:
4545
fieldRef:
4646
fieldPath: status.podIP
47+
- name: KUBE_CACHE_MUTATION_DETECTOR
48+
value: "{{KUBE_CACHE_MUTATION_DETECTOR}}"
4749
command:
4850
- /bin/karmada-scheduler
4951
- --kubeconfig=/etc/karmada/config/karmada.config

artifacts/deploy/karmada-search.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ spec:
3232
valueFrom:
3333
fieldRef:
3434
fieldPath: status.podIP
35+
- name: KUBE_CACHE_MUTATION_DETECTOR
36+
value: "{{KUBE_CACHE_MUTATION_DETECTOR}}"
3537
command:
3638
- /bin/karmada-search
3739
- --kubeconfig=/etc/karmada/config/karmada.config

hack/deploy-karmada.sh

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ set -o nounset
1919
# This script deploys the karmada control plane to any cluster you want. REPO_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
2020
# This script depends on utils in: ${REPO_ROOT}/hack/util.sh
2121

22+
# Do not run the mutation detector by default on the local karmada instance.
23+
KUBE_CACHE_MUTATION_DETECTOR="${KUBE_CACHE_MUTATION_DETECTOR:-false}"
24+
2225
REPO_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
2326
CERT_DIR=${CERT_DIR:-"${HOME}/.karmada"}
2427
mkdir -p "${CERT_DIR}" &>/dev/null || mkdir -p "${CERT_DIR}"
@@ -185,6 +188,19 @@ function installCRDs() {
185188
kubectl --context="${context_name}" apply -k "${crd_path}"/_crds
186189
}
187190

191+
#######################################
# Render one component manifest, apply it to the host cluster and wait for
# the component's pods to become ready.
#
# The manifest template is read from
# ${REPO_ROOT}/artifacts/deploy/<component_name>.yaml; every occurrence of the
# {{KUBE_CACHE_MUTATION_DETECTOR}} placeholder is replaced with the value of
# ${KUBE_CACHE_MUTATION_DETECTOR} and the result is written to
# <temp_dir>/<component_name>.yaml.
#
# Globals:
#   REPO_ROOT, KUBE_CACHE_MUTATION_DETECTOR, HOST_CLUSTER_NAME,
#   KARMADA_SYSTEM_NAMESPACE (all read)
# Arguments:
#   $1 - component name (basename of the manifest, without ".yaml")
#   $2 - existing directory that receives the rendered manifest
#   $3 - label passed to util::wait_pod_ready to select the pods to wait for
# Returns:
#   non-zero if rendering, applying or waiting fails
#######################################
function deploy_karmada_component() {
  local component_name=$1
  local temp_dir=$2
  local wait_label=$3

  local component_yaml_src="${REPO_ROOT}/artifacts/deploy/${component_name}.yaml"
  local component_yaml_tmp="${temp_dir}/${component_name}.yaml"

  # Escape the characters that are special on the replacement side of sed's
  # s/// command (backslash, the "/" delimiter and "&") so that the value is
  # always inserted literally.
  local detector_value
  detector_value=$(printf '%s' "${KUBE_CACHE_MUTATION_DETECTOR}" | sed -e 's,[\\/&],\\&,g') || return 1

  # Render directly from the template into the temp file instead of copying
  # and editing in place: "sed -i" behaves differently on GNU and BSD sed
  # (BSD leaves "<file>-e" backup files behind with the -i'' -e spelling).
  sed -e "s/{{KUBE_CACHE_MUTATION_DETECTOR}}/${detector_value}/g" \
    "${component_yaml_src}" > "${component_yaml_tmp}" || return 1

  kubectl --context="${HOST_CLUSTER_NAME}" apply -f "${component_yaml_tmp}" || return 1

  util::wait_pod_ready "${HOST_CLUSTER_NAME}" "${wait_label}" "${KARMADA_SYSTEM_NAMESPACE}"
}
203+
188204
# Use x.x.x.8 IP address, which is the same CIDR with the node address of the Kind cluster,
189205
# as the loadBalancer service address of component karmada-interpreter-webhook-example.
190206
interpreter_webhook_example_service_external_ip_prefix=$(echo $(util::get_apiserver_ip_from_kubeconfig "${HOST_CLUSTER_NAME}") | awk -F. '{printf "%s.%s.%s",$1,$2,$3}')
@@ -376,15 +392,14 @@ util::append_client_kubeconfig "${HOST_CLUSTER_KUBECONFIG}" "${ROOT_CA_FILE}" "$
376392
cp "${REPO_ROOT}"/artifacts/deploy/kube-controller-manager.yaml "${TEMP_PATH_APISERVER}"/kube-controller-manager.yaml
377393
sed -i'' -e "s/{{karmada_apiserver_version}}/${KARMADA_APISERVER_VERSION}/g" "${TEMP_PATH_APISERVER}"/kube-controller-manager.yaml
378394
kubectl --context="${HOST_CLUSTER_NAME}" apply -f "${TEMP_PATH_APISERVER}"/kube-controller-manager.yaml
395+
util::wait_pod_ready "${HOST_CLUSTER_NAME}" "${KUBE_CONTROLLER_POD_LABEL}" "${KARMADA_SYSTEM_NAMESPACE}"
379396
# deploy aggregated-apiserver on host cluster
380397
kubectl --context="${HOST_CLUSTER_NAME}" apply -f "${REPO_ROOT}/artifacts/deploy/karmada-aggregated-apiserver.yaml"
381398
util::wait_pod_ready "${HOST_CLUSTER_NAME}" "${KARMADA_AGGREGATION_APISERVER_LABEL}" "${KARMADA_SYSTEM_NAMESPACE}"
382-
# deploy karmada-search on host cluster
383-
kubectl --context="${HOST_CLUSTER_NAME}" apply -f "${REPO_ROOT}/artifacts/deploy/karmada-search.yaml"
384-
util::wait_pod_ready "${HOST_CLUSTER_NAME}" "${KARMADA_SEARCH_LABEL}" "${KARMADA_SYSTEM_NAMESPACE}"
385-
# deploy karmada-metrics-adapter on host cluster
386-
kubectl --context="${HOST_CLUSTER_NAME}" apply -f "${REPO_ROOT}/artifacts/deploy/karmada-metrics-adapter.yaml"
387-
util::wait_pod_ready "${HOST_CLUSTER_NAME}" "${KARMADA_METRICS_ADAPTER_LABEL}" "${KARMADA_SYSTEM_NAMESPACE}"
399+
400+
# Deploy components that need to wait for readiness
401+
deploy_karmada_component "karmada-search" "${TEMP_PATH_APISERVER}" "${KARMADA_SEARCH_LABEL}"
402+
deploy_karmada_component "karmada-metrics-adapter" "${TEMP_PATH_APISERVER}" "${KARMADA_METRICS_ADAPTER_LABEL}"
388403

389404
# install CRD APIs on karmada apiserver.
390405
if ! kubectl config get-contexts "karmada-apiserver" > /dev/null 2>&1;
@@ -449,17 +464,10 @@ sed -i'' -e "s/{{ca_crt}}/${karmada_ca}/g" "${TEMP_PATH_BOOTSTRAP}"/bootstrap-to
449464
sed -i'' -e "s|{{apiserver_address}}|${karmada_apiserver_address}|g" "${TEMP_PATH_BOOTSTRAP}"/bootstrap-token-configuration-tmp.yaml
450465
kubectl --context="karmada-apiserver" apply -f "${TEMP_PATH_BOOTSTRAP}"/bootstrap-token-configuration-tmp.yaml
451466

452-
# deploy controller-manager on host cluster
453-
kubectl --context="${HOST_CLUSTER_NAME}" apply -f "${REPO_ROOT}/artifacts/deploy/karmada-controller-manager.yaml"
454-
# deploy scheduler on host cluster
455-
kubectl --context="${HOST_CLUSTER_NAME}" apply -f "${REPO_ROOT}/artifacts/deploy/karmada-scheduler.yaml"
456-
# deploy descheduler on host cluster
457-
kubectl --context="${HOST_CLUSTER_NAME}" apply -f "${REPO_ROOT}/artifacts/deploy/karmada-descheduler.yaml"
467+
deploy_karmada_component "karmada-controller-manager" "${TEMP_PATH_BOOTSTRAP}" "${KARMADA_CONTROLLER_LABEL}"
468+
deploy_karmada_component "karmada-scheduler" "${TEMP_PATH_BOOTSTRAP}" "${KARMADA_SCHEDULER_LABEL}"
469+
deploy_karmada_component "karmada-descheduler" "${TEMP_PATH_BOOTSTRAP}" "${KARMADA_DESCHEDULER_LABEL}"
470+
458471
# deploy webhook on host cluster
459472
kubectl --context="${HOST_CLUSTER_NAME}" apply -f "${REPO_ROOT}/artifacts/deploy/karmada-webhook.yaml"
460-
461-
# make sure all karmada control plane components are ready
462-
util::wait_pod_ready "${HOST_CLUSTER_NAME}" "${KARMADA_CONTROLLER_LABEL}" "${KARMADA_SYSTEM_NAMESPACE}"
463-
util::wait_pod_ready "${HOST_CLUSTER_NAME}" "${KARMADA_SCHEDULER_LABEL}" "${KARMADA_SYSTEM_NAMESPACE}"
464-
util::wait_pod_ready "${HOST_CLUSTER_NAME}" "${KUBE_CONTROLLER_POD_LABEL}" "${KARMADA_SYSTEM_NAMESPACE}"
465473
util::wait_pod_ready "${HOST_CLUSTER_NAME}" "${KARMADA_WEBHOOK_LABEL}" "${KARMADA_SYSTEM_NAMESPACE}"

hack/deploy-scheduler-estimator.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
set -o errexit
1818
set -o nounset
1919

20+
# Do not run the mutation detector by default on the local karmada instance.
21+
KUBE_CACHE_MUTATION_DETECTOR="${KUBE_CACHE_MUTATION_DETECTOR:-false}"
22+
2023
REPO_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
2124
function usage() {
2225
echo "This script will deploy karmada-scheduler-estimator of a cluster."
@@ -85,6 +88,7 @@ rm -rf "${TEMP_PATH}"
8588
TEMP_PATH=$(mktemp -d)
8689
cp "${REPO_ROOT}"/artifacts/deploy/karmada-scheduler-estimator.yaml "${TEMP_PATH}"/karmada-scheduler-estimator.yaml
8790
sed -i'' -e "s/{{member_cluster_name}}/${MEMBER_CLUSTER_NAME}/g" "${TEMP_PATH}"/karmada-scheduler-estimator.yaml
91+
sed -i'' -e "s/{{KUBE_CACHE_MUTATION_DETECTOR}}/${KUBE_CACHE_MUTATION_DETECTOR}/g" "${TEMP_PATH}"/karmada-scheduler-estimator.yaml
8892
echo -e "Apply dynamic rendered deployment in ${TEMP_PATH}/karmada-scheduler-estimator.yaml\n"
8993
kubectl --kubeconfig="${HOST_CLUSTER_KUBECONFIG}" --context="${HOST_CLUSTER_NAME}" apply -f "${TEMP_PATH}"/karmada-scheduler-estimator.yaml
9094
rm -rf "${TEMP_PATH}"

hack/run-e2e.sh

Lines changed: 83 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16-
1716
set -o errexit
1817
set -o nounset
1918
set -o pipefail
@@ -26,17 +25,21 @@ set -o pipefail
2625
# Example 1: hack/run-e2e.sh (run e2e with default config)
2726
# Example 2: export KARMADA_APISERVER_KUBECONFIG=<KUBECONFIG PATH> hack/run-e2e.sh (run e2e with your KUBECONFIG)
2827

28+
# Cluster name definitions
29+
MEMBER_CLUSTER_1_NAME=${MEMBER_CLUSTER_1_NAME:-"member1"}
30+
MEMBER_CLUSTER_2_NAME=${MEMBER_CLUSTER_2_NAME:-"member2"}
31+
PULL_MODE_CLUSTER_NAME=${PULL_MODE_CLUSTER_NAME:-"member3"}
32+
2933
KUBECONFIG_PATH=${KUBECONFIG_PATH:-"${HOME}/.kube"}
3034
KARMADA_APISERVER_KUBECONFIG=${KARMADA_APISERVER_KUBECONFIG:-"$KUBECONFIG_PATH/karmada.config"}
31-
PULL_BASED_CLUSTERS=${PULL_BASED_CLUSTERS:-"member3:$KUBECONFIG_PATH/members.config"}
35+
PULL_BASED_CLUSTERS=${PULL_BASED_CLUSTERS:-"${PULL_MODE_CLUSTER_NAME}:$KUBECONFIG_PATH/members.config"}
3236

3337
# KARMADA_RUNNING_ON_KIND indicates whether the tests run against a karmada instance that is installed on a kind cluster.
3438
# Defaults to true.
3539
# For a kind cluster, the kind-related logs are collected after the tests finish.
3640
KARMADA_RUNNING_ON_KIND=${KARMADA_RUNNING_ON_KIND:-true}
3741

3842
KARMADA_HOST_CLUSTER_NAME=${KARMADA_HOST_CLUSTER_NAME:-"karmada-host"}
39-
KARMADA_PULL_CLUSTER_NAME=${KARMADA_PULL_CLUSTER_NAME:-"member3"}
4043

4144
ARTIFACTS_PATH=${ARTIFACTS_PATH:-"${HOME}/karmada-e2e-logs"}
4245
mkdir -p "$ARTIFACTS_PATH"
@@ -65,9 +68,9 @@ if [ "$KARMADA_RUNNING_ON_KIND" = true ]; then
6568
mkdir -p "$ARTIFACTS_PATH/$KARMADA_HOST_CLUSTER_NAME"
6669
kind export logs --name="$KARMADA_HOST_CLUSTER_NAME" "$ARTIFACTS_PATH/$KARMADA_HOST_CLUSTER_NAME"
6770

68-
echo "Collecting $KARMADA_PULL_CLUSTER_NAME logs..."
69-
mkdir -p "$ARTIFACTS_PATH/KARMADA_PULL_CLUSTER_NAME"
70-
kind export logs --name="$KARMADA_PULL_CLUSTER_NAME" "$ARTIFACTS_PATH/$KARMADA_PULL_CLUSTER_NAME"
71+
echo "Collecting $PULL_MODE_CLUSTER_NAME logs..."
72+
mkdir -p "$ARTIFACTS_PATH/$PULL_MODE_CLUSTER_NAME"
73+
kind export logs --name="$PULL_MODE_CLUSTER_NAME" "$ARTIFACTS_PATH/$PULL_MODE_CLUSTER_NAME"
7174
fi
7275

7376
echo "Collected logs at $ARTIFACTS_PATH:"
@@ -76,4 +79,78 @@ ls -al "$ARTIFACTS_PATH"
7679
# After the e2e run, delete the extra components
7780
"${REPO_ROOT}"/hack/post-run-e2e.sh
7881

82+
# If E2E test failed, exit directly with the test result
83+
if [ $TESTING_RESULT -ne 0 ]; then
84+
echo "E2E test failed with exit code $TESTING_RESULT, skipping component restart check."
85+
exit $TESTING_RESULT
86+
fi
87+
88+
# Check whether any Karmada component has restarted; a restart means that an OOM or panic has occurred
89+
# due to memory modification, and needs to be investigated.
90+
echo "E2E run successfully."
91+
echo "Checking if Karmada components have restarted..."
92+
93+
#######################################
# Check the restart count of every pod of one component.
#
# Pods are listed in the karmada-system namespace of the
# ${KARMADA_HOST_CLUSTER_NAME} context using the given label selector. For
# each pod the restart count of its first container status is inspected.
#
# Globals:
#   KARMADA_HOST_CLUSTER_NAME (read)
# Arguments:
#   $1 - label selector of the component's pods (e.g. "app=karmada-search")
#   $2 - human-readable component name used in the log messages
# Outputs:
#   progress and result messages on stdout; a warning on stderr when the pod
#   list cannot be retrieved
# Returns:
#   1 as soon as a pod with a restart count greater than 0 is found,
#   0 otherwise (including when no pods exist or kubectl fails)
#######################################
check_component_restart() {
  local component_label=$1
  local component_name=$2

  echo "Checking ${component_name} pods..."

  # Get pod information in a single call, including both name and restart count.
  # The template prints "<pod name>:<restart count>" per pod and falls back to
  # 0 when a pod has no containerStatuses yet.
  local pod_info
  local kubectl_status=0
  pod_info=$(kubectl --context="${KARMADA_HOST_CLUSTER_NAME}" get pod -n karmada-system -l "${component_label}" \
    -o go-template='{{range .items}}{{.metadata.name}}:{{if .status.containerStatuses}}{{(index .status.containerStatuses 0).restartCount}}{{else}}0{{end}}{{"\n"}}{{end}}' 2>/dev/null) || kubectl_status=$?

  # A failing kubectl must not be mistaken for "component not deployed":
  # keep the check best-effort, but make the skipped check visible in the logs.
  if [ "${kubectl_status}" -ne 0 ]; then
    echo "Warning: kubectl exited with code ${kubectl_status} while listing ${component_name} pods, restart check skipped." >&2
    return 0
  fi

  if [ -z "$pod_info" ]; then
    echo "No pods found for ${component_name}, skipping..."
    return 0
  fi

  # Process each pod's information
  local pod_name restart_count
  while IFS=: read -r pod_name restart_count; do
    # Skip empty lines
    [ -z "$pod_name" ] && continue

    # Ensure restart_count is a number (default to 0 if empty or invalid)
    if ! [[ "$restart_count" =~ ^[0-9]+$ ]]; then
      echo "Warning: Unable to get restart count for pod $pod_name, assuming 0"
      restart_count=0
    fi

    # Plain "[ -gt ]" compares decimal integers; "[[ -gt ]]" or "(( ))" would
    # evaluate the operand arithmetically and choke on values such as "08".
    if [ "$restart_count" -gt 0 ]; then
      echo "ERROR: ${component_name} pod $pod_name has restarted $restart_count times."
      echo "This indicates OOM or panic occurred and needs to be investigated."
      return 1  # Return failure to stop checking
    else
      echo "${component_name} pod $pod_name: no restarts"
    fi
  done <<< "$pod_info"

  return 0
}
133+
134+
# List of components to check (entry format: label_selector:component_name)
135+
components=(
136+
"app=karmada-controller-manager:karmada-controller-manager"
137+
"app=karmada-descheduler:karmada-descheduler"
138+
"app=karmada-metrics-adapter:karmada-metrics-adapter"
139+
"app=karmada-scheduler:karmada-scheduler"
140+
"app=karmada-search:karmada-search"
141+
"app=karmada-scheduler-estimator-${MEMBER_CLUSTER_1_NAME}:karmada-scheduler-estimator-${MEMBER_CLUSTER_1_NAME}"
142+
"app=karmada-scheduler-estimator-${MEMBER_CLUSTER_2_NAME}:karmada-scheduler-estimator-${MEMBER_CLUSTER_2_NAME}"
143+
"app=karmada-scheduler-estimator-${PULL_MODE_CLUSTER_NAME}:karmada-scheduler-estimator-${PULL_MODE_CLUSTER_NAME}"
144+
)
145+
146+
# Check each component, stop at first failure
147+
for component in "${components[@]}"; do
148+
IFS=':' read -r label name <<< "$component"
149+
if ! check_component_restart "$label" "$name"; then
150+
echo "COMPONENT RESTART CHECK FAILED: Component $name has restarted, stopping further checks."
151+
exit 1
152+
fi
153+
done
154+
155+
echo "All component restart checks passed."
79156
exit $TESTING_RESULT

0 commit comments

Comments
 (0)