Skip to content

Commit c54db73

Browse files
wangke19 and claude authored
Add verification for kube-root-ca.crt configmap after cert rotation (openshift#71089)
After rotating the service-network-serving-signer certificate, the new cert must be reflected in the kube-root-ca.crt configmap in the openshift-kube-apiserver namespace. Testing shows this update can take up to 10 minutes.

This adds a comprehensive checkpoint that:
- Captures the old cert details before rotation
- Verifies the cert has actually been rotated (subject/notBefore changed)
- Waits up to 15 minutes for the new cert to appear in kube-root-ca.crt
- Matches the cert by its full subject string
- Provides detailed diagnostics showing both old and new cert details if the check fails

This confirms that the fix for OCPBUGS-60045 is working correctly and guards against potential cluster degradation with x509 errors in kube-apiserver, kube-controller-manager, and kube-scheduler.

Related to: https://issues.redhat.com/browse/OCPBUGS-60045

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude <noreply@anthropic.com>
1 parent 3cffe70 commit c54db73

File tree

1 file changed

+83
-0
lines changed

1 file changed

+83
-0
lines changed

ci-operator/step-registry/openshift/e2e/test/qe/regenerate-cluster-cert/openshift-e2e-test-qe-regenerate-cluster-cert-commands.sh

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,95 @@ oc adm ocp-certificates regenerate-leaf -n openshift-config-managed secrets kube
8888
# Regenerate the listed leaf certificates in the kube-apiserver operator and operand
# namespaces, then run `oc adm wait-for-stable-cluster` before continuing.
oc adm ocp-certificates regenerate-leaf \
  -n openshift-kube-apiserver-operator \
  secrets \
  node-system-admin-client
oc adm ocp-certificates regenerate-leaf \
  -n openshift-kube-apiserver \
  secrets \
  check-endpoints-client-cert-key \
  control-plane-node-admin-client-cert-key \
  external-loadbalancer-serving-certkey \
  internal-loadbalancer-serving-certkey \
  kubelet-client \
  localhost-recovery-serving-certkey \
  localhost-serving-cert-certkey \
  service-network-serving-certkey
oc adm wait-for-stable-cluster
91+
# Capture the current service-network-serving-signer cert details before rotation, so the
# post-rotation check can tell whether the signer cert actually changed.
# Related to: https://issues.redhat.com/browse/OCPBUGS-60045
#
# Sets: old_cert_subject   - output of `openssl x509 -subject -noout`
#       old_cert_notbefore - the "notBefore=..." line of the same certificate
# Exits 1 if either value cannot be extracted.
echo "Capturing service-network-serving-signer cert details before rotation..."

# Fetch the secret once and derive both fields from the same PEM, so subject and notBefore
# always describe the same certificate. `|| true` keeps a failed fetch from aborting the
# script (if errexit/pipefail are enabled) before the diagnostics below can be printed.
old_cert_pem=$(oc get secret service-network-serving-signer -o jsonpath='{.data.tls\.crt}' -n openshift-kube-apiserver-operator 2>/dev/null | base64 -d 2>/dev/null) || true
old_cert_subject=$(printf '%s\n' "$old_cert_pem" | openssl x509 -subject -noout 2>/dev/null) || true
# -startdate prints exactly the "notBefore=..." line that `-dates | grep notBefore` selects.
old_cert_notbefore=$(printf '%s\n' "$old_cert_pem" | openssl x509 -startdate -noout 2>/dev/null) || true

if [ -z "$old_cert_subject" ] || [ -z "$old_cert_notbefore" ]; then
  echo "ERROR: Failed to extract service-network-serving-signer cert details before rotation"
  echo "  old_cert_subject: '$old_cert_subject'"
  echo "  old_cert_notbefore: '$old_cert_notbefore'"
  exit 1
fi

echo "Current service-network-serving-signer cert: $old_cert_subject, $old_cert_notbefore"
105+
91106
# generate new roots of trust
oc adm ocp-certificates regenerate-top-level \
  -n openshift-kube-apiserver-operator \
  secrets \
  kube-apiserver-to-kubelet-signer \
  kube-control-plane-signer \
  loadbalancer-serving-signer \
  localhost-serving-signer \
  service-network-serving-signer

# Delete the two next-* service-account key secrets
# (presumably so they are recreated after the signer rotation; confirm with the owners).
oc -n openshift-kube-controller-manager-operator \
  delete secrets/next-service-account-private-key
oc -n openshift-kube-apiserver-operator \
  delete secrets/next-bound-service-account-signing-key

oc adm wait-for-stable-cluster
96111

112+
# Verify new service-network-serving-signer cert is reflected in kube-root-ca.crt configmap
# After rotating the kube-apiserver-service-network-signer cert, the new cert must be reflected
# in the kube-root-ca.crt configmap in openshift-kube-apiserver namespace.
# Testing shows this can take up to 10 minutes, so we wait up to 15 minutes.
#
# Reads: old_cert_subject, old_cert_notbefore (captured before rotation).
# Sets:  cert_included (true/false), new_cert_subject, new_cert_notbefore (last observed
#        signer cert details, used by the failure diagnostics that follow the loop).

# Print the base64 body of every certificate in the PEM stream on stdin, one certificate
# per line with all whitespace removed, so certificates can be compared for exact equality
# regardless of line wrapping.
_pem_cert_bodies() {
  awk '
    /-----BEGIN CERTIFICATE-----/ { in_cert = 1; body = ""; next }
    /-----END CERTIFICATE-----/   { if (in_cert && body != "") print body; in_cert = 0; next }
    in_cert { gsub(/[[:space:]]/, ""); body = body $0 }
  '
}

echo "Verifying new service-network-serving-signer cert is reflected in kube-root-ca.crt configmap..."
max_retries=90  # 90 retries * 10 seconds = 15 minutes
retry_count=0
cert_included=false

while [ "$retry_count" -lt "$max_retries" ]; do
  # Fetch the signer cert once per attempt so that subject, notBefore and the encoded body
  # all describe the same certificate. `|| true` keeps a transient failure from aborting
  # the script (if errexit/pipefail are enabled) instead of being retried.
  new_cert_pem=$(oc get secret service-network-serving-signer -o jsonpath='{.data.tls\.crt}' -n openshift-kube-apiserver-operator 2>/dev/null | base64 -d 2>/dev/null) || true
  new_cert_subject=$(printf '%s\n' "$new_cert_pem" | openssl x509 -subject -noout 2>/dev/null) || true
  # -startdate prints exactly the "notBefore=..." line that `-dates | grep notBefore` selects.
  new_cert_notbefore=$(printf '%s\n' "$new_cert_pem" | openssl x509 -startdate -noout 2>/dev/null) || true

  if [ -z "$new_cert_subject" ]; then
    echo "Warning: Could not extract service-network-serving-signer cert subject"
    retry_count=$((retry_count + 1))
    sleep 10
    continue
  fi

  # Verify the cert has actually been rotated (subject or notBefore changed)
  if [ "$new_cert_subject" = "$old_cert_subject" ] && [ "$new_cert_notbefore" = "$old_cert_notbefore" ]; then
    retry_count=$((retry_count + 1))
    echo "Waiting for service-network-serving-signer cert to be rotated... (attempt $retry_count/$max_retries)"
    sleep 10
    continue
  fi

  # Identify the new cert by its exact encoded body rather than by CN: a rotated signer may
  # keep the same subject (only notBefore changes), in which case a CN search would be
  # satisfied by the old cert that is still present in the bundle.
  # sed (not head) takes the first line so the upstream awk never sees a closed pipe.
  new_cert_body=$(printf '%s\n' "$new_cert_pem" | _pem_cert_bodies | sed -n '1p') || true

  if [ -z "$new_cert_body" ]; then
    echo "Warning: Could not extract certificate body from service-network-serving-signer cert"
    retry_count=$((retry_count + 1))
    sleep 10
    continue
  fi

  # Check whether that exact certificate is one of the certs in the kube-root-ca.crt configmap
  ca_bundle_bodies=$(oc get cm kube-root-ca.crt -o jsonpath='{.data.ca\.crt}' -n openshift-kube-apiserver 2>/dev/null | _pem_cert_bodies) || true
  if grep -Fxq -- "$new_cert_body" <<<"$ca_bundle_bodies"; then
    echo "SUCCESS: New service-network-serving-signer cert is included in kube-root-ca.crt configmap"
    echo "  New cert: $new_cert_subject, $new_cert_notbefore"
    cert_included=true
    break
  fi

  retry_count=$((retry_count + 1))
  echo "Waiting for new service-network-serving-signer cert to be reflected in kube-root-ca.crt... (attempt $retry_count/$max_retries)"
  echo "  New cert: $new_cert_subject, $new_cert_notbefore"
  sleep 10
done
164+
165+
# Fail the step with diagnostics when the wait loop above did not find the new signer cert
# in kube-root-ca.crt. Reads cert_included, old_cert_subject/old_cert_notbefore and
# new_cert_subject/new_cert_notbefore; exits 1.
if [ "$cert_included" = false ]; then
  echo "ERROR: New service-network-serving-signer cert is NOT included in kube-root-ca.crt configmap in openshift-kube-apiserver namespace after 15 minutes"
  echo "This may cause cluster degradation with x509 errors in kube-apiserver, kube-controller-manager, and kube-scheduler"
  # Point at the actual cause when the wait timed out before it ever reached the
  # configmap comparison, so the failure is not misread as a propagation problem.
  if [ -z "$new_cert_subject" ]; then
    echo "NOTE: the service-network-serving-signer cert could not be read on the last attempt"
  elif [ "$new_cert_subject" = "$old_cert_subject" ] && [ "$new_cert_notbefore" = "$old_cert_notbefore" ]; then
    echo "NOTE: the service-network-serving-signer cert itself was never rotated (subject and notBefore are unchanged)"
  fi
  echo ""
  echo "Old service-network-serving-signer cert: $old_cert_subject, $old_cert_notbefore"
  echo "New service-network-serving-signer cert: $new_cert_subject, $new_cert_notbefore"
  echo ""
  echo "Current service-network-serving-signer cert details:"
  # Capture first and test for emptiness: with `... | head -20 || echo`, the pipeline's exit
  # status is head's, so the fallback message would not fire when extraction fails.
  signer_cert_text=$(oc get secret service-network-serving-signer -o jsonpath='{.data.tls\.crt}' -n openshift-kube-apiserver-operator 2>/dev/null | base64 -d 2>/dev/null | openssl x509 -text -noout 2>/dev/null) || true
  if [ -n "$signer_cert_text" ]; then
    printf '%s\n' "$signer_cert_text" | sed -n '1,20p'
  else
    echo "Failed to extract cert details"
  fi
  echo ""
  echo "Certs in kube-root-ca.crt configmap:"
  oc get cm kube-root-ca.crt -o jsonpath='{.data.ca\.crt}' -n openshift-kube-apiserver 2>/dev/null \
    | openssl crl2pkcs7 -certfile /dev/stdin -nocrl 2>/dev/null \
    | openssl pkcs7 -print_certs -text -in /dev/stdin 2>/dev/null \
    | grep "Issuer:" -A5 || echo "Failed to extract configmap certs"
  exit 1
fi
179+
97180
# Read the platform type (.spec.platformSpec.type) of the first
# infrastructures.config.openshift.io object; it is compared against "GCP" below.
cloud_type=$(oc get infrastructures.config.openshift.io -ojsonpath='{.items[0].spec.platformSpec.type}')
98181
# Configuration for the cluster in manual mode with GCP Workload Identity
99182
if [[ "X${cloud_type}" == "XGCP" ]]; then

0 commit comments

Comments
 (0)