Skip to content

Commit 9fb7ab2

Browse files
authored
NETOBSERV-2358 exit on daemonset failure with logs (#373)
* exit on daemonset failure * lint * check OCP version when possible * find completed state
1 parent 32ffd8b commit 9fb7ab2

File tree

1 file changed

+103
-1
lines changed

1 file changed

+103
-1
lines changed

scripts/functions.sh

Lines changed: 103 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,79 @@ function clusterIsReady() {
164164
fi
165165
}
166166

167+
# Check that the features requested for this run are supported by the
# OpenShift version currently installed on the cluster.
#
# Globals read:    K8S_CLI_BIN, command, options
# Globals written: states, versions, version, returnCode, result
# Depends on:      compare_versions (defined outside this block). It is called
#                  as `compare_versions <current> <required>` and is expected to
#                  leave its verdict in the global `result`; a value of 0 is
#                  treated here as "requirement not met".
# Outputs:         informational and per-feature failure messages on stdout.
# Returns:         0 when the clusterversion query yields no history (reported
#                  as a non-OpenShift cluster), when no Completed history entry
#                  is found, or when every requested feature passes its check;
#                  1 when at least one requested feature fails its check.
function checkClusterVersion() {
  local -a stateList=() versionList=()
  local i

  states=$(${K8S_CLI_BIN} get clusterversion version -o jsonpath='{.status.history[*].state}')
  if [[ -z "${states}" ]]; then
    echo "Can't check version since cluster is not OpenShift"
  else
    versions=$(${K8S_CLI_BIN} get clusterversion version -o jsonpath='{.status.history[*].version}')
    version=""

    # The jsonpath output is a single string with one space-separated word per
    # history entry, so split it explicitly: a command substitution never
    # produces a bash array by itself.
    IFS=" " read -r -a stateList <<< "${states}"
    IFS=" " read -r -a versionList <<< "${versions}"

    # get the current version finding *Completed* state
    # Only pair entries by index when both lists have the same length.
    if [[ "${#stateList[@]}" -eq "${#versionList[@]}" ]]; then
      for i in "${!stateList[@]}"; do
        if [[ "${stateList[$i]}" == "Completed" ]]; then
          version="${versionList[$i]}"
          # The ClusterVersion API documents history as ordered newest first,
          # so the first Completed entry is the version currently running.
          break
        fi
      done
    fi

    if [ -z "${version}" ]; then
      # allow running if no version found since the user may be running an upgrade
      echo "Warning: can't find current version in the clusterversion history"
      echo "Is the cluster upgrading?"
      return 0
    else
      echo "OpenShift version: $version"
    fi

    returnCode=0
    result=""

    # Every check below is evaluated so that all unsupported features are
    # reported in a single run rather than stopping at the first one.
    if [[ "$command" = "packets" ]]; then
      compare_versions "$version" 4.16.0
      if [ "$result" -eq 0 ]; then
        echo "- Packet capture requires OpenShift 4.16 or higher"
        returnCode=1
      fi
    fi

    if [[ "${options[*]}" == *"enable_all"* || "${options[*]}" == *"enable_network_events"* ]]; then
      compare_versions "$version" 4.19.0
      if [ "$result" -eq 0 ]; then
        echo "- Network events requires OpenShift 4.19 or higher"
        returnCode=1
      fi
    fi

    if [[ "${options[*]}" == *"enable_all"* || "${options[*]}" == *"enable_udn_mapping"* ]]; then
      compare_versions "$version" 4.18.0
      if [ "$result" -eq 0 ]; then
        echo "- UDN mapping requires OpenShift 4.18 or higher"
        returnCode=1
      fi
    fi

    if [[ "${options[*]}" == *"enable_all"* || "${options[*]}" == *"enable_pkt_drop"* ]]; then
      compare_versions "$version" 4.14.0
      if [ "$result" -eq 0 ]; then
        echo "- Packet drops requires OpenShift 4.14 or higher"
        returnCode=1
      fi
    fi

    return $returnCode
  fi
}
239+
167240
function namespaceFound() {
168241
# ensure namespace doesn't exist, else we should not override content
169242
if ${K8S_CLI_BIN} get namespace "$namespace" --ignore-not-found=true | grep -q "$namespace"; then
@@ -264,6 +337,11 @@ function setup() {
264337
exit 1
265338
fi
266339

340+
if ! checkClusterVersion; then
341+
printf 'Remove not compatible features and try again\n' >&2
342+
exit 1
343+
fi
344+
267345
if namespaceFound; then
268346
printf "%s namespace already exists. Ensure someone else is not running another capture on this cluster. Else use 'oc netobserv cleanup' to remove the namespace first.\n" "$namespace" >&2
269347
skipCleanup="true"
@@ -701,6 +779,30 @@ function defaultValue() {
701779
fi
702780
}
703781

782+
# Poll the netobserv-cli daemonset until all of its pods are ready.
#
# Polls every 5 seconds, up to 11 times (retries counts down from 10 to 0).
# Stops early when any container in the namespace is waiting with reason
# CrashLoopBackOff.
#
# Globals read:    K8S_CLI_BIN, namespace
# Globals written: retries, ready, required, reasons
# Outputs:         one progress line per poll on stdout; on failure, an error
#                  banner followed by the last log line of the daemonset.
# Returns:         0 as soon as numberReady equals desiredNumberScheduled.
# Exits:           1 (terminating the script) on CrashLoopBackOff or when the
#                  retries are exhausted.
function waitDaemonset(){
  local crashLooping reason

  echo "Waiting for daemonset pods to be ready..."
  retries=10
  while [[ $retries -ge 0 ]]; do
    sleep 5
    ready=$($K8S_CLI_BIN -n "$namespace" get daemonset netobserv-cli -o jsonpath="{.status.numberReady}")
    required=$($K8S_CLI_BIN -n "$namespace" get daemonset netobserv-cli -o jsonpath="{.status.desiredNumberScheduled}")
    reasons=$($K8S_CLI_BIN get pods -n "$namespace" -o jsonpath='{.items[*].status.containerStatuses[*].state.waiting.reason}')
    # De-duplicate the waiting reasons and turn them into an array.
    IFS=" " read -r -a reasons <<< "$(echo "${reasons[@]}" | tr ' ' '\n' | sort -u | tr '\n' ' ')"
    echo "$ready/$required Ready. Reason(s): ${reasons[*]}"

    # Exact-match lookup done in bash so it does not rely on `grep -z`,
    # which is not available in every grep implementation.
    crashLooping="false"
    for reason in "${reasons[@]}"; do
      if [[ "$reason" == "CrashLoopBackOff" ]]; then
        crashLooping="true"
        break
      fi
    done

    if [[ "$crashLooping" == "true" ]]; then
      break
    elif [[ "$ready" =~ ^[0-9]+$ && "$required" =~ ^[0-9]+$ && "$ready" -eq "$required" ]]; then
      # Both counters must be real numbers: empty values (e.g. the status query
      # returned nothing) would otherwise compare as 0 -eq 0 and be reported
      # as ready.
      return 0
    fi

    # Plain assignment instead of ((retries--)): the arithmetic command returns
    # a non-zero status when its value is 0, which would abort the script under
    # `set -e` right before the error report below.
    retries=$((retries - 1))
  done
  echo
  echo "ERROR: Daemonset pods failed to start:"
  ${K8S_CLI_BIN} logs daemonset/netobserv-cli -n "$namespace" --tail=1
  echo
  exit 1
}
805+
704806
# Check if $options are valid
705807
function check_args_and_apply() {
706808
# Iterate through the command-line arguments
@@ -1054,7 +1156,7 @@ function check_args_and_apply() {
10541156
yaml="$(cat "$manifest")"
10551157
applyYAML "$yaml"
10561158
if [[ "$outputYAML" == "false" ]]; then
1057-
${K8S_CLI_BIN} rollout status daemonset netobserv-cli -n "$namespace" --timeout 60s
1159+
waitDaemonset
10581160
fi
10591161
rm -rf ${MANIFEST_OUTPUT_PATH}
10601162
}

0 commit comments

Comments
 (0)