Skip to content

Commit ba5fda4

Browse files
authored
Merge pull request #164 from oracle/int-test-leasing-2
Int test leasing 2
2 parents 6bb6cf1 + 743dd78 commit ba5fda4

File tree

2 files changed

+73
-35
lines changed

2 files changed

+73
-35
lines changed

src/integration-tests/bash/cleanup.sh

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
# PV_ROOT The root directory on the kubernetes cluster
1717
# used for persistent volumes.
1818
#
19+
# LEASE_ID Set this if you want cleanup to release the
20+
# given lease on a failure.
21+
#
1922
# See 'run.sh' for a detailed description of RESULT_ROOT and PV_ROOT.
2023
#
2124
# --------------------
@@ -40,6 +43,9 @@
4043
#
4144
# Phase 4: Delete the local test output directory.
4245
#
46+
# Phase 5: If we own a lease, then release it on a failure;
47+
# see LEASE_ID above.
48+
#
4349

4450
DOMAINS=(domain1 domain2 domain3 domain4 domain5)
4551
DOMAIN_NAMESPACES=(default default test1 test2 default)
@@ -103,11 +109,17 @@ function genericDelete {
103109
resfile_yes="$TMP_DIR/kinv_filtered_yesnamespace.out.tmp"
104110

105111
# leftover namespaced artifacts
106-
kubectl get $1 --show-labels=true --all-namespaces=true 2>&1 | egrep -e "($3)" | awk '{ print $1 " " $2 }' | sort > $resfile_yes 2>&1
112+
kubectl get $1 \
113+
-o=jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.kind}{"/"}{.metadata.name}{"\n"}{end}' \
114+
--all-namespaces=true 2>&1 \
115+
| egrep -e "($3)" | sort > $resfile_yes 2>&1
107116
artcount_yes="`cat $resfile_yes | wc -l`"
108117

109118
# leftover non-namespaced artifacts
110-
kubectl get $2 --show-labels=true --all-namespaces=true 2>&1 | egrep -e "($3)" | awk '{ print $1 }' | sort > $resfile_no 2>&1
119+
kubectl get $2 \
120+
-o=jsonpath='{range .items[*]}{.kind}{"/"}{.metadata.name}{"\n"}{end}' \
121+
--all-namespaces=true 2>&1 \
122+
| egrep -e "($3)" | sort > $resfile_no 2>&1
111123
artcount_no="`cat $resfile_no | wc -l`"
112124

113125
artcount_total=$((artcount_yes + artcount_no))
@@ -334,6 +346,18 @@ rm -f /tmp/test_suite.*
334346

335347
# Bye
336348

349+
if [ ! "$LEASE_ID" = "" ] && [ ! "$SUCCESS" = "0" ]; then
350+
# LEASE_ID is set and SUCCESS is non-zero (failure): release the lease via lease.sh -d
351+
${SCRIPTPATH}/lease.sh -d "$LEASE_ID" > /tmp/release_lease.out 2>&1
352+
if [ "$?" = "0" ]; then
353+
echo @@ Lease released.
354+
else
355+
echo @@ Lease could not be released:
356+
cat /tmp/release_lease.out
357+
fi
358+
rm -f /tmp/release_lease.out
359+
fi
360+
337361
echo @@ Exiting with status $SUCCESS
338362
exit $SUCCESS
339363

src/integration-tests/bash/lease.sh

Lines changed: 47 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,48 @@ EOF
285285
fi
286286
}
287287

288+
function makeLocalLeaseAndReplaceRemote {
289+
# Replace the remote lease with a newly generated local lease that we own.
290+
# Callers must already have determined that it is safe to try to get the lease
291+
# (the lease is either unowned, expired, or owned by us).
292+
#
293+
# TBD: There's a small race because this call deletes the remote lease before
294+
# it creates the replacement,
295+
# which means someone else could come in and snipe it even if we already
296+
# own an older version of the lease and the older version hasn't expired.
297+
# If this happens, this call fails at its final 'checkLease' step
298+
# and the caller is therefore forced to give up their lease. In theory,
299+
# this race could be resolved by using a 'replace -f' pattern - but that
300+
# failed with unexpected errors on some kubectl setups but not others.
301+
# Returns 0 on success, 1 if any step fails.
302+
303+
makeLocalLease
304+
if [ $? -ne 0 ]; then
305+
traceError "failed - could not generate a new local lease"
306+
return 1
307+
fi
308+
309+
deleteRemoteLeaseUnsafe
310+
if [ $? -ne 0 ]; then
311+
traceError "failed - could not delete remote lease"
312+
return 1
313+
fi
314+
315+
kubectl create configmap ${CONFIGMAP_NAME} --from-file ${LOCAL_ROOT}/${LOCAL_FILE} -n default
316+
if [ $? -ne 0 ]; then
317+
traceError "failed - could not replace"
318+
return 1
319+
fi
320+
321+
# finally, check that we now actually own the lease (someone else could have been replacing it at the same time)
322+
checkLease
323+
if [ $? -ne 0 ]; then
324+
traceError "failed - replaced remote lease, but we somehow lost a race or can no longer communicate with kubernetes"
325+
return 1
326+
fi
327+
return 0
328+
}
329+
288330
function getRemoteLease {
289331
#
290332
# first, if the remote lease configmap doesn't exist
@@ -395,25 +437,11 @@ function obtainLease {
395437
# so assume it can be replaced and we can try to take over the lease
396438

397439
# first make a local candidate lease
398-
makeLocalLease
399-
if [ $? -ne 0 ]; then
400-
traceError "failed - could not generate a new local lease"
401-
return 1
402-
fi
403-
404-
# next, try replace remote lease with the candidate lease
405-
kubectl create configmap ${CONFIGMAP_NAME} --from-file ${LOCAL_ROOT}/${LOCAL_FILE} -o yaml -n default --dry-run | kubectl replace -f -
406-
if [ $? -ne 0 ]; then
407-
traceError "failed - could not replace remote lease"
408-
return 1
409-
fi
410-
411-
# finally, check if we now actually own the lease (someone could have been replacing at the same time)
412-
checkLease
440+
makeLocalLeaseAndReplaceRemote
413441
if [ $? -eq 0 ]; then
414442
return 0
415443
else
416-
traceError "failed - replaced remote lease, but kubernetes is not responding or we lost a race and another potential owner replaced it too, will keep retrying up to the timeout"
444+
traceError "failed to replace remote lease, will keep retrying up to the timeout"
417445
fi
418446
fi
419447
local mnow=`date +%s`
@@ -447,26 +475,12 @@ function renewLease {
447475
fi
448476

449477
# now make a new local candidate lease
450-
makeLocalLease
451-
if [ $? -ne 0 ]; then
452-
traceError "failed - could not generate a new local lease"
453-
return 1
454-
fi
455-
456-
# next, try replace remote lease with the candidate lease
457-
kubectl create configmap ${CONFIGMAP_NAME} --from-file ${LOCAL_ROOT}/${LOCAL_FILE} -o yaml -n default --dry-run | kubectl replace -f -
478+
makeLocalLeaseAndReplaceRemote
458479
if [ $? -ne 0 ]; then
459-
traceError "failed - could not get replace remote lease"
480+
traceError "failed to replace remote lease"
460481
return 1
461-
fi
462-
463-
# finally, check if we now actually own the lease (someone could have been replacing at the same time)
464-
checkLease
465-
if [ $? -eq 0 ]; then
466-
return 0
467482
else
468-
traceError "failed - replaced remote lease, but we somehow lost a race or can no longer communicate with kubernetes"
469-
return 1
483+
return 0
470484
fi
471485
}
472486

0 commit comments

Comments
 (0)