Skip to content

Commit 82d4656

Browse files
author
Tom Barnes
committed
Delete script enhancements: (1) Before directly deleting all k8s objects, set startupControl on each domain to NONE and wait up to half of max wait seconds for the operator to shut down its WLS pods normally. (2) Increase default max wait seconds to 120 seconds.
1 parent c93aae4 commit 82d4656

File tree

1 file changed

+112
-31
lines changed

1 file changed

+112
-31
lines changed

kubernetes/delete-domain.sh

Lines changed: 112 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@
1212
# See "function usage" below or call this script with no parameters.
1313
#
1414

15-
# default when to stop retrying (settable via command line)
16-
default_maxwaitsecs=90
15+
# default when to stop retrying (override via command line)
16+
default_maxwaitsecs=120
1717

1818
# optional test mode that lists what would be deleted without
19-
# actually deleting (settable via command line)
19+
# actually deleting (override via command line)
2020
test_mode=false
2121

2222

@@ -33,10 +33,27 @@ cat << EOF
3333
or all artifacts were deleted (default $default_maxwaitsecs seconds).
3434
3535
The domains can be specified as a comma-separated list of
36-
domain-uids (no spaces), or the keyword 'all'.
36+
domain-uids (no spaces), or the keyword 'all'. The domains can be
37+
located in any kubernetes namespace.
3738
3839
Specify '-t' to run the script in a test mode which will
39-
show delete commands but not actually perform them.
40+
show kubernetes commands but not actually perform them.
41+
42+
The delete occurs in three phases:
43+
44+
Phase 1: Set the startupControl of each domain to NONE
45+
if it's not already NONE. This should cause each
46+
domain's operator to initiate a controlled
47+
shutdown of the domain.
48+
49+
Phase 2: Wait up to half the max wait seconds
50+
for WebLogic Server pods to exit normally.
51+
52+
Phase 3: Delete all kubernetes objects for the
53+
specified domains, including any pods
54+
leftover from phase 2. Give up if max
55+
seconds is exceeded and there are any
56+
leftover kubernetes objects for the domain(s).
4057
4158
This script exits with a zero status on success, and a
4259
non-zero status on failure.
@@ -45,18 +62,27 @@ EOF
4562

4663

4764
#
48-
# getDomain
49-
# - get all k8s artifacts for domain $1 using label search weblogic.domainUID in $1
50-
# - if $1 has special value "all" then get the k8s artifacts for all domains
65+
# getDomains domain(s) outfilename
66+
#
67+
# Usage:
68+
# getDomains domainA,domainB,... outfilename
69+
# getDomains all outfilename
70+
#
71+
# Internal helper function
5172
#
52-
function getDomain {
73+
# File output is all domain related artifacts for the given domain uids, one per line,
74+
# in the form: 'kind name [-n namespace]'. For example:
75+
# PersistentVolumeClaim domain1-pv-claim -n default
76+
# PersistentVolume domain1-pv
77+
#
78+
function getDomains {
5379
if [ "$1" = "all" ]; then
5480
local label_selector="weblogic.domainUID"
5581
else
5682
local label_selector="weblogic.domainUID in ($1)"
5783
fi
5884

59-
# get all namespaced types with -l $label_selector
85+
# first, let's get all namespaced types with -l $label_selector
6086

6187
local namespaced_types="pod,job,deploy,rs,service,pvc,ingress,cm,serviceaccount,role,rolebinding,secret"
6288

@@ -69,53 +95,108 @@ function getDomain {
6995
kubectl get $namespaced_types \
7096
-l "$label_selector" \
7197
-o=jsonpath='{range .items[*]}{.kind}{" "}{.metadata.name}{" -n "}{.metadata.namespace}{"\n"}{end}' \
72-
--all-namespaces=true
98+
--all-namespaces=true > $2
7399

74-
# get all non-namespaced types with -l $label_selector
100+
# now, get all non-namespaced types with -l $label_selector
75101

76102
kubectl get pv,crd,clusterroles,clusterrolebindings \
77103
-l "$label_selector" \
78104
-o=jsonpath='{range .items[*]}{.kind}{" "}{.metadata.name}{"\n"}{end}' \
79-
--all-namespaces=true
105+
--all-namespaces=true >> $2
80106
}
81107

82108
#
83-
# deleteDomain
84-
# - delete all k8s artifacts for domain $1 and retry up to $2 seconds
85-
# - if $1 has special value "all" then delete the k8s artifacts for all domains
86-
# - $2 is optional, default is $default_maxwaitsecs
87-
# - if $test_mode is true, show deletes but don't actually perform them
88-
function deleteDomain {
109+
# deleteDomains domain(s) maxwaitsecs
110+
#
111+
# Usage:
112+
# deleteDomains domainA,domainB,... maxwaitsecs
113+
# deleteDomains all maxwaitsecs
114+
#
115+
# Internal helper function
116+
# This function first sets the startupControl of each Domain to NONE
117+
# and waits up to half of $2 for pods to 'self delete'. It then deletes
118+
# all remaining k8s artifacts for domain $1 (including any remaining pods)
119+
# and retries up to $2 seconds.
120+
#
121+
# If $1 has special value "all", it deletes all domains in all namespaces.
122+
#
123+
# If global $test_mode is true, show candidate actions but don't actually perform them
124+
#
125+
function deleteDomains {
89126

90127
if [ "$test_mode" = "true" ]; then
91-
echo @@ Test mode. Delete commands for kubernetes artifacts with label weblogic.domainUID \'$1\'.
128+
echo @@ Test mode! Displaying commands for deleting kubernetes artifacts with label weblogic.domainUID \'$1\' without actually deleting them.
92129
else
93130
echo @@ Deleting kubernetes artifacts with label weblogic.domainUID \'$1\'.
94131
fi
95132

96133
local maxwaitsecs=${2:-$default_maxwaitsecs}
97134
local tempfile="/tmp/getdomain.tmp.$1.$$"
98135
local mstart=`date +%s`
136+
local phase=1
99137

100138
while : ; do
101-
getDomain $1 > $tempfile
102-
local count=`wc -l $tempfile | awk '{ print $1 }'`
139+
# get all k8s objects with matching domain-uid labels and put them in $tempfile
140+
getDomains $1 $tempfile
141+
142+
# get a count of all k8s objects with matching domain-uid labels
143+
local allcount=`wc -l $tempfile | awk '{ print $1 }'`
144+
145+
# get a count of all WLS pods (any pod with a matching domain-uid label that doesn't have 'traefik' embedded in its name)
146+
local podcount=`grep "^Pod" $tempfile | grep -v traefik | wc -l | awk '{ print $1 }'`
103147

104148
local mnow=`date +%s`
105149

106-
echo @@ $count objects remaining after $((mnow - mstart)) seconds. Max wait is $maxwaitsecs seconds.
107-
if [ $count -eq 0 ]; then
150+
echo @@ $allcount objects remaining after $((mnow - mstart)) seconds, including $podcount WebLogic Server pods. Max wait is $maxwaitsecs seconds.
151+
152+
# Exit if all k8s objects are deleted or max wait seconds is exceeded.
153+
154+
if [ $allcount -eq 0 ]; then
108155
echo @@ Success.
109156
rm -f $tempfile
110157
exit 0
111-
fi
112-
113-
if [ $((mnow - mstart)) -gt $maxwaitsecs ]; then
114-
echo @@ Error. Max wait of $maxwaitsecs seconds exceeded with $count objects remaining. giving up. Remaining objects:
158+
elif [ $((mnow - mstart)) -gt $maxwaitsecs ]; then
159+
echo @@ Error! Max wait of $maxwaitsecs seconds exceeded with $allcount objects remaining, including $podcount WebLogic Server pods. Giving up. Remaining objects:
115160
cat $tempfile
116161
rm -f $tempfile
117-
exit $count
162+
exit $allcount
163+
fi
164+
165+
# In phase 1, set the startupControl of each domain to NONE and then immediately
166+
# proceed to phase 2. If there are no domains or WLS pods, we also immediately go to phase 2.
167+
168+
if [ $phase -eq 1 -a $podcount -gt 0 ]; then
169+
echo @@ "Setting startupControl to NONE on each domain (this should cause operator(s) to initiate a controlled shutdown of the domain's pods.)"
170+
cat $tempfile | grep "^Domain" | while read line; do
171+
local name="`echo $line | awk '{ print $2 }'`"
172+
local namespace="`echo $line | awk '{ print $4 }'`"
173+
if [ "$test_mode" = "true" ]; then
174+
echo "kubectl patch domain $name -n $namespace -p '{\"spec\":{\"startupControl\":\"NONE\"}}' --type merge"
175+
else
176+
kubectl patch domain $name -n $namespace -p '{"spec":{"startupControl":"NONE"}}' --type merge
177+
fi
178+
done
118179
fi
180+
phase=2
181+
182+
# In phase 2, wait for the WLS pod count to go down to 0 for at most half
183+
# of 'maxwaitsecs'. Otherwise proceed immediately to phase 3.
184+
185+
if [ $phase -eq 2 ]; then
186+
if [ $podcount -eq 0 ]; then
187+
echo @@ All pods shutdown, about to directly delete remaining artifacts.
188+
elif [ $((mnow - mstart)) -gt $((maxwaitsecs / 2)) ]; then
189+
echo @@ Warning! $podcount WebLogic Server pods remaining but wait time exceeds half of max wait seconds. About to directly delete all remaining artifacts, including the leftover pods.
190+
else
191+
echo @@ "Waiting for operator to shutdown pods (will wait for no more than half of max wait seconds before directly deleting them)."
192+
sleep 3
193+
continue
194+
fi
195+
fi
196+
phase=3
197+
198+
# In phase 3, directly delete all k8s artifacts for the given domainUids
199+
# (including any leftover WLS pods from phases 1 & 2).
119200

120201
cat $tempfile | while read line; do
121202
if [ "$test_mode" = "true" ]; then
@@ -158,8 +239,8 @@ if [ "$domains" = "" ]; then
158239
fi
159240

160241
if [ ! -x "$(command -v kubectl)" ]; then
161-
echo "@@ Error. kubectl is not installed."
242+
echo "@@ Error! kubectl is not installed."
162243
exit 9999
163244
fi
164245

165-
deleteDomain "${domains}" "${maxwaitsecs}"
246+
deleteDomains "${domains}" "${maxwaitsecs:-$default_maxwaitsecs}"

0 commit comments

Comments
 (0)