Skip to content

Commit 5c8483b

Browse files
committed
Force shutdown of the admin server when a Coherence cluster exists; otherwise the Coherence MBeans cannot be found after a graceful admin server restart. Also reduce the retry sleep intervals (timeouts).
1 parent 8a9a8ce commit 5c8483b

File tree

1 file changed

+17
-8
lines changed

1 file changed

+17
-8
lines changed

operator/src/main/resources/scripts/stop-server.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -105,20 +105,22 @@ def checkCoherenceClusterExist(configData):
105105
# The user must set that timeout to a large enough value to give Coherence time to get safe.
106106
def waitUntilCoherenceSafe():
107107
print ('Shutdown: getting all service Coherence MBeans')
108-
109-
domainRuntime()
110108
query='Coherence:type=PartitionAssignment,service=*,*'
111109

112110
# Wait forever until we get positive ack that it is ok to shutdown this server.
113111
done = False
112+
warnSleep = True
114113
while (not done):
115114
try:
115+
domainRuntime()
116116
beans = list(mbs.queryMBeans(ObjectName(query), None))
117117
if beans is None or len(beans) == 0:
118118
# during rolling restart the beans might not be available right away
119119
# we need to wait since we know Coherence is enabled
120-
print('Shutdown: Waiting until Coherence MBeans available... ')
121-
systime.sleep(5)
120+
if warnSleep:
121+
print('Shutdown: Waiting until Coherence MBeans available... ')
122+
warnSleep = False
123+
systime.sleep(1)
122124
continue
123125

124126
# Loop waiting for each service to be safe
@@ -133,7 +135,7 @@ def waitUntilCoherenceSafe():
133135
print ("Shutdown: Exception checking a service Coherence HAStatus, retrying...")
134136
traceback.print_exc(file=sys.stdout)
135137
dumpStack()
136-
systime.sleep(30)
138+
systime.sleep(10)
137139
pass
138140

139141

@@ -167,7 +169,7 @@ def waitUntilServiceSafeToShutdown(objectName):
167169
print ('Shutdown: An exception occurred getting Coherence MBeans, staying in loop checking for safe')
168170
traceback.print_exc(file=sys.stdout)
169171
dumpStack()
170-
systime.sleep(30)
172+
systime.sleep(10)
171173
pass
172174

173175

@@ -176,6 +178,7 @@ def waitUntilServiceSafeToShutdown(objectName):
176178
#----------------------------------
177179
print ("Shutdown: main script")
178180
domain_uid = getEnvVar('DOMAIN_UID')
181+
admin_name = getEnvVar('ADMIN_NAME')
179182
server_name = getEnvVar('SERVER_NAME')
180183
domain_name = getEnvVar('DOMAIN_NAME')
181184
domain_path = getEnvVar('DOMAIN_HOME')
@@ -209,6 +212,12 @@ def waitUntilServiceSafeToShutdown(objectName):
209212
if (cohExists):
210213
print ('Shutdown: Coherence cluster exists')
211214
connect_url = local_admin_protocol + '://' + admin_host + ':' + admin_port
215+
216+
# must use force shutdown for admin server since Coherence MBeans cannot be found after
217+
# a graceful admin server restart
218+
if admin_name == server_name:
219+
force = 'true'
220+
212221
else:
213222
print ('Shutdown: Coherence cluster does not exist')
214223
connect_url = local_admin_protocol + '://' + service_name + ':' + local_admin_port
@@ -233,7 +242,7 @@ def waitUntilServiceSafeToShutdown(objectName):
233242
waitUntilCoherenceSafe()
234243
cohSafe = True
235244

236-
print('Shutdown: Calling server shutdown')
245+
print('Shutdown: Calling server shutdown with force = ' + force)
237246
shutdown(server_name, 'Server', ignoreSessions=ignore_sessions, timeOut=int(timeout), block='true', force=force)
238247
print('Shutdown: Successfully shutdown the server')
239248

@@ -243,7 +252,7 @@ def waitUntilServiceSafeToShutdown(objectName):
243252
if (cohExists and not cohSafe):
244253
print('Shutdown: Coherence not safe to shutdown. Sleeping before connect retry ...')
245254
stayInConnectLoop = True
246-
systime.sleep(30)
255+
systime.sleep(10)
247256
else:
248257
try:
249258
shutdownUsingNodeManager(domain_name, domain_path)

0 commit comments

Comments
 (0)