Skip to content

Commit 2504d4c

Browse files
committed
Correct server status checking
1 parent 2aa91b7 commit 2504d4c

File tree

13 files changed

+180
-82
lines changed

13 files changed

+180
-82
lines changed

operator/src/main/java/oracle/kubernetes/operator/DomainProcessorImpl.java

Lines changed: 2 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import java.util.concurrent.ConcurrentMap;
2222
import java.util.concurrent.ScheduledFuture;
2323
import java.util.concurrent.TimeUnit;
24-
import java.util.concurrent.atomic.AtomicInteger;
2524
import javax.annotation.Nullable;
2625
import oracle.kubernetes.operator.TuningParameters.MainTuning;
2726
import oracle.kubernetes.operator.calls.CallResponse;
@@ -239,7 +238,7 @@ private static void onEvent(V1Event event) {
239238

240239
Optional.ofNullable(DOMAINS.get(event.getMetadata().getNamespace()))
241240
.map(m -> m.get(domainUid))
242-
.ifPresent(info -> info.setLastKnownServerStatus(serverName, status));
241+
.ifPresent(info -> info.updateLastKnownServerStatus(serverName, status));
243242
}
244243

245244
private static String getReadinessStatus(V1Event event) {
@@ -289,7 +288,6 @@ public void dispatchDomainWatch(Watch.Response<Domain> item) {
289288
*/
290289

291290
private void scheduleDomainStatusUpdating(DomainPresenceInfo info) {
292-
AtomicInteger unchangedCount = new AtomicInteger(0);
293291
Runnable command =
294292
new Runnable() {
295293
public void run() {
@@ -311,46 +309,11 @@ public void run() {
311309
packet,
312310
new CompletionCallback() {
313311
@Override
314-
public void onCompletion(Packet packet) {
315-
Boolean isStatusUnchanged =
316-
(Boolean) packet.get(ProcessingConstants.STATUS_UNCHANGED);
317-
if (Boolean.TRUE.equals(isStatusUnchanged)) {
318-
if (unchangedCount.incrementAndGet()
319-
== main.unchangedCountToDelayStatusRecheck) {
320-
// slow down retries because of sufficient unchanged statuses
321-
registerStatusUpdater(
322-
info.getNamespace(),
323-
info.getDomainUID(),
324-
delegate.scheduleWithFixedDelay(
325-
r,
326-
main.eventualLongDelay,
327-
main.eventualLongDelay,
328-
TimeUnit.SECONDS));
329-
}
330-
} else {
331-
// reset to trying after shorter delay because of changed status
332-
unchangedCount.set(0);
333-
registerStatusUpdater(
334-
info.getNamespace(),
335-
info.getDomainUID(),
336-
delegate.scheduleWithFixedDelay(
337-
r,
338-
main.initialShortDelay,
339-
main.initialShortDelay,
340-
TimeUnit.SECONDS));
341-
}
342-
}
312+
public void onCompletion(Packet packet) {}
343313

344314
@Override
345315
public void onThrowable(Packet packet, Throwable throwable) {
346316
LOGGER.severe(MessageKeys.EXCEPTION, throwable);
347-
// retry to trying after shorter delay because of exception
348-
unchangedCount.set(0);
349-
registerStatusUpdater(
350-
info.getNamespace(),
351-
info.getDomainUID(),
352-
delegate.scheduleWithFixedDelay(
353-
r, main.initialShortDelay, main.initialShortDelay, TimeUnit.SECONDS));
354317
}
355318
});
356319
} catch (Throwable t) {

operator/src/main/java/oracle/kubernetes/operator/DomainStatusUpdater.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,6 @@ public NextAction apply(Packet packet) {
146146
}
147147
}
148148

149-
// This will control if we need to re-check states soon or if we can slow down checks
150-
packet.put(ProcessingConstants.STATUS_UNCHANGED, !status.isModified());
151-
152149
if (status.isModified()) {
153150
LOGGER.info(MessageKeys.DOMAIN_STATUS, context.getInfo().getDomainUID(), status);
154151
}

operator/src/main/java/oracle/kubernetes/operator/Main.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,10 @@ private static String computeOperatorNamespace() {
495495
}
496496

497497
private static Collection<String> getTargetNamespaces() {
498-
return getTargetNamespaces(tuningAndConfig.get("targetNamespaces"), operatorNamespace);
498+
return getTargetNamespaces(
499+
Optional.ofNullable(System.getenv("OPERATOR_TARGET_NAMESPACES"))
500+
.orElse(tuningAndConfig.get("targetNamespaces")),
501+
operatorNamespace);
499502
}
500503

501504
private static class DomainListStep extends ResponseStep<DomainList> {

operator/src/main/java/oracle/kubernetes/operator/ProcessingConstants.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,6 @@ public interface ProcessingConstants {
2626
public static final String SERVER_STATE_MAP = "serverStateMap";
2727
public static final String SERVER_HEALTH_MAP = "serverHealthMap";
2828

29-
public static final String STATUS_UNCHANGED = "statusUnchanged";
30-
3129
public static final String DOMAIN_TOPOLOGY = "domainTopology";
3230
public static final String JOB_POD_NAME = "jobPodName";
3331
public static final String DOMAIN_INTROSPECTOR_JOB = "domainIntrospectorJob";

operator/src/main/java/oracle/kubernetes/operator/ServerStatusReader.java

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.util.stream.Collectors;
2626
import oracle.kubernetes.operator.helpers.ClientPool;
2727
import oracle.kubernetes.operator.helpers.DomainPresenceInfo;
28+
import oracle.kubernetes.operator.helpers.LastKnownStatus;
2829
import oracle.kubernetes.operator.helpers.PodHelper;
2930
import oracle.kubernetes.operator.logging.LoggingFacade;
3031
import oracle.kubernetes.operator.logging.LoggingFactory;
@@ -37,6 +38,7 @@
3738
import oracle.kubernetes.operator.work.Packet;
3839
import oracle.kubernetes.operator.work.Step;
3940
import oracle.kubernetes.weblogic.domain.model.ServerHealth;
41+
import org.joda.time.DateTime;
4042

4143
/** Creates an asynchronous step to read the WebLogic server state from a particular pod. */
4244
public class ServerStatusReader {
@@ -121,16 +123,23 @@ public NextAction apply(Packet packet) {
121123
ConcurrentMap<String, String> serverStateMap =
122124
(ConcurrentMap<String, String>) packet.get(SERVER_STATE_MAP);
123125

126+
TuningParameters.MainTuning main = TuningParameters.getInstance().getMainTuning();
127+
LastKnownStatus lastKnownStatus = info.getLastKnownServerStatus(serverName);
128+
if (lastKnownStatus != null
129+
&& lastKnownStatus.getUnchangedCount() >= main.unchangedCountToDelayStatusRecheck) {
130+
if (DateTime.now()
131+
.isBefore(lastKnownStatus.getTime().plusSeconds((int) main.eventualLongDelay))) {
132+
String state = lastKnownStatus.getStatus();
133+
info.updateLastKnownServerStatus(serverName, state);
134+
serverStateMap.put(serverName, state);
135+
return doNext(packet);
136+
}
137+
}
138+
124139
if (PodHelper.getReadyStatus(pod)) {
125-
info.setLastKnownServerStatus(serverName, WebLogicConstants.RUNNING_STATE);
140+
info.updateLastKnownServerStatus(serverName, WebLogicConstants.RUNNING_STATE);
126141
serverStateMap.put(serverName, WebLogicConstants.RUNNING_STATE);
127142
return doNext(packet);
128-
} else {
129-
String lastKnownState = info.getLastKnownServerStatus(serverName);
130-
if (lastKnownState != null) {
131-
serverStateMap.put(serverName, lastKnownState);
132-
return doNext(packet);
133-
}
134143
}
135144

136145
// Even though we don't need input data for this call, the API server is
@@ -153,8 +162,12 @@ public NextAction apply(Packet packet) {
153162

154163
InputStream in = proc.getInputStream();
155164
if (proc.waitFor(timeoutSeconds, TimeUnit.SECONDS)) {
156-
try (final Reader reader = new InputStreamReader(in, Charsets.UTF_8)) {
157-
state = CharStreams.toString(reader);
165+
if (proc.exitValue() == 0) {
166+
try (final Reader reader = new InputStreamReader(in, Charsets.UTF_8)) {
167+
state = CharStreams.toString(reader);
168+
}
169+
} else {
170+
state = WebLogicConstants.UNKNOWN_STATE;
158171
}
159172
}
160173
} catch (InterruptedException ignore) {
@@ -168,11 +181,25 @@ public NextAction apply(Packet packet) {
168181
}
169182
}
170183

171-
serverStateMap.put(
172-
serverName, state != null ? state.trim() : WebLogicConstants.UNKNOWN_STATE);
184+
state = chooseStateOrLastKnownServerStatus(lastKnownStatus, state);
185+
serverStateMap.put(serverName, state);
186+
info.updateLastKnownServerStatus(serverName, state);
173187
fiber.resume(packet);
174188
});
175189
}
190+
191+
/**
 * Chooses the server state to record after a state read attempt.
 *
 * <p>The freshly read state wins when it is non-null and still non-empty after trimming.
 * Otherwise the status held by {@code lastKnownStatus} is used when one exists, and
 * {@code WebLogicConstants.UNKNOWN_STATE} is the final fallback.
 *
 * @param lastKnownStatus the previously recorded status for the server, or null if none
 * @param state the state text just read, possibly null, empty, or padded with whitespace
 * @return the trimmed state, the last known status, or the unknown-state constant
 */
private String chooseStateOrLastKnownServerStatus(
192+
LastKnownStatus lastKnownStatus, String state) {
193+
if (state != null) {
194+
// Trim first so that whitespace-only output is treated the same as no output.
state = state.trim();
195+
if (!state.isEmpty()) {
196+
return state;
197+
}
198+
}
199+
// Nothing usable was read: fall back to the last known status, else to "unknown".
return (lastKnownStatus != null)
200+
? lastKnownStatus.getStatus()
201+
: WebLogicConstants.UNKNOWN_STATE;
202+
}
176203
}
177204

178205
private static class ServerHealthStep extends Step {

operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -144,11 +144,24 @@ public void setServerPodFromEvent(String serverName, V1Pod event) {
144144
}
145145

146146
private void updateStatus(String serverName, V1Pod event) {
147-
if (PodHelper.isReady(event)) {
148-
getSko(serverName).getLastKnownStatus().set(WebLogicConstants.RUNNING_STATE);
149-
} else {
150-
getSko(serverName).getLastKnownStatus().compareAndSet(WebLogicConstants.RUNNING_STATE, null);
151-
}
147+
getSko(serverName)
148+
.getLastKnownStatus()
149+
.getAndUpdate(
150+
lastKnownStatus -> {
151+
LastKnownStatus updatedStatus = lastKnownStatus;
152+
if (PodHelper.isReady(event)) {
153+
if (lastKnownStatus == null
154+
|| !WebLogicConstants.RUNNING_STATE.equals(lastKnownStatus.getStatus())) {
155+
updatedStatus = new LastKnownStatus(WebLogicConstants.RUNNING_STATE);
156+
}
157+
} else {
158+
if (lastKnownStatus != null
159+
&& WebLogicConstants.RUNNING_STATE.equals(lastKnownStatus.getStatus())) {
160+
updatedStatus = null;
161+
}
162+
}
163+
return updatedStatus;
164+
});
152165
}
153166

154167
private V1Pod getNewerPod(V1Pod first, V1Pod second) {
@@ -171,7 +184,8 @@ public boolean deleteServerPodFromEvent(String serverName, V1Pod event) {
171184
if (serverName == null) return false;
172185
ServerKubernetesObjects sko = getSko(serverName);
173186
V1Pod deletedPod = sko.getPod().getAndAccumulate(event, this::getNewerCurrentOrNull);
174-
if (deletedPod != null) sko.getLastKnownStatus().set(WebLogicConstants.SHUTDOWN_STATE);
187+
if (deletedPod != null)
188+
sko.getLastKnownStatus().set(new LastKnownStatus(WebLogicConstants.SHUTDOWN_STATE));
175189
return deletedPod != null;
176190
}
177191

@@ -195,18 +209,30 @@ public V1Pod removeServerPod(String serverName) {
195209
* @param serverName the name of the server
196210
* @return the corresponding reported status
197211
*/
198-
public String getLastKnownServerStatus(String serverName) {
212+
public LastKnownStatus getLastKnownServerStatus(String serverName) {
199213
return getSko(serverName).getLastKnownStatus().get();
200214
}
201215

202216
/**
203-
* Setss the last status reported for the specified server.
217+
* Updates the last status reported for the specified server.
204218
*
205219
* @param serverName the name of the server
206220
* @param status the new status
207221
*/
208-
public void setLastKnownServerStatus(String serverName, String status) {
209-
getSko(serverName).getLastKnownStatus().set(status);
222+
public void updateLastKnownServerStatus(String serverName, String status) {
223+
getSko(serverName)
224+
.getLastKnownStatus()
225+
.getAndUpdate(
226+
lastKnownStatus -> {
227+
LastKnownStatus updatedStatus = null;
228+
if (status != null) {
229+
updatedStatus =
230+
(lastKnownStatus != null && status.equals(lastKnownStatus.getStatus()))
231+
? new LastKnownStatus(status, lastKnownStatus.getUnchangedCount() + 1)
232+
: new LastKnownStatus(status);
233+
}
234+
return updatedStatus;
235+
});
210236
}
211237

212238
/**

operator/src/main/java/oracle/kubernetes/operator/helpers/LastKnownStatus.java

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// Copyright 2019, Oracle Corporation and/or its affiliates. All rights reserved.
2+
// Licensed under the Universal Permissive License v 1.0 as shown at
3+
// http://oss.oracle.com/licenses/upl.
4+
5+
package oracle.kubernetes.operator.helpers;
6+
7+
import org.apache.commons.lang3.builder.EqualsBuilder;
8+
import org.apache.commons.lang3.builder.HashCodeBuilder;
9+
import org.apache.commons.lang3.builder.ToStringBuilder;
10+
import org.joda.time.DateTime;
11+
12+
/**
 * Immutable record of a server's last known status.
 *
 * <p>Holds the status string, a count of how many times that status has been observed
 * unchanged, and a timestamp taken when the instance is constructed. All fields are final;
 * a changed status or count is represented by creating a new instance.
 *
 * <p>Equality and hashing consider only the status and the unchanged count, never the time.
 */
public class LastKnownStatus {
13+
// The reported status text.
private final String status;
14+
// Number of times this status has been seen unchanged; 0 for a newly observed status.
private final int unchangedCount;
15+
// Timestamp captured in the constructor.
private final DateTime time;
16+
17+
/**
 * Creates a record for a newly observed status with an unchanged count of 0.
 *
 * @param status the status text
 */
public LastKnownStatus(String status) {
18+
this(status, 0);
19+
}
20+
21+
/**
 * Creates a record with an explicit unchanged count.
 *
 * @param status the status text
 * @param unchangedCount how many times this status has been observed unchanged
 */
public LastKnownStatus(String status, int unchangedCount) {
22+
this.status = status;
23+
this.unchangedCount = unchangedCount;
24+
// Every new instance gets a fresh timestamp, including those created only to bump the count.
this.time = new DateTime();
25+
}
26+
27+
/**
 * Returns the status text.
 *
 * @return the status
 */
public String getStatus() {
28+
return status;
29+
}
30+
31+
/**
 * Returns how many times this status has been observed unchanged.
 *
 * @return the unchanged count
 */
public int getUnchangedCount() {
32+
return unchangedCount;
33+
}
34+
35+
/**
 * Returns the timestamp captured when this instance was constructed.
 *
 * @return the construction time
 */
public DateTime getTime() {
36+
return time;
37+
}
38+
39+
@Override
40+
public String toString() {
41+
return new ToStringBuilder(this)
42+
.append("status", status)
43+
.append("unchangedCount", unchangedCount)
44+
.append("time", time)
45+
.toString();
46+
}
47+
48+
@Override
49+
public boolean equals(Object o) {
50+
if (this == o) {
51+
return true;
52+
}
53+
54+
if (o == null || getClass() != o.getClass()) {
55+
return false;
56+
}
57+
58+
LastKnownStatus that = (LastKnownStatus) o;
59+
60+
// Time is deliberately excluded: instances with the same status and count are equal
// regardless of when they were created.
61+
return new EqualsBuilder()
62+
.append(status, that.status)
63+
.append(unchangedCount, that.unchangedCount)
64+
.isEquals();
65+
}
66+
67+
@Override
68+
public int hashCode() {
69+
// Time is excluded here as well, keeping hashCode consistent with equals.
70+
return new HashCodeBuilder().append(status).append(unchangedCount).toHashCode();
71+
}
72+
}

operator/src/main/java/oracle/kubernetes/operator/helpers/ServerKubernetesObjects.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
/** Kubernetes pods and services associated with a single WebLogic server. */
1212
class ServerKubernetesObjects {
1313
private final AtomicReference<V1Pod> pod = new AtomicReference<>(null);
14-
private final AtomicReference<String> lastKnownStatus = new AtomicReference<>(null);
14+
private final AtomicReference<LastKnownStatus> lastKnownStatus = new AtomicReference<>(null);
1515
private final AtomicReference<V1Service> service = new AtomicReference<>(null);
1616
private final AtomicReference<V1Service> externalService = new AtomicReference<>();
1717

@@ -31,7 +31,7 @@ AtomicReference<V1Pod> getPod() {
3131
*
3232
* @return Status
3333
*/
34-
AtomicReference<String> getLastKnownStatus() {
34+
AtomicReference<LastKnownStatus> getLastKnownStatus() {
3535
return lastKnownStatus;
3636
}
3737

operator/src/main/resources/scripts/readState.sh

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,15 @@ DH=${DOMAIN_HOME?}
2020

2121
STATEFILE=/${DH}/servers/${SN}/data/nodemanager/${SN}.state
2222

23-
if [ `jps -l | grep -c " weblogic.NodeManager"` -eq 0 ]; then
24-
trace "Error: WebLogic NodeManager process not found."
25-
exit 1
23+
if [ `jps -l | grep -c " -Dweblogic.Name=${SERVER_NAME} "` -eq 0 ]; then
24+
trace "WebLogic server process not found"
25+
echo SHUTDOWN
26+
exit 0
2627
fi
2728

2829
if [ ! -f ${STATEFILE} ]; then
2930
trace "Error: WebLogic Server state file not found."
30-
exit 2
31+
exit 1
3132
fi
3233

3334
cat ${STATEFILE} | cut -f 1 -d ':'

operator/src/main/resources/scripts/stopServer.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ trace "After stop-server.py" &>> /u01/oracle/stopserver.out
6868
# but let's try looking for the server process and
6969
# kill the server if the process still exists,
7070
# just in case we failed to stop it via wlst
71-
pid=$(jps -v | grep "[D]weblogic.Name=${SERVER_NAME}" | awk '{print $1}')
71+
pid=$(jps -v | grep " -Dweblogic.Name=${SERVER_NAME} " | awk '{print $1}')
7272
if [ ! -z $pid ]; then
7373
echo "Killing the server process $pid" &>> /u01/oracle/stopserver.out
7474
kill -15 $pid
@@ -77,7 +77,7 @@ fi
7777
# stop node manager process
7878
#
7979
trace "Stopping NodeManager" &>> /u01/oracle/stopserver.out
80-
pid=$(jps | grep "NodeManager" | awk '{print $1}')
80+
pid=$(jps | grep "weblogic.NodeManager" | awk '{print $1}')
8181
trace "PID=[${pid}]" &>> /u01/oracle/stopserver.out
8282
if [ ! -z $pid ]; then
8383
echo "Killing NodeManager process $pid" &>> /u01/oracle/stopserver.out

0 commit comments

Comments
 (0)