Skip to content

Commit 1a66d37

Browse files
committed
Improve logging of retry and cap wait time
1 parent 3edbfe6 commit 1a66d37

File tree

5 files changed: 61 additions (+61) and 55 deletions (-55)

src/main/java/oracle/kubernetes/operator/helpers/CallBuilder.java

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1225,6 +1225,7 @@ public interface RetryStrategy {
12251225
private static final Random R = new Random();
12261226
private static final int HIGH = 1000;
12271227
private static final int LOW = 100;
1228+
private static final int MAX = 30000;
12281229

12291230
private final class DefaultRetryStrategy implements RetryStrategy {
12301231
private long retryCount = 0;
@@ -1247,24 +1248,26 @@ public NextAction doPotentialRetry(Step conflictStep, Packet packet, ApiExceptio
12471248
statusCode == 504 /* StatusServerTimeout */) {
12481249

12491250
// exponential back-off
1250-
long waitTime = (2 << ++retryCount) * 1000 + (R.nextInt(HIGH - LOW) + LOW);
1251+
long waitTime = Math.min((2 << ++retryCount) * 1000 + (R.nextInt(HIGH - LOW) + LOW), MAX);
12511252

12521253
if (statusCode == 0 || statusCode == 504 /* StatusServerTimeout */) {
12531254
// increase server timeout
12541255
timeoutSeconds *= 2;
12551256
}
12561257

1258+
LOGGER.info(MessageKeys.ASYNC_RETRY, String.valueOf(waitTime));
12571259
NextAction na = new NextAction();
12581260
na.delay(retryStep, packet, waitTime, TimeUnit.MILLISECONDS);
12591261
return na;
12601262
} else if (statusCode == 409 /* Conflict */ && conflictStep != null) {
12611263
// Conflict is an optimistic locking failure. Therefore, we can't
12621264
// simply retry the request. Instead, application code needs to rebuild
1263-
// the request based on latest contents. If provided, a confict step will do that.
1265+
// the request based on latest contents. If provided, a conflict step will do that.
12641266

12651267
// exponential back-off
1266-
long waitTime = (2 << ++retryCount) * 1000 + (R.nextInt(HIGH - LOW) + LOW);
1268+
long waitTime = Math.min((2 << ++retryCount) * 1000 + (R.nextInt(HIGH - LOW) + LOW), MAX);
12671269

1270+
LOGGER.info(MessageKeys.ASYNC_RETRY, String.valueOf(waitTime));
12681271
NextAction na = new NextAction();
12691272
na.delay(conflictStep, packet, waitTime, TimeUnit.MILLISECONDS);
12701273
return na;

src/main/java/oracle/kubernetes/operator/helpers/ResponseStep.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ private NextAction doPotentialRetry(Step conflictStep, Packet packet, ApiExcepti
102102
return retryStrategy.doPotentialRetry(conflictStep, packet, e, statusCode, responseHeaders);
103103
}
104104

105-
LOGGER.info(MessageKeys.ASYNC_NO_RETRY, e != null ? e.getMessage() : "", statusCode, responseHeaders != null ? responseHeaders.toString() : "");
105+
LOGGER.warning(MessageKeys.ASYNC_NO_RETRY, e != null ? e.getMessage() : "", statusCode, responseHeaders != null ? responseHeaders.toString() : "");
106106
return null;
107107
}
108108

src/main/java/oracle/kubernetes/operator/logging/MessageKeys.java

Lines changed: 25 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -104,29 +104,30 @@ private MessageKeys() {
104104
public static final String ASYNC_FAILURE = "WLSKO-0088";
105105
public static final String ASYNC_SUCCESS = "WLSKO-0089";
106106
public static final String ASYNC_NO_RETRY = "WLSKO-0090";
107-
public static final String ASYNC_TIMEOUT = "WLSKO-0091";
108-
public static final String DOMAIN_STATUS = "WLSKO-0092";
109-
public static final String INVALID_MANAGE_SERVER_COUNT = "WLSKO-0093";
110-
public static final String SCALE_COUNT_GREATER_THAN_CONFIGURED = "WLSKO-0094";
111-
public static final String SCALING_AUTO_CONTROL_AUTO = "WLSKO-0095";
112-
public static final String MATCHING_DOMAIN_NOT_FOUND = "WLSKO-0096";
113-
public static final String INVALID_DOMAIN_UID = "WLSKO-0097";
114-
public static final String NULL_DOMAIN_UID = "WLSKO-0098";
115-
public static final String NULL_TOKEN_REVIEW_STATUS = "WLSKO-0099";
116-
public static final String NULL_USER_INFO = "WLSKO-0100";
117-
public static final String RESOURCE_BUNDLE_NOT_FOUND = "WLSKO-0101";
118-
public static final String RESTART_ADMIN_COMPLETE = "WLSKO-0102";
119-
public static final String RESTART_SERVERS_COMPLETE = "WLSKO-0103";
120-
public static final String ROLLING_CLUSTERS_COMPLETE = "WLSKO-0104";
121-
public static final String RESTART_ADMIN_STARTING = "WLSKO-0105";
122-
public static final String RESTART_SERVERS_STARTING = "WLSKO-0106";
123-
public static final String ROLLING_CLUSTERS_STARTING = "WLSKO-0107";
124-
public static final String CYCLING_SERVERS = "WLSKO-0108";
125-
public static final String ROLLING_SERVERS = "WLSKO-0109";
126-
public static final String REMOVING_INGRESS = "WLSKO-0110";
127-
public static final String LIST_INGRESS_FOR_DOMAIN = "WLSKO-0111";
128-
public static final String POD_DELETED = "WLSKO-0112";
129-
public static final String SERVICE_DELETED = "WLSKO-0113";
130-
public static final String INGRESS_DELETED = "WLSKO-0114";
107+
public static final String ASYNC_RETRY = "WLSKO-0091";
108+
public static final String ASYNC_TIMEOUT = "WLSKO-0092";
109+
public static final String DOMAIN_STATUS = "WLSKO-0093";
110+
public static final String INVALID_MANAGE_SERVER_COUNT = "WLSKO-0094";
111+
public static final String SCALE_COUNT_GREATER_THAN_CONFIGURED = "WLSKO-0095";
112+
public static final String SCALING_AUTO_CONTROL_AUTO = "WLSKO-0096";
113+
public static final String MATCHING_DOMAIN_NOT_FOUND = "WLSKO-0097";
114+
public static final String INVALID_DOMAIN_UID = "WLSKO-0098";
115+
public static final String NULL_DOMAIN_UID = "WLSKO-0099";
116+
public static final String NULL_TOKEN_REVIEW_STATUS = "WLSKO-0100";
117+
public static final String NULL_USER_INFO = "WLSKO-0101";
118+
public static final String RESOURCE_BUNDLE_NOT_FOUND = "WLSKO-0102";
119+
public static final String RESTART_ADMIN_COMPLETE = "WLSKO-0103";
120+
public static final String RESTART_SERVERS_COMPLETE = "WLSKO-0104";
121+
public static final String ROLLING_CLUSTERS_COMPLETE = "WLSKO-0105";
122+
public static final String RESTART_ADMIN_STARTING = "WLSKO-0106";
123+
public static final String RESTART_SERVERS_STARTING = "WLSKO-0107";
124+
public static final String ROLLING_CLUSTERS_STARTING = "WLSKO-0108";
125+
public static final String CYCLING_SERVERS = "WLSKO-0109";
126+
public static final String ROLLING_SERVERS = "WLSKO-0110";
127+
public static final String REMOVING_INGRESS = "WLSKO-0111";
128+
public static final String LIST_INGRESS_FOR_DOMAIN = "WLSKO-0112";
129+
public static final String POD_DELETED = "WLSKO-0113";
130+
public static final String SERVICE_DELETED = "WLSKO-0114";
131+
public static final String INGRESS_DELETED = "WLSKO-0115";
131132

132133
}

src/main/java/oracle/kubernetes/operator/wlsconfig/WlsConfigRetriever.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,8 @@ public static Step readConfigStep(Step next) {
9292
private static final Random R = new Random();
9393
private static final int HIGH = 1000;
9494
private static final int LOW = 100;
95-
95+
private static final int MAX = 30000;
96+
9697
private static final class ReadConfigStep extends Step {
9798
public ReadConfigStep(Step next) {
9899
super(next);
@@ -133,7 +134,7 @@ public NextAction apply(Packet packet) {
133134
if (retryCount == null) {
134135
retryCount = 0;
135136
}
136-
long waitTime = (2 << ++retryCount) * 1000 + (R.nextInt(HIGH - LOW) + LOW);
137+
long waitTime = Math.min((2 << ++retryCount) * 1000 + (R.nextInt(HIGH - LOW) + LOW), MAX);
137138
packet.put(RETRY_COUNT, retryCount);
138139
return doRetry(packet, waitTime, TimeUnit.MILLISECONDS);
139140
}
@@ -182,7 +183,7 @@ public NextAction apply(Packet packet) {
182183
if (retryCount == null) {
183184
retryCount = 0;
184185
}
185-
long waitTime = (2 << ++retryCount) * 1000 + (R.nextInt(HIGH - LOW) + LOW);
186+
long waitTime = Math.min((2 << ++retryCount) * 1000 + (R.nextInt(HIGH - LOW) + LOW), MAX);
186187
packet.put(RETRY_COUNT, retryCount);
187188
return doRetry(packet, waitTime, TimeUnit.MILLISECONDS);
188189
}

src/main/resources/Operator.properties

Lines changed: 25 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -89,29 +89,30 @@ WLSKO-0087=External Channel {0} port {1} is outside the NodePort range starting
8989
WLSKO-0088=Async call failed: {0}, code: {1}, headers {2} after invoking {3}, namespace: {4}, name: {5}, body: {6}, fieldSelector: {7}, labelSelector: {8}, resourceVersion: {9}
9090
WLSKO-0089=Async call succeeded with result: {0}, code: {1}, headers {2}
9191
WLSKO-0090=Async call will not be retried after message: {0}, code: {1}, headers {2}
92-
WLSKO-0091=Async call timed-out while invoking {0}, namespace: {1}, name: {2}, body: {3}, fieldSelector: {4}, labelSelector: {5}, resourceVersion: {6}
93-
WLSKO-0092=Status for Domain with UID {0} is now, available server: {1}, available clusters: {2}, unavailable servers: {3}, unavailable clusters: {4}, conditions: {5}
94-
WLSKO-0093=Specified managed server count parameter of {0} is invalid. Please specify a positive managed server count for scaling
95-
WLSKO-0094=Requested scaling count of {0} is greater than configured cluster size of {1} for WebLogic cluster {2}. Please increase the number of configured managed servers for WebLogic cluster {3}
96-
WLSKO-0095=Scaling of WLS Cluster {0} is only supported with Startup Control=AUTO
97-
WLSKO-0096=Domain matching {0} not found
98-
WLSKO-0097=Invalid domain UID: {0}
99-
WLSKO-0098=Null domainUID
100-
WLSKO-0099=Null V1TokenReviewStatus
101-
WLSKO-0100=Null userInfo {0}
102-
WLSKO-0101=Could not find the resource bundle
103-
WLSKO-0102=Restart of administration server for Domain with UID {0} has completed
104-
WLSKO-0103=Restart of servers for Domain with UID {0} in the list {1} has completed
105-
WLSKO-0104=Rolling restart of servers for Domain with UID {0} in the list of clusters {1} has completed
106-
WLSKO-0105=Restart of administration server for Domain with UID {0} is starting
107-
WLSKO-0106=Restart of servers for Domain with UID {0} in the list {1} is starting
108-
WLSKO-0107=Rolling restart of servers for Domain with UID {0} in the list of clusters {1} is starting
109-
WLSKO-0108=Cycling of servers for Domain with UID {0} in the list {1} now
110-
WLSKO-0109=Rolling of servers for Domain with UID {0} in the list {1} now with ready servers {2}
111-
WLSKO-0110=Removing Ingress {0} in namespace {1}
112-
WLSKO-0111=List Ingress for domain with domainUID {0} in namespace {1}
113-
WLSKO-0112=Pod for domain with domainUID {0} in namespace {1} and with server name {2} deleted; validating domain
114-
WLSKO-0113=Service for domain with domainUID {0} in namespace {1} and with server name {2} deleted; validating domain
115-
WLSKO-0114=Ingress for domain with domainUID {0} in namespace {1} and with cluster name {2} deleted; validating domain
92+
WLSKO-0091=Async call will be retried after delay: {0} ms
93+
WLSKO-0092=Async call timed-out while invoking {0}, namespace: {1}, name: {2}, body: {3}, fieldSelector: {4}, labelSelector: {5}, resourceVersion: {6}
94+
WLSKO-0093=Status for Domain with UID {0} is now, available server: {1}, available clusters: {2}, unavailable servers: {3}, unavailable clusters: {4}, conditions: {5}
95+
WLSKO-0094=Specified managed server count parameter of {0} is invalid. Please specify a positive managed server count for scaling
96+
WLSKO-0095=Requested scaling count of {0} is greater than configured cluster size of {1} for WebLogic cluster {2}. Please increase the number of configured managed servers for WebLogic cluster {3}
97+
WLSKO-0096=Scaling of WLS Cluster {0} is only supported with Startup Control=AUTO
98+
WLSKO-0097=Domain matching {0} not found
99+
WLSKO-0098=Invalid domain UID: {0}
100+
WLSKO-0099=Null domainUID
101+
WLSKO-0100=Null V1TokenReviewStatus
102+
WLSKO-0101=Null userInfo {0}
103+
WLSKO-0102=Could not find the resource bundle
104+
WLSKO-0103=Restart of administration server for Domain with UID {0} has completed
105+
WLSKO-0104=Restart of servers for Domain with UID {0} in the list {1} has completed
106+
WLSKO-0105=Rolling restart of servers for Domain with UID {0} in the list of clusters {1} has completed
107+
WLSKO-0106=Restart of administration server for Domain with UID {0} is starting
108+
WLSKO-0107=Restart of servers for Domain with UID {0} in the list {1} is starting
109+
WLSKO-0108=Rolling restart of servers for Domain with UID {0} in the list of clusters {1} is starting
110+
WLSKO-0109=Cycling of servers for Domain with UID {0} in the list {1} now
111+
WLSKO-0110=Rolling of servers for Domain with UID {0} in the list {1} now with ready servers {2}
112+
WLSKO-0111=Removing Ingress {0} in namespace {1}
113+
WLSKO-0112=List Ingress for domain with domainUID {0} in namespace {1}
114+
WLSKO-0113=Pod for domain with domainUID {0} in namespace {1} and with server name {2} deleted; validating domain
115+
WLSKO-0114=Service for domain with domainUID {0} in namespace {1} and with server name {2} deleted; validating domain
116+
WLSKO-0115=Ingress for domain with domainUID {0} in namespace {1} and with cluster name {2} deleted; validating domain
116117

117118

0 commit comments

Comments
 (0)