Skip to content

Commit 614a966

Browse files
authored
Merge pull request #821 from oracle/owls-70947
OWLS-70974 - cyclic scaling test using REST API gets stuck after some time
2 parents 138fa5a + 28e105d commit 614a966

File tree

8 files changed

+389
-15
lines changed

8 files changed

+389
-15
lines changed

operator/src/main/java/oracle/kubernetes/operator/helpers/CallBuilder.java

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1-
// Copyright 2017, 2018, Oracle Corporation and/or its affiliates. All rights reserved.
1+
// Copyright 2017, 2019, Oracle Corporation and/or its affiliates. All rights reserved.
22
// Licensed under the Universal Permissive License v 1.0 as shown at
33
// http://oss.oracle.com/licenses/upl.
44

55
package oracle.kubernetes.operator.helpers;
66

7+
import static java.net.HttpURLConnection.HTTP_CONFLICT;
8+
79
import com.squareup.okhttp.Call;
810
import io.kubernetes.client.ApiCallback;
911
import io.kubernetes.client.ApiClient;
@@ -46,6 +48,9 @@
4648
import oracle.kubernetes.operator.calls.RequestParams;
4749
import oracle.kubernetes.operator.calls.SynchronousCallDispatcher;
4850
import oracle.kubernetes.operator.calls.SynchronousCallFactory;
51+
import oracle.kubernetes.operator.logging.LoggingFacade;
52+
import oracle.kubernetes.operator.logging.LoggingFactory;
53+
import oracle.kubernetes.operator.logging.MessageKeys;
4954
import oracle.kubernetes.operator.work.Step;
5055
import oracle.kubernetes.weblogic.domain.v2.Domain;
5156
import oracle.kubernetes.weblogic.domain.v2.DomainList;
@@ -58,6 +63,8 @@ public class CallBuilder {
5863
/** HTTP status code for "Not Found" */
5964
public static final int NOT_FOUND = 404;
6065

66+
private static final LoggingFacade LOGGER = LoggingFactory.getLogger("Operator", "Operator");
67+
6168
private static SynchronousCallDispatcher DISPATCHER =
6269
new SynchronousCallDispatcher() {
6370
@Override
@@ -159,6 +166,65 @@ public VersionInfo readVersionCode() throws ApiException {
159166
requestParams, ((client, params) -> new VersionApi(client).getCode()));
160167
}
161168

169+
/**
170+
* Class extended by callers to {@link
171+
* #executeSynchronousCallWithConflictRetry(RequestParamsBuilder, SynchronousCallFactory,
172+
* ConflictRetry)} for building the RequestParams to be passed to {@link
173+
* #executeSynchronousCall(RequestParams, SynchronousCallFactory)}.
174+
*
175+
* @param <T> Type of kubernetes object to be passed to the API
176+
*/
177+
abstract static class RequestParamsBuilder<T> {
178+
T body;
179+
180+
public RequestParamsBuilder(T body) {
181+
this.body = body;
182+
}
183+
184+
abstract RequestParams buildRequestParams();
185+
186+
void setBody(T body) {
187+
this.body = body;
188+
}
189+
}
190+
191+
private <T> T executeSynchronousCallWithConflictRetry(
192+
RequestParamsBuilder requestParamsBuilder,
193+
SynchronousCallFactory<T> factory,
194+
ConflictRetry<T> conflictRetry)
195+
throws ApiException {
196+
int retryCount = 0;
197+
while (retryCount == 0 || retryCount < maxRetryCount) {
198+
retryCount++;
199+
RequestParams requestParams = requestParamsBuilder.buildRequestParams();
200+
try {
201+
return executeSynchronousCall(requestParams, factory);
202+
} catch (ApiException apiException) {
203+
boolean retry = false;
204+
if (apiException.getCode() == HTTP_CONFLICT
205+
&& conflictRetry != null
206+
&& retryCount < maxRetryCount) {
207+
T body = conflictRetry.getUpdatedObject();
208+
if (body != null) {
209+
requestParamsBuilder.setBody(body);
210+
retry = true;
211+
LOGGER.fine(
212+
MessageKeys.SYNC_RETRY,
213+
requestParams.call,
214+
apiException.getCode(),
215+
apiException.getMessage(),
216+
retryCount,
217+
maxRetryCount);
218+
}
219+
}
220+
if (!retry) {
221+
throw apiException;
222+
}
223+
}
224+
}
225+
return null;
226+
}
227+
162228
private <T> T executeSynchronousCall(
163229
RequestParams requestParams, SynchronousCallFactory<T> factory) throws ApiException {
164230
return DISPATCHER.execute(factory, requestParams, helper);
@@ -295,6 +361,33 @@ public Step readDomainAsync(String name, String namespace, ResponseStep<Domain>
295361
.replaceWebLogicOracleV2NamespacedDomain(
296362
requestParams.name, requestParams.namespace, (Domain) requestParams.body, pretty);
297363

364+
/**
365+
* Replace domain
366+
*
367+
* @param uid the domain uid (unique within the k8s cluster)
368+
* @param namespace Namespace
369+
* @param body Body
370+
* @param conflictRetry ConflictRetry implementation to be called to obtain the latest version of
371+
* the Domain for retrying the replaceDomain synchronous call if the previous call failed with
372+
* Conflict response code (409)
373+
* @return Replaced domain
374+
* @throws ApiException APIException
375+
*/
376+
public Domain replaceDomainWithConflictRetry(
377+
String uid, String namespace, Domain body, ConflictRetry<Domain> conflictRetry)
378+
throws ApiException {
379+
return executeSynchronousCallWithConflictRetry(
380+
new RequestParamsBuilder<Domain>(body) {
381+
382+
@Override
383+
RequestParams buildRequestParams() {
384+
return new RequestParams("replaceDomain", namespace, uid, body);
385+
}
386+
},
387+
REPLACE_DOMAIN_CALL,
388+
conflictRetry);
389+
}
390+
298391
/**
299392
* Replace domain
300393
*
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Copyright 2019, Oracle Corporation and/or its affiliates. All rights reserved.
2+
// Licensed under the Universal Permissive License v 1.0 as shown at
3+
// http://oss.oracle.com/licenses/upl.
4+
5+
package oracle.kubernetes.operator.helpers;
6+
7+
/**
8+
* Interface used by CallBuilder to obtain the latest version of the Kubernetes object for retrying
9+
* synchronous API calls that previously failed with Conflict response code (409). This indicates an
10+
* optimistic locking failure and the kubernetes object has since been modified. The synchronous API
11+
* can be retried with the latest version of the kubernetes object.
12+
*
13+
* @param <T> Type of kubernetes object to be passed to the API
14+
*/
15+
public interface ConflictRetry<T> {
16+
17+
/**
18+
* @return The latest version of the kubernetes object for passing to the kubernetes API, or null
19+
* if the API should not be retried
20+
*/
21+
T getUpdatedObject();
22+
}

operator/src/main/java/oracle/kubernetes/operator/logging/MessageKeys.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2017, 2018, Oracle Corporation and/or its affiliates. All rights reserved.
1+
// Copyright 2017, 2019, Oracle Corporation and/or its affiliates. All rights reserved.
22
// Licensed under the Universal Permissive License v 1.0 as shown at
33
// http://oss.oracle.com/licenses/upl.
44

@@ -147,4 +147,5 @@ private MessageKeys() {}
147147
public static final String CANNOT_START_DOMAIN_AFTER_MAX_RETRIES = "WLSKO-0144";
148148
public static final String CYCLING_POD = "WLSKO-0145";
149149
public static final String REPLICAS_EXCEEDS_TOTAL_CLUSTER_SERVER_COUNT = "WLSKO-0146";
150+
public static final String SYNC_RETRY = "WLSKO-0147";
150151
}

operator/src/main/java/oracle/kubernetes/operator/rest/RestBackendImpl.java

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2017, 2018, Oracle Corporation and/or its affiliates. All rights reserved.
1+
// Copyright 2017, 2019, Oracle Corporation and/or its affiliates. All rights reserved.
22
// Licensed under the Universal Permissive License v 1.0 as shown at
33
// http://oss.oracle.com/licenses/upl.
44

@@ -27,6 +27,7 @@
2727
import oracle.kubernetes.operator.helpers.AuthorizationProxy.Resource;
2828
import oracle.kubernetes.operator.helpers.AuthorizationProxy.Scope;
2929
import oracle.kubernetes.operator.helpers.CallBuilder;
30+
import oracle.kubernetes.operator.helpers.ConflictRetry;
3031
import oracle.kubernetes.operator.logging.LoggingFacade;
3132
import oracle.kubernetes.operator.logging.LoggingFactory;
3233
import oracle.kubernetes.operator.logging.MessageKeys;
@@ -237,23 +238,30 @@ public void scaleCluster(String domainUID, String cluster, int managedServerCoun
237238

238239
verifyWLSConfiguredClusterCapacity(domain, cluster, managedServerCount);
239240

240-
updateReplicasForDomain(namespace, domain, cluster, managedServerCount);
241+
if (updateReplicasForDomain(domain, cluster, managedServerCount)) {
242+
overwriteDomain(
243+
namespace,
244+
domain,
245+
() -> getDomainForConflictRetry(domainUID, cluster, managedServerCount));
246+
}
241247
LOGGER.exiting();
242248
}
243249

244-
private void updateReplicasForDomain(
245-
String namespace, Domain domain, String cluster, int newReplicaCount) {
250+
private boolean updateReplicasForDomain(Domain domain, String cluster, int newReplicaCount) {
246251
if (newReplicaCount != domain.getReplicaCount(cluster)) {
247252
domain.setReplicaCount(cluster, newReplicaCount);
248-
overwriteDomain(namespace, domain);
253+
return true;
249254
}
255+
return false;
250256
}
251257

252-
private void overwriteDomain(String namespace, Domain domain) {
258+
private void overwriteDomain(
259+
String namespace, final Domain domain, ConflictRetry<Domain> conflictRetry) {
253260
try {
254261
// Write out the Domain with updated replica values
255262
// TODO: Can we patch instead of replace?
256-
new CallBuilder().replaceDomain(domain.getDomainUID(), namespace, domain);
263+
new CallBuilder()
264+
.replaceDomainWithConflictRetry(domain.getDomainUID(), namespace, domain, conflictRetry);
257265
} catch (ApiException e) {
258266
LOGGER.finer(
259267
String.format(
@@ -264,6 +272,14 @@ private void overwriteDomain(String namespace, Domain domain) {
264272
}
265273
}
266274

275+
Domain getDomainForConflictRetry(String domainUid, String cluster, int newReplicaCount) {
276+
Domain domain = findDomain(domainUid, getDomainsList());
277+
if (updateReplicasForDomain(domain, cluster, newReplicaCount)) {
278+
return domain;
279+
}
280+
return null;
281+
}
282+
267283
private void verifyWLSConfiguredClusterCapacity(
268284
Domain domain, String cluster, int requestedSize) {
269285
// Query WebLogic Admin Server for current configured WebLogic Cluster size

operator/src/main/resources/Operator.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,3 +145,4 @@ WLSKO-0143=Failed to parse file {0} from domain introspector for domain {1} due
145145
WLSKO-0144=Unable to start domain with domainUID {0} in namespace {1} after {2} attempts due to exception: {3}
146146
WLSKO-0145=Replacing pod {0} with {1}
147147
WLSKO-0146=Replica request of {0} exceeds the maximum dynamic server count + server count of {1} configured for cluster {2}
148+
WLSKO-0147=Call {0} has failed with code {1}: message {2}. It has failed {3} times and will be retried up to {4} times.

0 commit comments

Comments
 (0)