Skip to content

Commit 326ec06

Browse files
committed
Merge branch 'OWLS-109579' into 'main'
OWLS-109579 - Potential fixes for issues observed in the CAGBU environment with a large K8s cluster. See merge request weblogic-cloud/weblogic-kubernetes-operator!4271
2 parents c6bb1c0 + f45b235 commit 326ec06

File tree

5 files changed

+112
-21
lines changed

5 files changed

+112
-21
lines changed

operator/src/main/java/oracle/kubernetes/operator/DomainResourcesValidation.java

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ public void completeProcessing(Packet packet) {
104104
.forEach(info -> adjustClusterResources(c, info)));
105105
executeMakeRightForClusterEvents(dp);
106106
getActiveDomainPresenceInfos().forEach(info -> activateDomain(dp, info));
107+
getDomainPresenceInfoMap().values().forEach(DomainResourcesValidation.this::removeDeletedPodsFromDPI);
108+
getDomainPresenceInfoMap().values().forEach(DomainPresenceInfo::clearServerPodNamesFromList);
107109
}
108110
};
109111
}
@@ -134,15 +136,11 @@ private boolean isForDomain(ClusterResource clusterResource, DomainPresenceInfo
134136
}
135137

136138
private void addPodList(V1PodList list) {
137-
getDomainPresenceInfoMap().values().forEach(dpi -> removeDeletedPodsFromDPI(list, dpi));
138139
list.getItems().forEach(this::addPod);
139140
}
140141

141-
private void removeDeletedPodsFromDPI(V1PodList list, DomainPresenceInfo dpi) {
142-
Collection<String> serverNamesFromPodList = list.getItems().stream()
143-
.map(PodHelper::getPodServerName).collect(Collectors.toList());
144-
145-
dpi.getServerNames().stream().filter(s -> !serverNamesFromPodList.contains(s)).collect(Collectors.toList())
142+
private void removeDeletedPodsFromDPI(DomainPresenceInfo dpi) {
143+
dpi.getServerNames().stream().filter(s -> !dpi.getServerNamesFromPodList().contains(s)).collect(Collectors.toList())
146144
.forEach(name -> dpi.deleteServerPodFromEvent(name, null));
147145
}
148146

@@ -157,11 +155,14 @@ private void addOperatorEventList(CoreV1EventList list) {
157155
private void addPod(V1Pod pod) {
158156
String domainUid = PodHelper.getPodDomainUid(pod);
159157
String serverName = PodHelper.getPodServerName(pod);
158+
DomainPresenceInfo info = getExistingDomainPresenceInfo(domainUid);
159+
Optional.ofNullable(info).ifPresent(i -> i.addServerNameFromPodList(serverName));
160+
160161
if (domainUid != null && serverName != null) {
161-
setServerPodFromEvent(getExistingDomainPresenceInfo(domainUid), serverName, pod);
162+
setServerPodFromEvent(info, serverName, pod);
162163
}
163164
if (PodHelper.getPodLabel(pod, LabelConstants.JOBNAME_LABEL) != null) {
164-
processor.updateDomainStatus(pod, getExistingDomainPresenceInfo(domainUid));
165+
processor.updateDomainStatus(pod, info);
165166
}
166167
}
167168

@@ -243,14 +244,14 @@ private void addClusterList(ClusterList list) {
243244
}
244245

245246
private void addCluster(ClusterResource cluster) {
246-
ClusterPresenceInfo cachedInfo = getClusterPresenceInfoMap().get(cluster.getClusterName());
247+
ClusterPresenceInfo cachedInfo = getClusterPresenceInfoMap().get(getClusterName(cluster));
247248
if (cachedInfo == null) {
248-
newClusterNames.add(cluster.getClusterName());
249+
newClusterNames.add(getClusterName(cluster));
249250
} else if (cluster.isGenerationChanged(cachedInfo.getCluster())) {
250-
modifiedClusterNames.add(cluster.getClusterName());
251+
modifiedClusterNames.add(getClusterName(cluster));
251252
}
252253

253-
getClusterPresenceInfoMap().put(cluster.getClusterName(), new ClusterPresenceInfo(cluster));
254+
getClusterPresenceInfoMap().put(getClusterName(cluster), new ClusterPresenceInfo(cluster));
254255
}
255256

256257
private Stream<DomainPresenceInfo> getStrandedDomainPresenceInfos(DomainProcessor dp) {
@@ -305,18 +306,22 @@ private EventItem getEventItem(DomainPresenceInfo info) {
305306
}
306307

307308
private EventItem getEventItem(ClusterResource cluster) {
308-
if (newClusterNames.contains(cluster.getClusterName()) || cluster.getStatus() == null) {
309+
if (newClusterNames.contains(getClusterName(cluster)) || cluster.getStatus() == null) {
309310
return CLUSTER_CREATED;
310311
}
311-
if (modifiedClusterNames.contains(cluster.getClusterName())) {
312+
if (modifiedClusterNames.contains(getClusterName(cluster))) {
312313
return CLUSTER_CHANGED;
313314
}
314315
return null;
315316
}
316317

318+
private String getClusterName(ClusterResource cluster) {
319+
return cluster.getMetadata().getName();
320+
}
321+
317322
private void updateCluster(DomainProcessor dp, ClusterResource cluster, EventItem eventItem) {
318323
List<DomainPresenceInfo> list =
319-
dp.getExistingDomainPresenceInfoForCluster(cluster.getNamespace(), cluster.getClusterName());
324+
dp.getExistingDomainPresenceInfoForCluster(cluster.getNamespace(), getClusterName(cluster));
320325
if (list.isEmpty()) {
321326
createAndExecuteMakeRightOperation(dp, cluster, eventItem, null);
322327
} else {

operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ public class DomainPresenceInfo extends ResourcePresenceInfo {
8686
private String adminServerName;
8787

8888
private final List<String> validationWarnings = Collections.synchronizedList(new ArrayList<>());
89+
private final List<String> serverNamesFromPodList = Collections.synchronizedList(new ArrayList<>());
8990
private Map<String, Step.StepAndPacket> serversToRoll = Collections.emptyMap();
9091

9192
/**
@@ -850,6 +851,36 @@ public void addValidationWarning(String validationWarning) {
850851
validationWarnings.add(validationWarning);
851852
}
852853

854+
/**
855+
* Returns the server names collected from the pods seen during the pod list operation.
856+
*/
857+
public List<String> getServerNamesFromPodList() {
858+
return serverNamesFromPodList;
859+
}
860+
861+
/**
862+
* Adds server names collected from the pods seen during the pod list operation.
863+
* @param podNames server names (derived from the listed pods) to be added
864+
*/
865+
public void addServerNamesFromPodList(Collection<String> podNames) {
866+
serverNamesFromPodList.addAll(podNames);
867+
}
868+
869+
/**
870+
* Adds a single server name collected from a pod seen during the pod list operation.
871+
* @param podName server name (derived from a listed pod) to be added
872+
*/
873+
public void addServerNameFromPodList(String podName) {
874+
serverNamesFromPodList.add(podName);
875+
}
876+
877+
/**
878+
* Clears the server names collected during the pod list operation.
879+
*/
880+
public void clearServerPodNamesFromList() {
881+
serverNamesFromPodList.clear();
882+
}
883+
853884
/**
854885
* Returns the names of the servers which are supposed to be running.
855886
*/

operator/src/main/java/oracle/kubernetes/operator/makeright/MakeRightDomainOperationImpl.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
package oracle.kubernetes.operator.makeright;
55

66
import java.util.ArrayList;
7-
import java.util.Collection;
87
import java.util.List;
98
import java.util.Objects;
109
import java.util.Optional;
@@ -480,11 +479,8 @@ public Consumer<V1PodList> getPodListProcessing() {
480479
}
481480

482481
private void processList(V1PodList list) {
483-
Collection<String> serverNamesFromPodList = list.getItems().stream()
484-
.map(PodHelper::getPodServerName).collect(Collectors.toList());
485-
486-
info.getServerNames().stream().filter(s -> !serverNamesFromPodList.contains(s)).collect(Collectors.toList())
487-
.forEach(name -> info.deleteServerPodFromEvent(name, null));
482+
info.addServerNamesFromPodList(list.getItems().stream()
483+
.map(PodHelper::getPodServerName).collect(Collectors.toList()));
488484
list.getItems().forEach(this::addPod);
489485
}
490486

@@ -510,6 +506,14 @@ public Consumer<V1PodDisruptionBudgetList> getPodDisruptionBudgetListProcessing(
510506
private void addPodDisruptionBudget(V1PodDisruptionBudget pdb) {
511507
PodDisruptionBudgetHelper.addToPresence(info, pdb);
512508
}
509+
510+
@Override
511+
public void completeProcessing(Packet packet) {
512+
info.getServerNames().stream().filter(
513+
s -> !info.getServerNamesFromPodList().contains(s)).collect(Collectors.toList())
514+
.forEach(name -> info.deleteServerPodFromEvent(name, null));
515+
info.clearServerPodNamesFromList();
516+
}
513517
};
514518

515519
return executor.createNamespacedResourceSteps(processor, info, delegate.getDomainNamespaces());

operator/src/test/java/oracle/kubernetes/operator/DomainPresenceTest.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
import static oracle.kubernetes.operator.helpers.EventHelper.EventItem.DOMAIN_CREATED;
6262
import static oracle.kubernetes.operator.helpers.KubernetesTestSupport.CLUSTER;
6363
import static oracle.kubernetes.operator.helpers.KubernetesTestSupport.DOMAIN;
64+
import static oracle.kubernetes.operator.tuning.TuningParameters.DEFAULT_CALL_LIMIT;
6465
import static org.hamcrest.Matchers.anEmptyMap;
6566
import static org.hamcrest.Matchers.equalTo;
6667
import static org.hamcrest.Matchers.hasKey;
@@ -81,6 +82,9 @@ class DomainPresenceTest extends ThreadFactoryTestBase {
8182
// Call builder tuning
8283
public static final int CALL_REQUEST_LIMIT = 10;
8384
private static final int LAST_DOMAIN_NUM = 2 * CALL_REQUEST_LIMIT - 1;
85+
/** Highest managed-server pod number to create; yields more than one chunk's worth of pods. */
86+
private static final int MULTICHUNK_LAST_POD_NUM = 2 * DEFAULT_CALL_LIMIT - 1;
87+
8488
public static final String CLUSTER_1 = "cluster1";
8589
public static final String CLUSTER_2 = "cluster2";
8690
public static final String CLUSTER_3 = "cluster3";
@@ -432,6 +436,23 @@ void whenK8sHasOneDomainWithPod_recordPodPresence() {
432436
assertThat(getDomainPresenceInfo(dp, UID1).getServerPod("admin"), equalTo(pod));
433437
}
434438

439+
@Test
440+
void whenK8sDomainWithMoreThanCallRequestLimitNumberOfPods_recordPodsPresence() {
441+
addDomainResource(UID1, NS);
442+
V1Pod pod = createPodResource(UID1, NS, "admin");
443+
testSupport.defineResources(pod);
444+
createPodResources(UID1, NS, MULTICHUNK_LAST_POD_NUM);
445+
446+
dp.domains.computeIfAbsent(NS, k -> new ConcurrentHashMap<>()).put(UID1, info);
447+
448+
testSupport.addComponent("DP", DomainProcessor.class, dp);
449+
testSupport.runSteps(domainNamespaces.readExistingResources(NS, dp));
450+
451+
assertThat(getDomainPresenceInfo(dp, UID1).getServerPod("managed-server1"), notNullValue());
452+
assertThat(getDomainPresenceInfo(dp, UID1).getServerPod("managed-server" + MULTICHUNK_LAST_POD_NUM),
453+
notNullValue());
454+
}
455+
435456
@Test
436457
void whenK8sHasOneDomainWithPodButMissingInfo_dontRecordPodPresence() {
437458
addDomainResource(UID1, NS);
@@ -479,6 +500,14 @@ private void addPodResource(String uid, String namespace, String serverName) {
479500
testSupport.defineResources(createPodResource(uid, namespace, serverName));
480501
}
481502

503+
private void createPodResources(String uid, String namespace, int lastPodNum) {
504+
IntStream.rangeClosed(1, lastPodNum)
505+
.boxed()
506+
.map(i -> "managed-server" + i)
507+
.map(s -> createPodResource(uid, namespace, s))
508+
.forEach(testSupport::defineResources);
509+
}
510+
482511
@Test
483512
void whenK8sHasOneDomainWithOtherEvent_ignoreIt() {
484513
addDomainResource(UID1, NS);

operator/src/test/java/oracle/kubernetes/operator/DomainProcessorTest.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,6 +1067,22 @@ void whenClusterChanged_generateClusterChangedEvent() {
10671067
assertThat(getEventsForSeason(CLUSTER_CHANGED.getReason()), not(empty()));
10681068
}
10691069

1070+
@Test
1071+
void whenClusterResourceWithDifferentMetadataNameAndSpecNameChanged_generateClusterChangedEvent() {
1072+
ClusterStatus status = new ClusterStatus().withClusterName(CLUSTER4);
1073+
ClusterResource cluster1 = createClusterWithDifferentMetadataAndSpecName(CLUSTER4, NS).withStatus(status);
1074+
ClusterPresenceInfo info = new ClusterPresenceInfo(cluster1);
1075+
processor.registerClusterPresenceInfo(info);
1076+
ClusterResource cluster2 = createClusterWithDifferentMetadataAndSpecName(CLUSTER4, NS).withStatus(status);
1077+
cluster2.getMetadata().setGeneration(1234L);
1078+
testSupport.defineResources(cluster2);
1079+
1080+
testSupport.runSteps(domainNamespaces.readExistingResources(NS, processor));
1081+
1082+
assertThat(testSupport, hasEvent(CLUSTER_CHANGED.getReason()));
1083+
assertThat(getEventsForSeason(CLUSTER_CHANGED.getReason()), not(empty()));
1084+
}
1085+
10701086
private List<Object> getEventsForSeason(String reason) {
10711087
return testSupport.getResources(EVENT).stream()
10721088
.filter(e -> ((CoreV1Event)e).getReason().equals(reason)).collect(Collectors.toList());
@@ -1525,6 +1541,12 @@ private ClusterResource createClusterAlone(String clusterName, String ns) {
15251541
.spec(new ClusterSpec().withClusterName(clusterName));
15261542
}
15271543

1544+
private ClusterResource createClusterWithDifferentMetadataAndSpecName(String clusterMetadataName, String ns) {
1545+
return new ClusterResource()
1546+
.withMetadata(new V1ObjectMeta().name(clusterMetadataName).namespace(ns))
1547+
.spec(new ClusterSpec().withClusterName("specClusterName-" + clusterMetadataName));
1548+
}
1549+
15281550
private V1Service createNonOperatorService() {
15291551
return new V1Service()
15301552
.metadata(

0 commit comments

Comments
 (0)