Skip to content

Commit 743b0eb

Browse files
ankediarjeberhard
authored andcommitted
Backport OWLS-109579 fix in MR 4271 - Potential fixes for issues observed in CAGBU environment with large K8s cluster.
1 parent 135162b commit 743b0eb

File tree

5 files changed

+112
-21
lines changed

5 files changed

+112
-21
lines changed

operator/src/main/java/oracle/kubernetes/operator/DomainResourcesValidation.java

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ public void completeProcessing(Packet packet) {
104104
.forEach(info -> adjustClusterResources(c, info)));
105105
executeMakeRightForClusterEvents(dp);
106106
getActiveDomainPresenceInfos().forEach(info -> activateDomain(dp, info));
107+
getDomainPresenceInfoMap().values().forEach(DomainResourcesValidation.this::removeDeletedPodsFromDPI);
108+
getDomainPresenceInfoMap().values().forEach(DomainPresenceInfo::clearServerPodNamesFromList);
107109
}
108110
};
109111
}
@@ -134,15 +136,11 @@ private boolean isForDomain(ClusterResource clusterResource, DomainPresenceInfo
134136
}
135137

136138
private void addPodList(V1PodList list) {
137-
getDomainPresenceInfoMap().values().forEach(dpi -> removeDeletedPodsFromDPI(list, dpi));
138139
list.getItems().forEach(this::addPod);
139140
}
140141

141-
private void removeDeletedPodsFromDPI(V1PodList list, DomainPresenceInfo dpi) {
142-
Collection<String> serverNamesFromPodList = list.getItems().stream()
143-
.map(PodHelper::getPodServerName).collect(Collectors.toList());
144-
145-
dpi.getServerNames().stream().filter(s -> !serverNamesFromPodList.contains(s)).collect(Collectors.toList())
142+
private void removeDeletedPodsFromDPI(DomainPresenceInfo dpi) {
143+
dpi.getServerNames().stream().filter(s -> !dpi.getServerNamesFromPodList().contains(s)).collect(Collectors.toList())
146144
.forEach(name -> dpi.deleteServerPodFromEvent(name, null));
147145
}
148146

@@ -157,11 +155,14 @@ private void addOperatorEventList(CoreV1EventList list) {
157155
private void addPod(V1Pod pod) {
158156
String domainUid = PodHelper.getPodDomainUid(pod);
159157
String serverName = PodHelper.getPodServerName(pod);
158+
DomainPresenceInfo info = getExistingDomainPresenceInfo(domainUid);
159+
Optional.ofNullable(info).ifPresent(i -> i.addServerNameFromPodList(serverName));
160+
160161
if (domainUid != null && serverName != null) {
161-
setServerPodFromEvent(getExistingDomainPresenceInfo(domainUid), serverName, pod);
162+
setServerPodFromEvent(info, serverName, pod);
162163
}
163164
if (PodHelper.getPodLabel(pod, LabelConstants.JOBNAME_LABEL) != null) {
164-
processor.updateDomainStatus(pod, getExistingDomainPresenceInfo(domainUid));
165+
processor.updateDomainStatus(pod, info);
165166
}
166167
}
167168

@@ -243,14 +244,14 @@ private void addClusterList(ClusterList list) {
243244
}
244245

245246
private void addCluster(ClusterResource cluster) {
246-
ClusterPresenceInfo cachedInfo = getClusterPresenceInfoMap().get(cluster.getClusterName());
247+
ClusterPresenceInfo cachedInfo = getClusterPresenceInfoMap().get(getClusterName(cluster));
247248
if (cachedInfo == null) {
248-
newClusterNames.add(cluster.getClusterName());
249+
newClusterNames.add(getClusterName(cluster));
249250
} else if (cluster.isGenerationChanged(cachedInfo.getCluster())) {
250-
modifiedClusterNames.add(cluster.getClusterName());
251+
modifiedClusterNames.add(getClusterName(cluster));
251252
}
252253

253-
getClusterPresenceInfoMap().put(cluster.getClusterName(), new ClusterPresenceInfo(cluster));
254+
getClusterPresenceInfoMap().put(getClusterName(cluster), new ClusterPresenceInfo(cluster));
254255
}
255256

256257
private Stream<DomainPresenceInfo> getStrandedDomainPresenceInfos(DomainProcessor dp) {
@@ -305,18 +306,22 @@ private EventItem getEventItem(DomainPresenceInfo info) {
305306
}
306307

307308
private EventItem getEventItem(ClusterResource cluster) {
308-
if (newClusterNames.contains(cluster.getClusterName()) || cluster.getStatus() == null) {
309+
if (newClusterNames.contains(getClusterName(cluster)) || cluster.getStatus() == null) {
309310
return CLUSTER_CREATED;
310311
}
311-
if (modifiedClusterNames.contains(cluster.getClusterName())) {
312+
if (modifiedClusterNames.contains(getClusterName(cluster))) {
312313
return CLUSTER_CHANGED;
313314
}
314315
return null;
315316
}
316317

318+
private String getClusterName(ClusterResource cluster) {
319+
return cluster.getMetadata().getName();
320+
}
321+
317322
private void updateCluster(DomainProcessor dp, ClusterResource cluster, EventItem eventItem) {
318323
List<DomainPresenceInfo> list =
319-
dp.getExistingDomainPresenceInfoForCluster(cluster.getNamespace(), cluster.getClusterName());
324+
dp.getExistingDomainPresenceInfoForCluster(cluster.getNamespace(), getClusterName(cluster));
320325
if (list.isEmpty()) {
321326
createAndExecuteMakeRightOperation(dp, cluster, eventItem, null);
322327
} else {

operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ public class DomainPresenceInfo extends ResourcePresenceInfo {
8484
private String adminServerName;
8585

8686
private final List<String> validationWarnings = Collections.synchronizedList(new ArrayList<>());
87+
private final List<String> serverNamesFromPodList = Collections.synchronizedList(new ArrayList<>());
8788
private Map<String, Step.StepAndPacket> serversToRoll = Collections.emptyMap();
8889

8990
/**
@@ -848,6 +849,36 @@ public void addValidationWarning(String validationWarning) {
848849
validationWarnings.add(validationWarning);
849850
}
850851

852+
/**
853+
* Return server Pod names from List operation.
854+
*/
855+
public List<String> getServerNamesFromPodList() {
856+
return serverNamesFromPodList;
857+
}
858+
859+
/**
860+
* Add server Pod names from List operation.
861+
* @param podNames pod names to be added
862+
*/
863+
public void addServerNamesFromPodList(Collection<String> podNames) {
864+
serverNamesFromPodList.addAll(podNames);
865+
}
866+
867+
/**
868+
* Add server Pod name from List operation.
869+
* @param podName pod name to be added
870+
*/
871+
public void addServerNameFromPodList(String podName) {
872+
serverNamesFromPodList.add(podName);
873+
}
874+
875+
/**
876+
* Clear server Pod names from List operation.
877+
*/
878+
public void clearServerPodNamesFromList() {
879+
serverNamesFromPodList.clear();
880+
}
881+
851882
/**
852883
* Returns the names of the servers which are supposed to be running.
853884
*/

operator/src/main/java/oracle/kubernetes/operator/makeright/MakeRightDomainOperationImpl.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
package oracle.kubernetes.operator.makeright;
55

66
import java.util.ArrayList;
7-
import java.util.Collection;
87
import java.util.List;
98
import java.util.Objects;
109
import java.util.Optional;
@@ -435,11 +434,8 @@ public Consumer<V1PodList> getPodListProcessing() {
435434
}
436435

437436
private void processList(V1PodList list) {
438-
Collection<String> serverNamesFromPodList = list.getItems().stream()
439-
.map(PodHelper::getPodServerName).collect(Collectors.toList());
440-
441-
info.getServerNames().stream().filter(s -> !serverNamesFromPodList.contains(s)).collect(Collectors.toList())
442-
.forEach(name -> info.deleteServerPodFromEvent(name, null));
437+
info.addServerNamesFromPodList(list.getItems().stream()
438+
.map(PodHelper::getPodServerName).collect(Collectors.toList()));
443439
list.getItems().forEach(this::addPod);
444440
}
445441

@@ -465,6 +461,14 @@ public Consumer<V1PodDisruptionBudgetList> getPodDisruptionBudgetListProcessing(
465461
private void addPodDisruptionBudget(V1PodDisruptionBudget pdb) {
466462
PodDisruptionBudgetHelper.addToPresence(info, pdb);
467463
}
464+
465+
@Override
466+
public void completeProcessing(Packet packet) {
467+
info.getServerNames().stream().filter(
468+
s -> !info.getServerNamesFromPodList().contains(s)).collect(Collectors.toList())
469+
.forEach(name -> info.deleteServerPodFromEvent(name, null));
470+
info.clearServerPodNamesFromList();
471+
}
468472
};
469473

470474
return executor.createNamespacedResourceSteps(processor, info, delegate.getDomainNamespaces());

operator/src/test/java/oracle/kubernetes/operator/DomainPresenceTest.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
import static oracle.kubernetes.operator.helpers.EventHelper.EventItem.DOMAIN_CREATED;
6262
import static oracle.kubernetes.operator.helpers.KubernetesTestSupport.CLUSTER;
6363
import static oracle.kubernetes.operator.helpers.KubernetesTestSupport.DOMAIN;
64+
import static oracle.kubernetes.operator.tuning.TuningParameters.DEFAULT_CALL_LIMIT;
6465
import static org.hamcrest.Matchers.anEmptyMap;
6566
import static org.hamcrest.Matchers.equalTo;
6667
import static org.hamcrest.Matchers.hasKey;
@@ -81,6 +82,9 @@ class DomainPresenceTest extends ThreadFactoryTestBase {
8182
// Call builder tuning
8283
public static final int CALL_REQUEST_LIMIT = 10;
8384
private static final int LAST_DOMAIN_NUM = 2 * CALL_REQUEST_LIMIT - 1;
85+
/** More than one chunk's worth of pods. */
86+
private static final int MULTICHUNK_LAST_POD_NUM = 2 * DEFAULT_CALL_LIMIT - 1;
87+
8488
public static final String CLUSTER_1 = "cluster1";
8589
public static final String CLUSTER_2 = "cluster2";
8690
public static final String CLUSTER_3 = "cluster3";
@@ -432,6 +436,23 @@ void whenK8sHasOneDomainWithPod_recordPodPresence() {
432436
assertThat(getDomainPresenceInfo(dp, UID1).getServerPod("admin"), equalTo(pod));
433437
}
434438

439+
@Test
440+
void whenK8sDomainWithMoreThanCallRequestLimitNumberOfPods_recordPodsPresence() {
441+
addDomainResource(UID1, NS);
442+
V1Pod pod = createPodResource(UID1, NS, "admin");
443+
testSupport.defineResources(pod);
444+
createPodResources(UID1, NS, MULTICHUNK_LAST_POD_NUM);
445+
446+
dp.domains.computeIfAbsent(NS, k -> new ConcurrentHashMap<>()).put(UID1, info);
447+
448+
testSupport.addComponent("DP", DomainProcessor.class, dp);
449+
testSupport.runSteps(domainNamespaces.readExistingResources(NS, dp));
450+
451+
assertThat(getDomainPresenceInfo(dp, UID1).getServerPod("managed-server1"), notNullValue());
452+
assertThat(getDomainPresenceInfo(dp, UID1).getServerPod("managed-server" + MULTICHUNK_LAST_POD_NUM),
453+
notNullValue());
454+
}
455+
435456
@Test
436457
void whenK8sHasOneDomainWithPodButMissingInfo_dontRecordPodPresence() {
437458
addDomainResource(UID1, NS);
@@ -479,6 +500,14 @@ private void addPodResource(String uid, String namespace, String serverName) {
479500
testSupport.defineResources(createPodResource(uid, namespace, serverName));
480501
}
481502

503+
private void createPodResources(String uid, String namespace, int lastPodNum) {
504+
IntStream.rangeClosed(1, lastPodNum)
505+
.boxed()
506+
.map(i -> "managed-server" + i)
507+
.map(s -> createPodResource(uid, namespace, s))
508+
.forEach(testSupport::defineResources);
509+
}
510+
482511
@Test
483512
void whenK8sHasOneDomainWithOtherEvent_ignoreIt() {
484513
addDomainResource(UID1, NS);

operator/src/test/java/oracle/kubernetes/operator/DomainProcessorTest.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,6 +1011,22 @@ void whenClusterChanged_generateClusterChangedEvent() {
10111011
assertThat(getEventsForSeason(CLUSTER_CHANGED.getReason()), not(empty()));
10121012
}
10131013

1014+
@Test
1015+
void whenClusterResourceWithDifferentMetadataNameAndSpecNameChanged_generateClusterChangedEvent() {
1016+
ClusterStatus status = new ClusterStatus().withClusterName(CLUSTER4);
1017+
ClusterResource cluster1 = createClusterWithDifferentMetadataAndSpecName(CLUSTER4, NS).withStatus(status);
1018+
ClusterPresenceInfo info = new ClusterPresenceInfo(cluster1);
1019+
processor.registerClusterPresenceInfo(info);
1020+
ClusterResource cluster2 = createClusterWithDifferentMetadataAndSpecName(CLUSTER4, NS).withStatus(status);
1021+
cluster2.getMetadata().setGeneration(1234L);
1022+
testSupport.defineResources(cluster2);
1023+
1024+
testSupport.runSteps(domainNamespaces.readExistingResources(NS, processor));
1025+
1026+
assertThat(testSupport, hasEvent(CLUSTER_CHANGED.getReason()));
1027+
assertThat(getEventsForSeason(CLUSTER_CHANGED.getReason()), not(empty()));
1028+
}
1029+
10141030
private List<Object> getEventsForSeason(String reason) {
10151031
return testSupport.getResources(EVENT).stream()
10161032
.filter(e -> ((CoreV1Event)e).getReason().equals(reason)).collect(Collectors.toList());
@@ -1469,6 +1485,12 @@ private ClusterResource createClusterAlone(String clusterName, String ns) {
14691485
.spec(new ClusterSpec().withClusterName(clusterName));
14701486
}
14711487

1488+
private ClusterResource createClusterWithDifferentMetadataAndSpecName(String clusterMetadataName, String ns) {
1489+
return new ClusterResource()
1490+
.withMetadata(new V1ObjectMeta().name(clusterMetadataName).namespace(ns))
1491+
.spec(new ClusterSpec().withClusterName("specClusterName-" + clusterMetadataName));
1492+
}
1493+
14721494
private V1Service createNonOperatorService() {
14731495
return new V1Service()
14741496
.metadata(

0 commit comments

Comments
 (0)