Skip to content

Commit 098327d

Browse files
authored
Merge pull request #528 from oracle/fix-take4
Lots of work to improve resiliency and diagnosability
2 parents b844b00 + 993d379 commit 098327d

17 files changed

+804
-390
lines changed

operator/src/main/java/oracle/kubernetes/operator/DomainProcessor.java

Lines changed: 322 additions & 148 deletions
Large diffs are not rendered by default.

operator/src/main/java/oracle/kubernetes/operator/Main.java

Lines changed: 85 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
import java.util.StringTokenizer;
2525
import java.util.TreeSet;
2626
import java.util.concurrent.ConcurrentHashMap;
27+
import java.util.concurrent.ConcurrentMap;
2728
import java.util.concurrent.ScheduledExecutorService;
2829
import java.util.concurrent.ThreadFactory;
2930
import java.util.concurrent.TimeUnit;
@@ -66,6 +67,8 @@ private static ThreadFactory getThreadFactory() {
6667

6768
private static final LoggingFacade LOGGER = LoggingFactory.getLogger("Operator", "Operator");
6869

70+
// Packet key. StartNamespaceBeforeStep stores a Boolean under it that is true only when that pass
// was the one that flipped the namespace's "started" flag; the domain-list response handler reads
// it back and treats a missing value as true.
private static final String NS_STARTING_NOW = "NS_STARTING_NOW";
71+
6972
static final TuningParameters tuningAndConfig;
7073

7174
static {
@@ -101,7 +104,8 @@ private static ThreadFactory getThreadFactory() {
101104

102105
static final Engine engine = new Engine(wrappedExecutorService);
103106

104-
static final Map<String, AtomicBoolean> isNamespaceStopping = new ConcurrentHashMap<>();
107+
// Per-namespace "started" flags keyed by namespace name. An entry is created on demand (false),
// switched to true by the first start pass, and removed again when the namespace is stopped.
static final ConcurrentMap<String, AtomicBoolean> isNamespaceStarted = new ConcurrentHashMap<>();
108+
// Per-namespace "stopping" flags keyed by namespace name; isNamespaceStopping(String) creates a
// missing entry lazily with an initial value of false.
static final ConcurrentMap<String, AtomicBoolean> isNamespaceStopping = new ConcurrentHashMap<>();
105109

106110
private static final Map<String, ConfigMapWatcher> configMapWatchers = new ConcurrentHashMap<>();
107111
private static final Map<String, DomainWatcher> domainWatchers = new ConcurrentHashMap<>();
@@ -121,12 +125,12 @@ private static ThreadFactory getThreadFactory() {
121125
static final String READINESS_PROBE_FAILURE_EVENT_FILTER =
122126
"reason=Unhealthy,type=Warning,involvedObject.fieldPath=spec.containers{weblogic-server}";
123127

124-
static Map<String, DomainPresenceInfo> getDomainPresenceInfos() {
125-
return DomainPresenceInfoManager.getDomainPresenceInfos();
128+
/**
 * Returns the domain presence infos for the given namespace.
 *
 * <p>Thin delegate to {@code DomainPresenceInfoManager.getDomainPresenceInfos(ns)}.
 *
 * @param ns namespace passed through to the manager
 * @return the map supplied by the manager; callers in this file compare its keys against domain
 *     UIDs
 */
static Map<String, DomainPresenceInfo> getDomainPresenceInfos(String ns) {
129+
return DomainPresenceInfoManager.getDomainPresenceInfos(ns);
126130
}
127131

128-
static ServerKubernetesObjects getKubernetesObjects(String serverLegalName) {
129-
return ServerKubernetesObjectsManager.lookup(serverLegalName);
132+
/**
 * Returns the server Kubernetes objects for the given namespace.
 *
 * <p>Thin delegate to {@code ServerKubernetesObjectsManager.getServerKubernetesObjects(ns)}.
 *
 * @param ns namespace passed through to the manager
 * @return the map supplied by the manager; callers in this file test its keys against service
 *     names and pod names
 */
static Map<String, ServerKubernetesObjects> getKubernetesObjects(String ns) {
133+
return ServerKubernetesObjectsManager.getServerKubernetesObjects(ns);
130134
}
131135

132136
/**
@@ -261,7 +265,11 @@ private static class StartNamespaceBeforeStep extends Step {
261265

262266
@Override
263267
public NextAction apply(Packet packet) {
264-
if (isNamespaceStopping.putIfAbsent(ns, new AtomicBoolean(false)) == null) {
268+
AtomicBoolean a = isNamespaceStarted.computeIfAbsent(ns, (key) -> new AtomicBoolean(false));
269+
boolean startingNow = !a.getAndSet(true);
270+
packet.put(NS_STARTING_NOW, (Boolean) startingNow);
271+
272+
if (startingNow) {
265273
try {
266274
HealthCheckHelper.performSecurityChecks(version, operatorNamespace, ns);
267275
} catch (Throwable e) {
@@ -280,13 +288,12 @@ private static void stopNamespaces(Collection<String> namespacesToStop) {
280288
if (stopping != null) {
281289
stopping.set(true);
282290
}
291+
isNamespaceStarted.remove(ns);
283292
}
284293
}
285294

286-
private static final AtomicBoolean UNINITIALIZED_NS_STOPPING = new AtomicBoolean(true);
287-
288295
/**
 * Returns the "stopping" flag for a namespace.
 *
 * <p>This span is a rendered diff, so both versions of the return statement appear: the line whose
 * gutter ends in '-' is the old lookup and the line whose gutter ends in '+' is its replacement.
 *
 * @param ns namespace name used as the key into the isNamespaceStopping map
 * @return the AtomicBoolean associated with {@code ns}
 */
static AtomicBoolean isNamespaceStopping(String ns) {
289-
// Old form: a namespace with no entry fell back to the shared UNINITIALIZED_NS_STOPPING flag
// (constructed as true) and nothing was stored in the map.
return isNamespaceStopping.getOrDefault(ns, UNINITIALIZED_NS_STOPPING);
296+
// New form: a namespace with no entry gets its own flag, created as false and stored in the map.
return isNamespaceStopping.computeIfAbsent(ns, (key) -> new AtomicBoolean(false));
290297
}
291298

292299
static void runSteps(Step firstStep) {
@@ -527,21 +534,26 @@ public NextAction onFailure(Packet packet, CallResponse<DomainList> callResponse
527534

528535
@Override
529536
public NextAction onSuccess(Packet packet, CallResponse<DomainList> callResponse) {
537+
Boolean startingNow = (Boolean) packet.get(NS_STARTING_NOW);
538+
if (startingNow == null) {
539+
startingNow = Boolean.TRUE;
540+
}
541+
530542
Set<String> domainUIDs = new HashSet<>();
531543
if (callResponse.getResult() != null) {
532544
for (Domain dom : callResponse.getResult().getItems()) {
533545
String domainUID = dom.getSpec().getDomainUID();
534546
domainUIDs.add(domainUID);
535547
DomainPresenceInfo info = DomainPresenceInfoManager.getOrCreate(dom);
536-
if (isNamespaceStopping(dom.getMetadata().getNamespace()).get()) {
548+
if (startingNow) {
537549
// Update domain here if namespace is not yet running
538550
info.setDomain(dom);
539551
}
540552
DomainProcessor.makeRightDomainPresence(info, domainUID, dom, true, false, false);
541553
}
542554
}
543555

544-
getDomainPresenceInfos()
556+
getDomainPresenceInfos(ns)
545557
.forEach(
546558
(key, value) -> {
547559
if (!domainUIDs.contains(key)) {
@@ -590,23 +602,66 @@ public NextAction onFailure(Packet packet, CallResponse<V1ServiceList> callRespo
590602
@Override
591603
public NextAction onSuccess(Packet packet, CallResponse<V1ServiceList> callResponse) {
592604
V1ServiceList result = callResponse.getResult();
605+
606+
Set<String> serviceNames = new HashSet<>();
607+
Set<String> channelNames = new HashSet<>();
608+
Set<String> clusterNames = new HashSet<>();
593609
if (result != null) {
594610
for (V1Service service : result.getItems()) {
595611
String domainUID = ServiceWatcher.getServiceDomainUID(service);
596612
String serverName = ServiceWatcher.getServiceServerName(service);
597613
String channelName = ServiceWatcher.getServiceChannelName(service);
598-
if (domainUID != null && serverName != null) {
614+
String clusterName = ServiceWatcher.getServiceClusterName(service);
615+
if (domainUID != null) {
599616
DomainPresenceInfo info = DomainPresenceInfoManager.getOrCreate(ns, domainUID);
600-
ServerKubernetesObjects sko =
601-
ServerKubernetesObjectsManager.getOrCreate(info, domainUID, serverName);
602-
if (channelName != null) {
603-
sko.getChannels().put(channelName, service);
604-
} else {
605-
sko.getService().set(service);
617+
if (clusterName != null) {
618+
clusterNames.add(clusterName);
619+
info.getClusters().put(clusterName, service);
620+
} else if (serverName != null) {
621+
ServerKubernetesObjects sko =
622+
ServerKubernetesObjectsManager.getOrCreate(info, domainUID, serverName);
623+
if (channelName != null) {
624+
channelNames.add(channelName);
625+
sko.getChannels().put(channelName, service);
626+
} else {
627+
serviceNames.add(service.getMetadata().getName());
628+
sko.getService().set(service);
629+
}
606630
}
607631
}
608632
}
609633
}
634+
635+
getDomainPresenceInfos(ns)
636+
.forEach(
637+
(key, value) -> {
638+
ConcurrentMap<String, V1Service> map = value.getClusters();
639+
map.forEach(
640+
(ckey, cvalue) -> {
641+
map.compute(
642+
ckey,
643+
(k, current) -> {
644+
return clusterNames.contains(ckey) ? current : null;
645+
});
646+
});
647+
});
648+
getKubernetesObjects(ns)
649+
.forEach(
650+
(key, value) -> {
651+
if (!serviceNames.contains(key)) {
652+
value.getService().set(null);
653+
}
654+
ConcurrentMap<String, V1Service> map = value.getChannels();
655+
map.forEach(
656+
(ckey, cvalue) -> {
657+
map.compute(
658+
ckey,
659+
(k, current) -> {
660+
return channelNames.contains(ckey) ? current : null;
661+
});
662+
});
663+
});
664+
610665
if (!serviceWatchers.containsKey(ns)) {
611666
serviceWatchers.put(ns, createServiceWatcher(ns, getInitialResourceVersion(result)));
612667
}
@@ -665,18 +720,30 @@ public NextAction onFailure(Packet packet, CallResponse<V1PodList> callResponse)
665720
@Override
666721
public NextAction onSuccess(Packet packet, CallResponse<V1PodList> callResponse) {
667722
V1PodList result = callResponse.getResult();
723+
724+
Set<String> podNames = new HashSet<>();
668725
if (result != null) {
669726
for (V1Pod pod : result.getItems()) {
670727
String domainUID = PodWatcher.getPodDomainUID(pod);
671728
String serverName = PodWatcher.getPodServerName(pod);
672729
if (domainUID != null && serverName != null) {
730+
podNames.add(pod.getMetadata().getName());
673731
DomainPresenceInfo info = DomainPresenceInfoManager.getOrCreate(ns, domainUID);
674732
ServerKubernetesObjects sko =
675733
ServerKubernetesObjectsManager.getOrCreate(info, domainUID, serverName);
676734
sko.getPod().set(pod);
677735
}
678736
}
679737
}
738+
739+
getKubernetesObjects(ns)
740+
.forEach(
741+
(key, value) -> {
742+
if (!podNames.contains(key)) {
743+
value.getPod().set(null);
744+
}
745+
});
746+
680747
if (!podWatchers.containsKey(ns)) {
681748
podWatchers.put(ns, createPodWatcher(ns, getInitialResourceVersion(result)));
682749
}

operator/src/main/java/oracle/kubernetes/operator/PodWatcher.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,8 @@ public void receivedResponse(Watch.Response<V1Pod> item) {
9494
Boolean isReady = isReady(pod);
9595
String podName = pod.getMetadata().getName();
9696
Container c = ContainerResolver.getInstance().getContainer();
97-
ServerKubernetesObjects sko = ServerKubernetesObjectsManager.lookup(podName);
97+
ServerKubernetesObjects sko =
98+
ServerKubernetesObjectsManager.lookup(pod.getMetadata().getNamespace(), podName);
9899
if (sko != null) {
99100
sko.getLastKnownStatus().set(isReady ? WebLogicConstants.RUNNING_STATE : null);
100101
}

operator/src/main/java/oracle/kubernetes/operator/ServiceWatcher.java

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -74,4 +74,13 @@ static String getServiceChannelName(V1Service service) {
7474
}
7575
return null;
7676
}
77+
78+
/**
 * Reads the cluster name recorded in a service's labels.
 *
 * @param service service to inspect
 * @return value of the {@code LabelConstants.CLUSTERNAME_LABEL} label, or {@code null} when the
 *     service has no labels map or the map holds no value for that label
 */
static String getServiceClusterName(V1Service service) {
79+
V1ObjectMeta meta = service.getMetadata();
80+
// NOTE(review): meta is dereferenced without a null check; confirm callers always pass services
// that carry metadata.
Map<String, String> labels = meta.getLabels();
81+
if (labels != null) {
82+
return labels.get(LabelConstants.CLUSTERNAME_LABEL);
83+
}
84+
return null;
85+
}
7786
}

operator/src/main/java/oracle/kubernetes/operator/helpers/DomainPresenceInfo.java

Lines changed: 28 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
*/
3232
public class DomainPresenceInfo {
3333
private final String namespace;
34+
private final String domainUID;
3435
private final AtomicReference<Domain> domain;
3536
private final AtomicBoolean isDeleting = new AtomicBoolean(false);
3637
private final AtomicReference<ScheduledFuture<?>> statusUpdater;
@@ -54,6 +55,7 @@ public class DomainPresenceInfo {
5455
public DomainPresenceInfo(Domain domain) {
5556
this.domain = new AtomicReference<>(domain);
5657
// Namespace and domain UID are read out of the Domain once here and kept in final fields.
this.namespace = domain.getMetadata().getNamespace();
58+
this.domainUID = domain.getSpec().getDomainUID();
5759
// Both references start out empty.
this.serverStartupInfo = new AtomicReference<>(null);
5860
this.statusUpdater = new AtomicReference<>(null);
5961
}
@@ -63,9 +65,10 @@ public DomainPresenceInfo(Domain domain) {
6365
*
6466
* @param namespace Namespace
* @param domainUID Domain UID
6567
*/
66-
DomainPresenceInfo(String namespace) {
68+
DomainPresenceInfo(String namespace, String domainUID) {
6769
// No Domain object is supplied on this path, so the reference starts out holding null.
this.domain = new AtomicReference<>(null);
6870
this.namespace = namespace;
71+
this.domainUID = domainUID;
6972
this.serverStartupInfo = new AtomicReference<>(null);
7073
this.statusUpdater = new AtomicReference<>(null);
7174
}
@@ -165,11 +168,23 @@ public void setDomain(Domain domain) {
165168
if (old == null) {
166169
for (Map.Entry<String, ServerKubernetesObjects> entry : servers.entrySet()) {
167170
ServerKubernetesObjectsManager.register(
168-
domain.getSpec().getDomainUID(), entry.getKey(), entry.getValue());
171+
domain.getMetadata().getNamespace(),
172+
domain.getSpec().getDomainUID(),
173+
entry.getKey(),
174+
entry.getValue());
169175
}
170176
}
171177
}
172178

179+
/**
180+
* Gets the domain UID that was fixed when this object was constructed.
181+
*
182+
* @return Domain UID held in the final {@code domainUID} field
183+
*/
184+
public String getDomainUID() {
185+
return domainUID;
186+
}
187+
173188
/**
174189
* Gets the namespace
175190
*
@@ -342,41 +357,29 @@ public ServerKubernetesObjects get(Object key) {
342357

343358
/**
 * Registers the entry with ServerKubernetesObjectsManager under (namespace, domainUID, key) and
 * then stores it in the backing map.
 *
 * <p>Lines whose gutter ends in '-' are the pre-commit form, which registered only when a Domain
 * was already set; the '+' line registers unconditionally.
 *
 * @param key map key, forwarded to the manager and to the backing map
 * @param value objects to register and store
 * @return whatever {@code delegate.put(key, value)} returns
 */
@Override
344359
public ServerKubernetesObjects put(String key, ServerKubernetesObjects value) {
345-
Domain d = domain.get();
346-
if (d != null) {
347-
ServerKubernetesObjectsManager.register(d.getSpec().getDomainUID(), key, value);
348-
}
360+
ServerKubernetesObjectsManager.register(namespace, domainUID, key, value);
349361
return delegate.put(key, value);
350362
}
351363

352364
/**
 * Unregisters the key from ServerKubernetesObjectsManager and then removes it from the backing
 * map.
 *
 * <p>Lines whose gutter ends in '-' are the pre-commit form, which unregistered only when a Domain
 * was already set. The '+' line unregisters unconditionally, without first checking that the key
 * is present in the backing map.
 *
 * @param key key to remove; it is cast to String before being handed to the manager
 * @return whatever {@code delegate.remove(key)} returns
 */
@Override
353365
public ServerKubernetesObjects remove(Object key) {
354-
Domain d = domain.get();
355-
if (d != null) {
356-
ServerKubernetesObjectsManager.unregister(d.getSpec().getDomainUID(), (String) key);
357-
}
366+
ServerKubernetesObjectsManager.unregister(namespace, domainUID, (String) key);
358367
return delegate.remove(key);
359368
}
360369

361370
/**
 * Registers every entry of {@code m} with ServerKubernetesObjectsManager under (namespace,
 * domainUID, entry key) and then copies all entries into the backing map.
 *
 * <p>Lines whose gutter ends in '-' are the pre-commit loop, which ran only when a Domain was
 * already set; the '+' lines are the unconditional replacement loop.
 *
 * @param m entries to register and store
 */
@Override
362371
public void putAll(Map<? extends String, ? extends ServerKubernetesObjects> m) {
363-
Domain d = domain.get();
364-
if (d != null) {
365-
for (Map.Entry<? extends String, ? extends ServerKubernetesObjects> entry : m.entrySet()) {
366-
ServerKubernetesObjectsManager.register(
367-
d.getSpec().getDomainUID(), entry.getKey(), entry.getValue());
368-
}
372+
for (Map.Entry<? extends String, ? extends ServerKubernetesObjects> entry : m.entrySet()) {
373+
ServerKubernetesObjectsManager.register(
374+
namespace, domainUID, entry.getKey(), entry.getValue());
369375
}
370376
delegate.putAll(m);
371377
}
372378

373379
/**
 * Unregisters every key currently returned by {@code entrySet()} from
 * ServerKubernetesObjectsManager and then clears the backing map.
 *
 * <p>Lines whose gutter ends in '-' are the pre-commit loop, which ran only when a Domain was
 * already set; the '+' lines are the unconditional replacement loop.
 */
@Override
374380
public void clear() {
375-
Domain d = domain.get();
376-
if (d != null) {
377-
for (Map.Entry<? extends String, ? extends ServerKubernetesObjects> entry : entrySet()) {
378-
ServerKubernetesObjectsManager.unregister(d.getSpec().getDomainUID(), entry.getKey());
379-
}
381+
for (Map.Entry<? extends String, ? extends ServerKubernetesObjects> entry : entrySet()) {
382+
ServerKubernetesObjectsManager.unregister(namespace, domainUID, entry.getKey());
380383
}
381384
delegate.clear();
382385
}
@@ -400,10 +403,7 @@ public Set<Entry<String, ServerKubernetesObjects>> entrySet() {
400403
/**
 * Stores the value in the backing map only if the key has no mapping, and registers it with
 * ServerKubernetesObjectsManager when {@code delegate.putIfAbsent} returned null.
 *
 * <p>Lines whose gutter ends in '-' are the pre-commit form, which additionally required a Domain
 * to be set before registering; the '+' line drops that condition.
 *
 * @param key map key
 * @param value objects to store and, on insertion, register
 * @return the result of {@code delegate.putIfAbsent(key, value)}
 */
public ServerKubernetesObjects putIfAbsent(String key, ServerKubernetesObjects value) {
401404
ServerKubernetesObjects result = delegate.putIfAbsent(key, value);
402405
if (result == null) {
403-
Domain d = domain.get();
404-
if (d != null) {
405-
ServerKubernetesObjectsManager.register(d.getSpec().getDomainUID(), key, value);
406-
}
406+
ServerKubernetesObjectsManager.register(namespace, domainUID, key, value);
407407
}
408408
return result;
409409
}
@@ -412,10 +412,7 @@ public ServerKubernetesObjects putIfAbsent(String key, ServerKubernetesObjects v
412412
/**
 * Removes the key/value pair from the backing map and, only when that removal succeeded,
 * unregisters the key from ServerKubernetesObjectsManager.
 *
 * <p>Lines whose gutter ends in '-' are the pre-commit form, which additionally required a Domain
 * to be set before unregistering; the '+' line drops that condition.
 *
 * @param key key to remove; it is cast to String before being handed to the manager
 * @param value value the key must currently map to
 * @return the result of {@code delegate.remove(key, value)}
 */
public boolean remove(Object key, Object value) {
413413
boolean result = delegate.remove(key, value);
414414
if (result) {
415-
Domain d = domain.get();
416-
if (d != null) {
417-
ServerKubernetesObjectsManager.unregister(d.getSpec().getDomainUID(), (String) key);
418-
}
415+
ServerKubernetesObjectsManager.unregister(namespace, domainUID, (String) key);
419416
}
420417
return result;
421418
}
@@ -425,10 +422,7 @@ public boolean replace(
425422
String key, ServerKubernetesObjects oldValue, ServerKubernetesObjects newValue) {
426423
boolean result = delegate.replace(key, oldValue, newValue);
427424
if (result) {
428-
Domain d = domain.get();
429-
if (d != null) {
430-
ServerKubernetesObjectsManager.unregister(d.getSpec().getDomainUID(), (String) key);
431-
}
425+
ServerKubernetesObjectsManager.unregister(namespace, domainUID, (String) key);
432426
}
433427
return result;
434428
}
@@ -437,10 +431,7 @@ public boolean replace(
437431
/**
 * Replaces the value for a key in the backing map and unregisters the key from
 * ServerKubernetesObjectsManager when {@code delegate.replace} returned null.
 *
 * <p>Lines whose gutter ends in '-' are the pre-commit form, which additionally required a Domain
 * to be set before unregistering; the '+' line drops that condition.
 *
 * <p>NOTE(review): if the delegate follows the standard {@code Map.replace(key, value)} contract,
 * a null result means no replacement took place (or the previous value was null). Unregistering
 * in that branch, and never registering the new value after a successful replace, looks inverted.
 * Confirm the intended behavior.
 *
 * @param key map key
 * @param value replacement value
 * @return the result of {@code delegate.replace(key, value)}
 */
public ServerKubernetesObjects replace(String key, ServerKubernetesObjects value) {
438432
ServerKubernetesObjects result = delegate.replace(key, value);
439433
if (result == null) {
440-
Domain d = domain.get();
441-
if (d != null) {
442-
ServerKubernetesObjectsManager.unregister(d.getSpec().getDomainUID(), (String) key);
443-
}
434+
ServerKubernetesObjectsManager.unregister(namespace, domainUID, (String) key);
444435
}
445436
return result;
446437
}

0 commit comments

Comments
 (0)