Skip to content

Commit 6ef960d

Browse files
authored
Merge pull request #76 from slaskawi/fix_split_during_rolling_upgrade
Fix splitting cluster on Rolling Update
2 parents 93d3ff4 + add6b69 commit 6ef960d

File tree

6 files changed

+1134
-15
lines changed

6 files changed

+1134
-15
lines changed

README.adoc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,9 @@ namespace `"default"` will be returned.
169169

170170
| dump_requests || Dumps all discovery requests and responses to the Kubernetes server to stdout when true
171171

172-
| split_clusters_during_rolling_update || During the Rolling Update, prevents from putting all Pods into a single cluster
172+
| split_clusters_during_rolling_update | KUBERNETES_SPLIT_CLUSTERS_DURING_ROLLING_UPDATE | During a Rolling Update, prevents all Pods from being put into a single cluster
173+
174+
| useNotReadyAddresses | KUBERNETES_USE_NOT_READY_ADDRESSES | True if initial discovery should take unready Pods into consideration. Default is `true`.
173175

174176
|===============
175177

src/main/java/org/jgroups/protocols/kubernetes/Client.java

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ protected String fetchFromKubernetes(String op, String namespace, String labels,
9191

9292

9393
public List<Pod> getPods(String namespace, String labels, boolean dump_requests) throws Exception {
94-
String result=fetchFromKubernetes("pods", namespace, labels, dump_requests);
94+
String result = fetchFromKubernetes("pods", namespace, labels, dump_requests);
9595
if(result == null)
9696
return Collections.emptyList();
9797
return parseJsonResult(result, namespace, labels);
@@ -107,20 +107,29 @@ String getPodGroup(Json pod) {
107107
Json labels = Optional.ofNullable(meta)
108108
.map(podMetadata -> podMetadata.at("labels"))
109109
.orElse(null);
110+
111+
// This works for Deployment Config
110112
String group = Optional.ofNullable(labels)
111113
.map(l -> l.at("pod-template-hash"))
112114
.map(Json::asString)
113115
.orElse(null);
114116

115117
if (group == null) {
116-
log.warn("metadata.labels.pod-template-hash not found in pod json. Impossible to reliably determine pod group during Rolling Update");
117-
// keep backward-compatible behavior
118+
// Ok, maybe, it's a Deployment and has a valid deployment flag?
118119
group = Optional.ofNullable(labels)
119120
.map(l -> l.at("deployment"))
120121
.map(Json::asString)
121122
.orElse(null);
122123
}
123124

125+
if (group == null) {
126+
// Final check, maybe it's a StatefulSet?
127+
group = Optional.ofNullable(labels)
128+
.map(l -> l.at("controller-revision-hash"))
129+
.map(Json::asString)
130+
.orElse(null);
131+
}
132+
124133
log.debug("pod %s, group %s", Optional.ofNullable(meta)
125134
.map(m -> m.at("name"))
126135
.map(Json::asString)
@@ -146,7 +155,6 @@ protected List<Pod> parseJsonResult(String input, String namespace, String label
146155
List<Pod> pods=new ArrayList<>();
147156
for(Json obj: items) {
148157
String parentDeployment = getPodGroup(obj);
149-
150158
String name = Optional.ofNullable(obj.at("metadata"))
151159
.map(podMetadata -> podMetadata.at("name"))
152160
.map(Json::asString)

src/main/java/org/jgroups/protocols/kubernetes/KUBE_PING.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,12 @@ public class KUBE_PING extends Discovery {
105105

106106
@Property(description="The standard behavior during Rolling Update is to put all Pods in the same cluster. In" +
107107
" cases (application level incompatibility) this causes problems. One might decide to split clusters to" +
108-
" 'old' and 'new' during that process")
108+
" 'old' and 'new' during that process", systemProperty="KUBERNETES_SPLIT_CLUSTERS_DURING_ROLLING_UPDATE")
109109
protected boolean split_clusters_during_rolling_update;
110110

111+
@Property(description="Introduces similar behaviour to Kubernetes Services (using DNS) with publishNotReadyAddresses set to true. " +
112+
"By default it's true", systemProperty="KUBERNETES_USE_NOT_READY_ADDRESSES")
113+
protected boolean useNotReadyAddresses = true;
111114

112115
protected Client client;
113116

@@ -217,7 +220,7 @@ public void findMembers(List<Address> members, boolean initial_discovery, Respon
217220
if(log.isTraceEnabled())
218221
log.trace("%s: hosts fetched from Kubernetes: %s", local_addr, hosts);
219222
for(Pod host: hosts) {
220-
if (!host.isReady())
223+
if (!host.isReady() && !useNotReadyAddresses)
221224
continue;
222225
for(int i=0; i <= port_range; i++) {
223226
try {
@@ -243,7 +246,11 @@ public void findMembers(List<Address> members, boolean initial_discovery, Respon
243246
if(physical_addr != null) {
244247
String senderIp = ((IpAddress)physical_addr).getIpAddress().getHostAddress();
245248
// Please note we search for the sender's parent group through all pods, even those not ready. It's because JGroups discovery is performed
246-
// before Wildfly can respond to http liveness probe.
249+
// before WildFly can respond to http readiness probe.
250+
hosts.stream()
251+
.filter(p -> p.getPodGroup() == null)
252+
.forEach(p -> log.warn("Pod %s doesn't have group assigned. Impossible to reliably determine pod group during Rolling Update."));
253+
247254
String senderPodGroup = hosts.stream()
248255
.filter(pod -> senderIp.contains(pod.getIp()))
249256
.map(Pod::getPodGroup)

src/main/java/org/jgroups/protocols/kubernetes/Pod.java

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package org.jgroups.protocols.kubernetes;
22

3+
import java.util.Objects;
4+
35
public class Pod {
46

57
private final String name;
@@ -15,10 +17,6 @@ public Pod(String name, String ip, String podGroup, boolean isReady) {
1517
this.isReady = isReady;
1618
}
1719

18-
public Pod(String name, String ip, String podGroup) {
19-
this(name, ip, podGroup, false);
20-
}
21-
2220
public String getName() {
2321
return name;
2422
}
@@ -51,9 +49,9 @@ public boolean equals(Object o) {
5149

5250
Pod pod = (Pod) o;
5351

54-
if (name != null ? !name.equals(pod.name) : pod.name != null) return false;
55-
if (ip != null ? !ip.equals(pod.ip) : pod.ip != null) return false;
56-
return podGroup != null ? podGroup.equals(pod.podGroup) : pod.podGroup == null;
52+
if (!Objects.equals(name, pod.name)) return false;
53+
if (!Objects.equals(ip, pod.ip)) return false;
54+
return Objects.equals(podGroup, pod.podGroup);
5755
}
5856

5957
@Override

src/test/java/org/jgroups/ping/kube/test/RollingUpdateTest.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ public void testPutOnlyNodesWithTheSameParentDuringRollingUpdateOpenShift() thro
5353
KUBE_PING_FOR_TESTING testedProtocol = new KUBE_PING_FOR_TESTING("/openshift_rolling_update.json");
5454
testedProtocol.setValue("split_clusters_during_rolling_update", true);
5555

56+
//when //then
5657
testPutOnlyNodesWithTheSameParentDuringRollingUpdate(testedProtocol);
5758
}
5859

@@ -61,6 +62,18 @@ public void testPutOnlyNodesWithTheSameParentDuringRollingUpdateReplicaSet() thr
6162
//given
6263
KUBE_PING_FOR_TESTING testedProtocol = new KUBE_PING_FOR_TESTING("/replicaset_rolling_update.json");
6364
testedProtocol.setValue("split_clusters_during_rolling_update", true);
65+
66+
//when //then
67+
testPutOnlyNodesWithTheSameParentDuringRollingUpdate(testedProtocol);
68+
}
69+
70+
@Test
71+
public void testPutOnlyNodesWithTheSameParentDuringRollingUpdateStatefulSet() throws Exception {
72+
//given
73+
KUBE_PING_FOR_TESTING testedProtocol = new KUBE_PING_FOR_TESTING("/statefulset_rolling_update.json");
74+
testedProtocol.setValue("split_clusters_during_rolling_update", true);
75+
76+
//when //then
6477
testPutOnlyNodesWithTheSameParentDuringRollingUpdate(testedProtocol);
6578
}
6679

0 commit comments

Comments
 (0)