Skip to content

Commit 4ddb16c

Browse files
committed
[grid] stop the health check of a restarted node
1 parent c62597e commit 4ddb16c

File tree

5 files changed

+25
-33
lines changed

5 files changed

+25
-33
lines changed

java/src/org/openqa/selenium/grid/distributor/GridModel.java

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,9 @@ public void add(NodeStatus node) {
114114
"Re-adding node with id %s and URI %s.",
115115
node.getNodeId(), node.getExternalUri()));
116116

117-
events.fire(new NodeRestartedEvent(node));
117+
// Send the previous state to allow cleaning up the old node related resources.
118+
// Nodes are initially added in the "down" state, so the new state must be ignored.
119+
events.fire(new NodeRestartedEvent(next));
118120
iterator.remove();
119121
break;
120122
}
@@ -226,7 +228,8 @@ public void purgeDeadNodes() {
226228
if (nodeHealthCount.getOrDefault(id, 0) > UNHEALTHY_THRESHOLD) {
227229
LOG.info(
228230
String.format(
229-
"Removing Node %s, unhealthy threshold has been reached", node.getExternalUri()));
231+
"Removing Node %s (uri: %s), unhealthy threshold has been reached",
232+
node.getNodeId(), node.getExternalUri()));
230233
toRemove.add(node);
231234
break;
232235
}
@@ -239,11 +242,17 @@ public void purgeDeadNodes() {
239242
lastTouched.plus(node.getHeartbeatPeriod().multipliedBy(PURGE_TIMEOUT_MULTIPLIER));
240243

241244
if (node.getAvailability() == UP && lostTime.isBefore(now)) {
242-
LOG.info(String.format("Switching Node %s from UP to DOWN", node.getExternalUri()));
245+
LOG.info(
246+
String.format(
247+
"Switching Node %s (uri: %s) from UP to DOWN",
248+
node.getNodeId(), node.getExternalUri()));
243249
replacements.put(node, rewrite(node, DOWN));
244250
nodePurgeTimes.put(id, Instant.now());
245251
} else if (node.getAvailability() == DOWN && deadTime.isBefore(now)) {
246-
LOG.info(String.format("Removing Node %s, DOWN for too long", node.getExternalUri()));
252+
LOG.info(
253+
String.format(
254+
"Removing Node %s (uri: %s), DOWN for too long",
255+
node.getNodeId(), node.getExternalUri()));
247256
toRemove.add(node);
248257
}
249258
}

java/src/org/openqa/selenium/grid/distributor/local/LocalDistributor.java

Lines changed: 5 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ public LocalDistributor(
206206

207207
bus.addListener(NodeStatusEvent.listener(this::register));
208208
bus.addListener(NodeStatusEvent.listener(model::refresh));
209-
bus.addListener(NodeRestartedEvent.listener(this::handleNodeRestarted));
209+
bus.addListener(
210+
NodeRestartedEvent.listener(previousNodeStatus -> remove(previousNodeStatus.getNodeId())));
210211
bus.addListener(NodeRemovedEvent.listener(nodeStatus -> remove(nodeStatus.getNodeId())));
211212
bus.addListener(
212213
NodeHeartBeatEvent.listener(
@@ -329,25 +330,6 @@ private void register(NodeStatus status) {
329330
}
330331
}
331332

332-
private void handleNodeRestarted(NodeStatus status) {
333-
Require.nonNull("Node", status);
334-
Lock writeLock = lock.writeLock();
335-
writeLock.lock();
336-
try {
337-
if (!nodes.containsKey(status.getNodeId())) {
338-
return;
339-
}
340-
if (!getNodeFromURI(status.getExternalUri()).isDraining()) {
341-
LOG.info(
342-
String.format(
343-
"Node %s has restarted. Setting availability to DOWN.", status.getNodeId()));
344-
model.setAvailability(status.getNodeId(), DOWN);
345-
}
346-
} finally {
347-
writeLock.unlock();
348-
}
349-
}
350-
351333
@Override
352334
public LocalDistributor add(Node node) {
353335
Require.nonNull("Node", node);
@@ -499,15 +481,13 @@ public void remove(NodeId nodeId) {
499481
Lock writeLock = lock.writeLock();
500482
writeLock.lock();
501483
try {
502-
Node node = nodes.get(nodeId);
484+
Node node = nodes.remove(nodeId);
485+
model.remove(nodeId);
486+
allChecks.remove(nodeId);
503487

504488
if (node instanceof RemoteNode) {
505489
((RemoteNode) node).close();
506490
}
507-
508-
nodes.remove(nodeId);
509-
model.remove(nodeId);
510-
allChecks.remove(nodeId);
511491
} finally {
512492
writeLock.unlock();
513493
}

java/src/org/openqa/selenium/grid/sessionmap/jdbc/JdbcBackedSessionMap.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ public JdbcBackedSessionMap(Tracer tracer, Connection jdbcConnection, EventBus b
9292
.forEach(this::remove)));
9393

9494
bus.addListener(
95-
NodeRestartedEvent.listener(nodeStatus -> this.removeByUri(nodeStatus.getExternalUri())));
95+
NodeRestartedEvent.listener(
96+
previousNodeStatus -> this.removeByUri(previousNodeStatus.getExternalUri())));
9697
}
9798

9899
public static SessionMap create(Config config) {

java/src/org/openqa/selenium/grid/sessionmap/local/LocalSessionMap.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,11 @@ public LocalSessionMap(Tracer tracer, EventBus bus) {
6767

6868
bus.addListener(
6969
NodeRestartedEvent.listener(
70-
nodeStatus -> {
70+
previousNodeStatus -> {
7171
List<SessionId> toRemove =
7272
knownSessions.entrySet().stream()
73-
.filter((e) -> e.getValue().getUri().equals(nodeStatus.getExternalUri()))
73+
.filter(
74+
(e) -> e.getValue().getUri().equals(previousNodeStatus.getExternalUri()))
7475
.map(Map.Entry::getKey)
7576
.collect(Collectors.toList());
7677

java/src/org/openqa/selenium/grid/sessionmap/redis/RedisBackedSessionMap.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,8 @@ public RedisBackedSessionMap(Tracer tracer, URI serverUri, EventBus bus) {
8787
.forEach(this::remove)));
8888

8989
bus.addListener(
90-
NodeRestartedEvent.listener(nodeStatus -> this.removeByUri(nodeStatus.getExternalUri())));
90+
NodeRestartedEvent.listener(
91+
previousNodeStatus -> this.removeByUri(previousNodeStatus.getExternalUri())));
9192
}
9293

9394
public static SessionMap create(Config config) {

0 commit comments

Comments
 (0)