Skip to content

Commit 2702b77

Browse files
authored
Rather than using DiscoveryNode.getVersion.transportVersion, get the transport version from the connection information directly (#93822)
1 parent 6c56c38 commit 2702b77

File tree

16 files changed

+155
-50
lines changed

16 files changed

+155
-50
lines changed

server/src/main/java/org/elasticsearch/cluster/coordination/JoinValidationService.java

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
import org.elasticsearch.node.NodeClosedException;
3434
import org.elasticsearch.threadpool.ThreadPool;
3535
import org.elasticsearch.transport.BytesTransportRequest;
36+
import org.elasticsearch.transport.NodeNotConnectedException;
37+
import org.elasticsearch.transport.Transport;
3638
import org.elasticsearch.transport.TransportRequestOptions;
3739
import org.elasticsearch.transport.TransportResponse;
3840
import org.elasticsearch.transport.TransportService;
@@ -302,14 +304,22 @@ protected void doRun() throws Exception {
302304
assert discoveryNode.getVersion().onOrAfter(Version.V_8_3_0) : discoveryNode.getVersion();
303305
// NB these things never run concurrently to each other, or to the cache cleaner (see IMPLEMENTATION NOTES above) so it is safe
304306
// to do these (non-atomic) things to the (unsynchronized) statesByVersion map.
305-
final var cachedBytes = statesByVersion.get(discoveryNode.getVersion().transportVersion);
306-
final var bytes = Objects.requireNonNullElseGet(cachedBytes, () -> serializeClusterState(discoveryNode));
307+
Transport.Connection connection;
308+
try {
309+
connection = transportService.getConnection(discoveryNode);
310+
} catch (NodeNotConnectedException e) {
311+
listener.onFailure(e);
312+
return;
313+
}
314+
var version = connection.getTransportVersion();
315+
var cachedBytes = statesByVersion.get(version);
316+
var bytes = Objects.requireNonNullElseGet(cachedBytes, () -> serializeClusterState(discoveryNode, version));
307317
assert bytes.hasReferences() : "already closed";
308318
bytes.incRef();
309319
transportService.sendRequest(
310-
discoveryNode,
320+
connection,
311321
JOIN_VALIDATE_ACTION_NAME,
312-
new BytesTransportRequest(bytes, discoveryNode.getVersion().transportVersion),
322+
new BytesTransportRequest(bytes, version),
313323
REQUEST_OPTIONS,
314324
new CleanableResponseHandler<>(
315325
listener,
@@ -339,12 +349,11 @@ public String toString() {
339349
}
340350
}
341351

342-
private ReleasableBytesReference serializeClusterState(DiscoveryNode discoveryNode) {
352+
private ReleasableBytesReference serializeClusterState(DiscoveryNode discoveryNode, TransportVersion version) {
343353
final var bytesStream = transportService.newNetworkBytesStream();
344354
var success = false;
345355
try {
346356
final var clusterState = clusterStateSupplier.get();
347-
final var version = discoveryNode.getVersion().transportVersion;
348357
try (
349358
var stream = new OutputStreamStreamOutput(
350359
CompressorFactory.COMPRESSOR.threadLocalOutputStream(Streams.flushOnCloseStream(bytesStream))

server/src/main/java/org/elasticsearch/cluster/coordination/PublicationTransportHandler.java

Lines changed: 53 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939
import org.elasticsearch.tasks.Task;
4040
import org.elasticsearch.threadpool.ThreadPool;
4141
import org.elasticsearch.transport.BytesTransportRequest;
42+
import org.elasticsearch.transport.NodeNotConnectedException;
43+
import org.elasticsearch.transport.Transport;
4244
import org.elasticsearch.transport.TransportException;
4345
import org.elasticsearch.transport.TransportRequestOptions;
4446
import org.elasticsearch.transport.TransportService;
@@ -225,8 +227,7 @@ public PublicationContext newPublicationContext(ClusterStatePublicationEvent clu
225227
}
226228
}
227229

228-
private ReleasableBytesReference serializeFullClusterState(ClusterState clusterState, DiscoveryNode node) {
229-
final TransportVersion serializeVersion = node.getVersion().transportVersion;
230+
private ReleasableBytesReference serializeFullClusterState(ClusterState clusterState, DiscoveryNode node, TransportVersion version) {
230231
final RecyclerBytesStreamOutput bytesStream = transportService.newNetworkBytesStream();
231232
boolean success = false;
232233
try {
@@ -236,7 +237,7 @@ private ReleasableBytesReference serializeFullClusterState(ClusterState clusterS
236237
CompressorFactory.COMPRESSOR.threadLocalOutputStream(Streams.flushOnCloseStream(bytesStream))
237238
)
238239
) {
239-
stream.setTransportVersion(serializeVersion);
240+
stream.setTransportVersion(version);
240241
stream.writeBoolean(true);
241242
clusterState.writeTo(stream);
242243
uncompressedBytes = stream.position();
@@ -248,7 +249,7 @@ private ReleasableBytesReference serializeFullClusterState(ClusterState clusterS
248249
logger.trace(
249250
"serialized full cluster state version [{}] using transport version [{}] with size [{}]",
250251
clusterState.version(),
251-
serializeVersion,
252+
version,
252253
result.length()
253254
);
254255
success = true;
@@ -260,9 +261,13 @@ private ReleasableBytesReference serializeFullClusterState(ClusterState clusterS
260261
}
261262
}
262263

263-
private ReleasableBytesReference serializeDiffClusterState(ClusterState newState, Diff<ClusterState> diff, DiscoveryNode node) {
264+
private ReleasableBytesReference serializeDiffClusterState(
265+
ClusterState newState,
266+
Diff<ClusterState> diff,
267+
DiscoveryNode node,
268+
TransportVersion version
269+
) {
264270
final long clusterStateVersion = newState.version();
265-
final TransportVersion serializeVersion = node.getVersion().transportVersion;
266271
final RecyclerBytesStreamOutput bytesStream = transportService.newNetworkBytesStream();
267272
boolean success = false;
268273
try {
@@ -272,10 +277,10 @@ private ReleasableBytesReference serializeDiffClusterState(ClusterState newState
272277
CompressorFactory.COMPRESSOR.threadLocalOutputStream(Streams.flushOnCloseStream(bytesStream))
273278
)
274279
) {
275-
stream.setTransportVersion(serializeVersion);
280+
stream.setTransportVersion(version);
276281
stream.writeBoolean(false);
277282
diff.writeTo(stream);
278-
if (serializeVersion.onOrAfter(INCLUDES_LAST_COMMITTED_DATA_VERSION)) {
283+
if (version.onOrAfter(INCLUDES_LAST_COMMITTED_DATA_VERSION)) {
279284
stream.writeBoolean(newState.metadata().clusterUUIDCommitted());
280285
newState.getLastCommittedConfiguration().writeTo(stream);
281286
}
@@ -288,7 +293,7 @@ private ReleasableBytesReference serializeDiffClusterState(ClusterState newState
288293
logger.trace(
289294
"serialized cluster state diff for version [{}] using transport version [{}] with size [{}]",
290295
clusterStateVersion,
291-
serializeVersion,
296+
version,
292297
result.length()
293298
);
294299
success = true;
@@ -315,6 +320,7 @@ public class PublicationContext extends AbstractRefCounted {
315320
private final Task task;
316321
private final boolean sendFullVersion;
317322

323+
private final Map<DiscoveryNode, Transport.Connection> nodeConnections = new HashMap<>();
318324
// All the values of these maps have one ref for the context (while it's open) and one for each in-flight message.
319325
private final Map<TransportVersion, ReleasableBytesReference> serializedStates = new ConcurrentHashMap<>();
320326
private final Map<TransportVersion, ReleasableBytesReference> serializedDiffs = new HashMap<>();
@@ -337,12 +343,23 @@ void buildDiffAndSerializeStates() {
337343
// publication to local node bypasses any serialization
338344
continue;
339345
}
346+
347+
Transport.Connection connection;
348+
try {
349+
connection = transportService.getConnection(node);
350+
} catch (NodeNotConnectedException e) {
351+
// can't send to this node, don't need to serialize anything for it
352+
logger.debug(() -> format("No connection to [%s] available, skipping serialization", node), e);
353+
continue;
354+
}
355+
356+
nodeConnections.put(node, connection);
340357
if (sendFullVersion || previousState.nodes().nodeExists(node) == false) {
341-
serializedStates.computeIfAbsent(node.getVersion().transportVersion, v -> serializeFullClusterState(newState, node));
358+
serializedStates.computeIfAbsent(connection.getTransportVersion(), v -> serializeFullClusterState(newState, node, v));
342359
} else {
343360
serializedDiffs.computeIfAbsent(
344-
node.getVersion().transportVersion,
345-
v -> serializeDiffClusterState(newState, diffSupplier.getOrCompute(), node)
361+
connection.getTransportVersion(),
362+
v -> serializeDiffClusterState(newState, diffSupplier.getOrCompute(), node, v)
346363
);
347364
}
348365
}
@@ -405,34 +422,46 @@ public String toString() {
405422

406423
private void sendFullClusterState(DiscoveryNode destination, ActionListener<PublishWithJoinResponse> listener) {
407424
assert refCount() > 0;
408-
ReleasableBytesReference bytes = serializedStates.get(destination.getVersion().transportVersion);
425+
Transport.Connection connection = nodeConnections.get(destination);
426+
if (connection == null) {
427+
logger.debug("No connection to [{}] available, skipping send", destination);
428+
listener.onFailure(new NodeNotConnectedException(destination, "No connection available"));
429+
return;
430+
}
431+
432+
var version = connection.getTransportVersion();
433+
ReleasableBytesReference bytes = serializedStates.get(version);
409434
if (bytes == null) {
410435
try {
411-
bytes = serializedStates.computeIfAbsent(
412-
destination.getVersion().transportVersion,
413-
v -> serializeFullClusterState(newState, destination)
414-
);
436+
bytes = serializedStates.computeIfAbsent(version, v -> serializeFullClusterState(newState, destination, v));
415437
} catch (Exception e) {
416438
logger.warn(() -> format("failed to serialize cluster state before publishing it to node %s", destination), e);
417439
listener.onFailure(e);
418440
return;
419441
}
420442
}
421-
sendClusterState(destination, bytes, listener);
443+
sendClusterState(connection, bytes, listener);
422444
}
423445

424446
private void sendClusterStateDiff(DiscoveryNode destination, ActionListener<PublishWithJoinResponse> listener) {
425-
final ReleasableBytesReference bytes = serializedDiffs.get(destination.getVersion().transportVersion);
447+
Transport.Connection connection = nodeConnections.get(destination);
448+
if (connection == null) {
449+
logger.debug("No connection to [{}] available, skipping send", destination);
450+
listener.onFailure(new NodeNotConnectedException(destination, "No connection available"));
451+
return;
452+
}
453+
454+
final ReleasableBytesReference bytes = serializedDiffs.get(connection.getTransportVersion());
426455
assert bytes != null
427-
: "failed to find serialized diff for node " + destination + " of version [" + destination.getVersion() + "]";
456+
: "failed to find serialized diff for node " + destination + " of version [" + connection.getTransportVersion() + "]";
428457

429458
// acquire a ref to the context just in case we need to try again with the full cluster state
430459
if (tryIncRef() == false) {
431460
assert false;
432461
listener.onFailure(new IllegalStateException("publication context released before transmission"));
433462
return;
434463
}
435-
sendClusterState(destination, bytes, ActionListener.runAfter(listener.delegateResponse((delegate, e) -> {
464+
sendClusterState(connection, bytes, ActionListener.runAfter(listener.delegateResponse((delegate, e) -> {
436465
if (e instanceof final TransportException transportException) {
437466
if (transportException.unwrapCause() instanceof IncompatibleClusterStateVersionException) {
438467
logger.debug(
@@ -453,7 +482,7 @@ private void sendClusterStateDiff(DiscoveryNode destination, ActionListener<Publ
453482
}
454483

455484
private void sendClusterState(
456-
DiscoveryNode destination,
485+
Transport.Connection connection,
457486
ReleasableBytesReference bytes,
458487
ActionListener<PublishWithJoinResponse> listener
459488
) {
@@ -464,9 +493,9 @@ private void sendClusterState(
464493
return;
465494
}
466495
transportService.sendChildRequest(
467-
destination,
496+
connection,
468497
PUBLISH_STATE_ACTION_NAME,
469-
new BytesTransportRequest(bytes, destination.getVersion().transportVersion),
498+
new BytesTransportRequest(bytes, connection.getTransportVersion()),
470499
task,
471500
STATE_REQUEST_OPTIONS,
472501
new CleanableResponseHandler<>(listener, PublishWithJoinResponse::new, ThreadPool.Names.CLUSTER_COORDINATION, bytes::decRef)

server/src/main/java/org/elasticsearch/transport/Transport.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,7 @@ default Version getVersion() {
133133
/**
134134
* Returns the version of the data to communicate in this channel.
135135
*/
136-
default TransportVersion getTransportVersion() {
137-
return getVersion().transportVersion;
138-
}
136+
TransportVersion getTransportVersion();
139137

140138
/**
141139
* Returns a key that this connection can be cached on. Delegating subclasses must delegate method call to

server/src/main/java/org/elasticsearch/transport/TransportService.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,11 @@ public DiscoveryNode getNode() {
119119
return localNode;
120120
}
121121

122+
@Override
123+
public TransportVersion getTransportVersion() {
124+
return TransportVersion.CURRENT;
125+
}
126+
122127
@Override
123128
public void sendRequest(long requestId, String action, TransportRequest request, TransportRequestOptions options) {
124129
sendLocalRequest(requestId, action, request, options);
@@ -1521,11 +1526,6 @@ public String getChannelType() {
15211526
return "direct";
15221527
}
15231528

1524-
@Override
1525-
public TransportVersion getVersion() {
1526-
return localNode.getVersion().transportVersion;
1527-
}
1528-
15291529
@Override
15301530
public String toString() {
15311531
return Strings.format("DirectResponseChannel{req=%d}{%s}", requestId, action);

server/src/test/java/org/elasticsearch/action/search/SearchAsyncActionTests.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*/
88
package org.elasticsearch.action.search;
99

10+
import org.elasticsearch.TransportVersion;
1011
import org.elasticsearch.Version;
1112
import org.elasticsearch.action.ActionListener;
1213
import org.elasticsearch.action.OriginalIndices;
@@ -794,6 +795,11 @@ public DiscoveryNode getNode() {
794795
return node;
795796
}
796797

798+
@Override
799+
public TransportVersion getTransportVersion() {
800+
return TransportVersion.CURRENT;
801+
}
802+
797803
@Override
798804
public void sendRequest(long requestId, String action, TransportRequest request, TransportRequestOptions options)
799805
throws TransportException {

server/src/test/java/org/elasticsearch/action/search/TransportSearchActionTests.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
import org.apache.lucene.search.TotalHits;
1414
import org.apache.lucene.util.SetOnce;
15+
import org.elasticsearch.TransportVersion;
1516
import org.elasticsearch.Version;
1617
import org.elasticsearch.action.ActionListener;
1718
import org.elasticsearch.action.LatchedActionListener;
@@ -362,6 +363,11 @@ public DiscoveryNode getNode() {
362363
return node;
363364
}
364365

366+
@Override
367+
public TransportVersion getTransportVersion() {
368+
return TransportVersion.CURRENT;
369+
}
370+
365371
@Override
366372
public void sendRequest(long requestId, String action, TransportRequest request, TransportRequestOptions options)
367373
throws TransportException {}

server/src/test/java/org/elasticsearch/cluster/NodeConnectionsServiceTests.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import org.apache.logging.log4j.Level;
1212
import org.elasticsearch.Build;
13+
import org.elasticsearch.TransportVersion;
1314
import org.elasticsearch.Version;
1415
import org.elasticsearch.action.ActionListener;
1516
import org.elasticsearch.action.support.ListenableActionFuture;
@@ -631,6 +632,11 @@ public DiscoveryNode getNode() {
631632
return node;
632633
}
633634

635+
@Override
636+
public TransportVersion getTransportVersion() {
637+
return TransportVersion.CURRENT;
638+
}
639+
634640
@Override
635641
public void sendRequest(long requestId, String action, TransportRequest request, TransportRequestOptions options)
636642
throws TransportException {}

server/src/test/java/org/elasticsearch/cluster/coordination/JoinValidationServiceTests.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ public DiscoveryNode getNode() {
8888
return node;
8989
}
9090

91+
@Override
92+
public TransportVersion getTransportVersion() {
93+
return TransportVersion.CURRENT;
94+
}
95+
9196
@Override
9297
public void sendRequest(long requestId, String action, TransportRequest request, TransportRequestOptions options)
9398
throws TransportException {

0 commit comments

Comments
 (0)