Skip to content

Commit 0d54332

Browse files
Avoid Needless Forking when Closing Transports (#66834)
There is no need to fork off to another thread in the changed spots if we block the calling thread anyway. This commit also includes some other minor cleanups.
1 parent 5db3d9f commit 0d54332

File tree

7 files changed

+59
-110
lines changed

7 files changed

+59
-110
lines changed

modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Transport.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,6 @@ protected void doStart() {
119119
bindServer(profileSettings);
120120
}
121121
}
122-
super.doStart();
123122
success = true;
124123
} finally {
125124
if (success == false) {

plugins/transport-nio/src/main/java/org/elasticsearch/transport/nio/NioTransport.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,6 @@ protected void doStart() {
9595
}
9696
}
9797

98-
super.doStart();
9998
success = true;
10099
} catch (IOException e) {
101100
throw new ElasticsearchException(e);

server/src/main/java/org/elasticsearch/transport/StatsTracker.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,6 @@ public long getMessagesReceived() {
3838
return messagesReceived.sum();
3939
}
4040

41-
42-
public MeanMetric getWriteBytes() {
43-
return writeBytesMetric;
44-
}
45-
4641
public long getBytesWritten() {
4742
return writeBytesMetric.sum();
4843
}

server/src/main/java/org/elasticsearch/transport/TcpTransport.java

Lines changed: 23 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
import org.elasticsearch.common.component.Lifecycle;
2626
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
2727
import org.elasticsearch.common.io.stream.StreamInput;
28-
import org.elasticsearch.common.metrics.MeanMetric;
2928
import org.elasticsearch.common.network.CloseableChannel;
3029
import org.elasticsearch.common.network.NetworkAddress;
3130
import org.elasticsearch.common.network.NetworkService;
@@ -66,8 +65,6 @@
6665
import java.util.Objects;
6766
import java.util.Set;
6867
import java.util.concurrent.ConcurrentMap;
69-
import java.util.concurrent.CountDownLatch;
70-
import java.util.concurrent.TimeUnit;
7168
import java.util.concurrent.atomic.AtomicBoolean;
7269
import java.util.concurrent.atomic.AtomicLong;
7370
import java.util.concurrent.atomic.AtomicReference;
@@ -162,10 +159,6 @@ public Supplier<CircuitBreaker> getInflightBreaker() {
162159
return () -> circuitBreakerService.getBreaker(CircuitBreaker.IN_FLIGHT_REQUESTS);
163160
}
164161

165-
@Override
166-
protected void doStart() {
167-
}
168-
169162
@Override
170163
public synchronized void setMessageListener(TransportMessageListener listener) {
171164
outboundHandler.setMessageListener(listener);
@@ -288,8 +281,8 @@ public void openConnection(DiscoveryNode node, ConnectionProfile profile, Action
288281
}
289282
}
290283

291-
private List<TcpChannel> initiateConnection(DiscoveryNode node, ConnectionProfile connectionProfile,
292-
ActionListener<Transport.Connection> listener) {
284+
private void initiateConnection(DiscoveryNode node, ConnectionProfile connectionProfile,
285+
ActionListener<Connection> listener) {
293286
int numConnections = connectionProfile.getNumConnections();
294287
assert numConnections > 0 : "A connection profile must be configured with at least one connection";
295288

@@ -303,11 +296,11 @@ private List<TcpChannel> initiateConnection(DiscoveryNode node, ConnectionProfil
303296
} catch (ConnectTransportException e) {
304297
CloseableChannel.closeChannels(channels, false);
305298
listener.onFailure(e);
306-
return channels;
299+
return;
307300
} catch (Exception e) {
308301
CloseableChannel.closeChannels(channels, false);
309302
listener.onFailure(new ConnectTransportException(node, "general node connection failure", e));
310-
return channels;
303+
return;
311304
}
312305
}
313306

@@ -320,7 +313,6 @@ private List<TcpChannel> initiateConnection(DiscoveryNode node, ConnectionProfil
320313

321314
TimeValue connectTimeout = connectionProfile.getConnectTimeout();
322315
threadPool.schedule(channelsConnectedListener::onTimeout, connectTimeout, ThreadPool.Names.GENERIC);
323-
return channels;
324316
}
325317

326318
@Override
@@ -559,42 +551,31 @@ protected final void doClose() {
559551

560552
@Override
561553
protected final void doStop() {
562-
final CountDownLatch latch = new CountDownLatch(1);
563-
// make sure we run it on another thread than a possible IO handler thread
554+
assert Transports.assertNotTransportThread("Must not block transport thread that might be needed for closing channels below");
564555
assert threadPool.generic().isShutdown() == false : "Must stop transport before terminating underlying threadpool";
565-
threadPool.generic().execute(() -> {
566-
closeLock.writeLock().lock();
567-
try {
568-
keepAlive.close();
556+
closeLock.writeLock().lock();
557+
try {
558+
keepAlive.close();
569559

570-
// first stop to accept any incoming connections so nobody can connect to this transport
571-
for (Map.Entry<String, List<TcpServerChannel>> entry : serverChannels.entrySet()) {
572-
String profile = entry.getKey();
573-
List<TcpServerChannel> channels = entry.getValue();
574-
ActionListener<Void> closeFailLogger = ActionListener.wrap(c -> {
560+
// first stop to accept any incoming connections so nobody can connect to this transport
561+
for (Map.Entry<String, List<TcpServerChannel>> entry : serverChannels.entrySet()) {
562+
String profile = entry.getKey();
563+
List<TcpServerChannel> channels = entry.getValue();
564+
ActionListener<Void> closeFailLogger = ActionListener.wrap(c -> {
575565
},
576566
e -> logger.warn(() -> new ParameterizedMessage("Error closing serverChannel for profile [{}]", profile), e));
577-
channels.forEach(c -> c.addCloseListener(closeFailLogger));
578-
CloseableChannel.closeChannels(channels, true);
579-
}
580-
serverChannels.clear();
581-
582-
// close all of the incoming channels. The closeChannels method takes a list so we must convert the set.
583-
CloseableChannel.closeChannels(new ArrayList<>(acceptedChannels), true);
584-
acceptedChannels.clear();
585-
586-
stopInternal();
587-
} finally {
588-
closeLock.writeLock().unlock();
589-
latch.countDown();
567+
channels.forEach(c -> c.addCloseListener(closeFailLogger));
568+
CloseableChannel.closeChannels(channels, true);
590569
}
591-
});
570+
serverChannels.clear();
592571

593-
try {
594-
latch.await(30, TimeUnit.SECONDS);
595-
} catch (InterruptedException e) {
596-
Thread.currentThread().interrupt();
597-
// ignore
572+
// close all of the incoming channels. The closeChannels method takes a list so we must convert the set.
573+
CloseableChannel.closeChannels(new ArrayList<>(acceptedChannels), true);
574+
acceptedChannels.clear();
575+
576+
stopInternal();
577+
} finally {
578+
closeLock.writeLock().unlock();
598579
}
599580
}
600581

@@ -845,7 +826,6 @@ private void ensureOpen() {
845826

846827
@Override
847828
public final TransportStats getStats() {
848-
final MeanMetric writeBytesMetric = statsTracker.getWriteBytes();
849829
final long bytesWritten = statsTracker.getBytesWritten();
850830
final long messagesSent = statsTracker.getMessagesSent();
851831
final long messagesReceived = statsTracker.getMessagesReceived();

server/src/main/java/org/elasticsearch/transport/TransportService.java

Lines changed: 32 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
import org.elasticsearch.common.transport.TransportAddress;
3333
import org.elasticsearch.core.TimeValue;
3434
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
35-
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
3635
import org.elasticsearch.common.util.concurrent.ThreadContext;
3736
import org.elasticsearch.core.internal.io.IOUtils;
3837
import org.elasticsearch.node.NodeClosedException;
@@ -52,7 +51,6 @@
5251
import java.util.Objects;
5352
import java.util.Set;
5453
import java.util.concurrent.CopyOnWriteArrayList;
55-
import java.util.concurrent.ExecutorService;
5654
import java.util.concurrent.atomic.AtomicBoolean;
5755
import java.util.function.Function;
5856
import java.util.function.Predicate;
@@ -201,15 +199,6 @@ protected TaskManager createTaskManager(Settings settings, ThreadPool threadPool
201199
return new TaskManager(settings, threadPool, taskHeaders);
202200
}
203201

204-
/**
205-
* The executor service for this transport service.
206-
*
207-
* @return the executor service
208-
*/
209-
private ExecutorService getExecutorService() {
210-
return threadPool.generic();
211-
}
212-
213202
void setTracerLogInclude(List<String> tracerLogInclude) {
214203
this.tracerLogInclude = tracerLogInclude.toArray(Strings.EMPTY_ARRAY);
215204
}
@@ -247,33 +236,14 @@ protected void doStop() {
247236
// in case the transport is not connected to our local node (thus cleaned on node disconnect)
248237
// make sure to clean any leftover on going handles
249238
for (final Transport.ResponseContext<?> holderToNotify : responseHandlers.prune(h -> true)) {
250-
// callback that an exception happened, but on a different thread since we don't
251-
// want handlers to worry about stack overflows
252-
getExecutorService().execute(new AbstractRunnable() {
253-
@Override
254-
public void onRejection(Exception e) {
255-
// if we get rejected during node shutdown we don't wanna bubble it up
256-
logger.debug(
257-
() -> new ParameterizedMessage(
258-
"failed to notify response handler on rejection, action: {}",
259-
holderToNotify.action()),
260-
e);
261-
}
262-
@Override
263-
public void onFailure(Exception e) {
264-
logger.warn(
265-
() -> new ParameterizedMessage(
266-
"failed to notify response handler on exception, action: {}",
267-
holderToNotify.action()),
268-
e);
269-
}
270-
@Override
271-
public void doRun() {
272-
TransportException ex = new SendRequestTransportException(holderToNotify.connection().getNode(),
273-
holderToNotify.action(), new NodeClosedException(localNode));
274-
holderToNotify.handler().handleException(ex);
275-
}
276-
});
239+
try {
240+
holderToNotify.handler().handleException(new SendRequestTransportException(holderToNotify.connection().getNode(),
241+
holderToNotify.action(), new NodeClosedException(localNode)));
242+
} catch (Exception e) {
243+
assert false : e;
244+
logger.warn(() -> new ParameterizedMessage("failed to notify response handler on exception, action: {}",
245+
holderToNotify.action()), e);
246+
}
277247
}
278248
}
279249
}
@@ -1023,29 +993,33 @@ private void checkForTimeout(long requestId) {
1023993

1024994
@Override
1025995
public void onConnectionClosed(Transport.Connection connection) {
1026-
try {
1027-
List<Transport.ResponseContext<? extends TransportResponse>> pruned =
996+
List<Transport.ResponseContext<? extends TransportResponse>> pruned =
1028997
responseHandlers.prune(h -> h.connection().getCacheKey().equals(connection.getCacheKey()));
1029-
// callback that an exception happened, but on a different thread since we don't
1030-
// want handlers to worry about stack overflows
1031-
getExecutorService().execute(new Runnable() {
1032-
@Override
1033-
@SuppressWarnings("rawtypes")
1034-
public void run() {
1035-
for (Transport.ResponseContext holderToNotify : pruned) {
1036-
holderToNotify.handler().handleException(
1037-
new NodeDisconnectedException(connection.getNode(), holderToNotify.action()));
1038-
}
1039-
}
998+
if (pruned.isEmpty()) {
999+
return;
1000+
}
10401001

1041-
@Override
1042-
public String toString() {
1043-
return "onConnectionClosed(" + connection.getNode() + ")";
1002+
// callback that an exception happened, but on a different thread since we don't
1003+
// want handlers to worry about stack overflows
1004+
threadPool.generic().execute(new AbstractRunnable() {
1005+
@Override
1006+
public void doRun() {
1007+
for (Transport.ResponseContext<?> holderToNotify : pruned) {
1008+
holderToNotify.handler().handleException(new NodeDisconnectedException(connection.getNode(), holderToNotify.action()));
10441009
}
1045-
});
1046-
} catch (EsRejectedExecutionException ex) {
1047-
logger.debug("Rejected execution on onConnectionClosed", ex);
1048-
}
1010+
}
1011+
1012+
@Override
1013+
public void onFailure(Exception e) {
1014+
assert false : e;
1015+
logger.warn(() -> new ParameterizedMessage("failed to notify response handler on connection close [{}]", connection), e);
1016+
}
1017+
1018+
@Override
1019+
public String toString() {
1020+
return "onConnectionClosed(" + connection.getNode() + ")";
1021+
}
1022+
});
10491023
}
10501024

10511025
final class TimeoutHandler implements Runnable {

server/src/test/java/org/elasticsearch/transport/TcpTransportTests.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,10 @@ private void testDefaultSeedAddresses(final Settings settings, Matcher<Iterable<
167167
final TcpTransport tcpTransport = new TcpTransport(settings, Version.CURRENT, testThreadPool,
168168
new MockPageCacheRecycler(settings),
169169
new NoneCircuitBreakerService(), writableRegistry(), new NetworkService(Collections.emptyList())) {
170+
@Override
171+
protected void doStart() {
172+
throw new UnsupportedOperationException();
173+
}
170174

171175
@Override
172176
protected TcpServerChannel bind(String name, InetSocketAddress address) {

test/framework/src/main/java/org/elasticsearch/transport/nio/MockNioTransport.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,6 @@ protected void doStart() {
122122
bindServer(profileSettings);
123123
}
124124
}
125-
126-
super.doStart();
127125
success = true;
128126
} catch (IOException e) {
129127
throw new ElasticsearchException(e);

0 commit comments

Comments
 (0)