Skip to content

Commit ea7884a

Browse files
authored
Supports configuring TCP Keepalive related parameters in Bookie Client. (#4683)
### Motivation In a private environment, network connectivity between data centers is limited by firewall configuration. When the Broker accesses Bookkeeper infrequently, exceeding the firewall's default deactivation time (20 minutes), the firewall will actively disconnect the Broker → Bookkeeper connection. At this point, if a new production or consumption request is made, the Broker cannot safely access the Bookkeeper node again, causing production/consumption failures that cannot be automatically recovered from. The error message primarily indicates connection-level anomalies: <img width="2944" height="738" alt="Clipboard_Screenshot_1763091013" src="https://github.com/user-attachments/assets/e58b53b9-1c79-49cd-a4d7-1040f52569e0" /> The current Broker → Bookkeeper access chain is based on the Netty communication framework, and the keepalive function at the TCP connection layer reuses the system default SO_KEEPALIVE. The code is `PerChannelBookieClient` → `connect()`, as follows: <img width="1982" height="434" alt="Clipboard_Screenshot_1763091110" src="https://github.com/user-attachments/assets/979716b6-d99a-4dfd-9829-cd724c597c20" /> If no policy is explicitly set, the "System Default" option in the table follow will be used directly: TCP_KEEPIDLE | 7200s -- | -- TCP_KEEPINTVL | 75s TCP_KEEPCNT | 9 ### Changes Include the following three configuration items in ClientConfiguration: ``` public static final String TCP_KEEPIDLE = "tcpKeepIdle"; public static final String TCP_KEEPINTVL = "tcpKeepIntvl"; public static final String TCP_KEEPCNT = "tcpKeepCnt"; ``` To maintain compatibility, the system default configuration will still be used by default.
1 parent 1a509e4 commit ea7884a

File tree

3 files changed

+212
-1
lines changed

3 files changed

+212
-1
lines changed

bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ClientConfiguration.java

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,11 @@ public class ClientConfiguration extends AbstractConfiguration<ClientConfigurati
204204
//For batch read api, it the batch read is not stable, we can fail back to single read by this config.
205205
protected static final String BATCH_READ_ENABLED = "batchReadEnabled";
206206

207+
// SO_KEEPALIVE related configurations
208+
public static final String TCP_KEEPIDLE = "tcpKeepIdle";
209+
public static final String TCP_KEEPINTVL = "tcpKeepIntvl";
210+
public static final String TCP_KEEPCNT = "tcpKeepCnt";
211+
207212
/**
208213
* Construct a default client-side configuration.
209214
*/
@@ -2084,6 +2089,60 @@ public boolean isBatchReadEnabled() {
20842089
return getBoolean(BATCH_READ_ENABLED, true);
20852090
}
20862091

2092+
/**
2093+
* Set TCP_KEEPIDLE value for SO_KEEPALIVE.
2094+
* @param keepIdle time in seconds
2095+
* @return client configuration
2096+
*/
2097+
public ClientConfiguration setTcpKeepIdle(int keepIdle) {
2098+
setProperty(TCP_KEEPIDLE, keepIdle);
2099+
return this;
2100+
}
2101+
2102+
/**
2103+
* Get TCP_KEEPIDLE value for SO_KEEPALIVE.
2104+
* @return time in seconds, -1 means use system default
2105+
*/
2106+
public int getTcpKeepIdle() {
2107+
return getInt(TCP_KEEPIDLE, -1);
2108+
}
2109+
2110+
/**
2111+
* Set TCP_KEEPINTVL value for SO_KEEPALIVE.
2112+
* @param keepIntvl time in seconds
2113+
* @return client configuration
2114+
*/
2115+
public ClientConfiguration setTcpKeepIntvl(int keepIntvl) {
2116+
setProperty(TCP_KEEPINTVL, keepIntvl);
2117+
return this;
2118+
}
2119+
2120+
/**
2121+
* Get TCP_KEEPINTVL value for SO_KEEPALIVE.
2122+
* @return time in seconds, -1 means use system default
2123+
*/
2124+
public int getTcpKeepIntvl() {
2125+
return getInt(TCP_KEEPINTVL, -1);
2126+
}
2127+
2128+
/**
2129+
* Set TCP_KEEPCNT value for SO_KEEPALIVE.
2130+
* @param keepCnt count
2131+
* @return client configuration
2132+
*/
2133+
public ClientConfiguration setTcpKeepCnt(int keepCnt) {
2134+
setProperty(TCP_KEEPCNT, keepCnt);
2135+
return this;
2136+
}
2137+
2138+
/**
2139+
* Get TCP_KEEPCNT value for SO_KEEPALIVE.
2140+
* @return count, -1 means use system default
2141+
*/
2142+
public int getTcpKeepCnt() {
2143+
return getInt(TCP_KEEPCNT, -1);
2144+
}
2145+
20872146
@Override
20882147
protected ClientConfiguration getThis() {
20892148
return this;

bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/PerChannelBookieClient.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,18 @@ protected ChannelFuture connect() {
561561
if (!(eventLoopGroup instanceof DefaultEventLoopGroup)) {
562562
bootstrap.option(ChannelOption.TCP_NODELAY, conf.getClientTcpNoDelay());
563563
bootstrap.option(ChannelOption.SO_KEEPALIVE, conf.getClientSockKeepalive());
564-
564+
if (eventLoopGroup instanceof EpollEventLoopGroup) {
565+
// Set TCP keepalive parameters if configured
566+
if (conf.getTcpKeepIdle() > 0) {
567+
bootstrap.option(EpollChannelOption.TCP_KEEPIDLE, conf.getTcpKeepIdle());
568+
}
569+
if (conf.getTcpKeepIntvl() > 0) {
570+
bootstrap.option(EpollChannelOption.TCP_KEEPINTVL, conf.getTcpKeepIntvl());
571+
}
572+
if (conf.getTcpKeepCnt() > 0) {
573+
bootstrap.option(EpollChannelOption.TCP_KEEPCNT, conf.getTcpKeepCnt());
574+
}
575+
}
565576
// if buffer sizes are 0, let OS auto-tune it
566577
if (conf.getClientSendBufferSize() > 0) {
567578
bootstrap.option(ChannelOption.SO_SNDBUF, conf.getClientSendBufferSize());
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.bookkeeper.proto;
20+
21+
import static org.junit.Assert.assertEquals;
22+
import static org.junit.Assert.assertFalse;
23+
import static org.junit.Assert.assertTrue;
24+
25+
import io.netty.channel.nio.NioEventLoopGroup;
26+
import java.util.concurrent.TimeUnit;
27+
import org.apache.bookkeeper.conf.ClientConfiguration;
28+
import org.junit.Test;
29+
30+
/**
31+
* Unit tests for TCP keepalive configuration in PerChannelBookieClient.
32+
* This is a standalone test class that does not inherit from BookKeeperClusterTestCase
33+
* to avoid NativeIO assertion errors during cluster startup.
34+
*/
35+
public class TestPerChannelBookieClientTcpKeepalive {
36+
37+
/**
38+
* Test ClientConfiguration TCP keepalive getter methods.
39+
*/
40+
@Test
41+
public void testClientConfigurationGetters() {
42+
ClientConfiguration conf = new ClientConfiguration();
43+
44+
// Test default values (should be -1 as per implementation)
45+
assertEquals("Default TCP keep idle should be -1", -1, conf.getTcpKeepIdle());
46+
assertEquals("Default TCP keep interval should be -1", -1, conf.getTcpKeepIntvl());
47+
assertEquals("Default TCP keep count should be -1", -1, conf.getTcpKeepCnt());
48+
49+
// Test setter and getter methods
50+
conf.setTcpKeepIdle(60);
51+
conf.setTcpKeepIntvl(30);
52+
conf.setTcpKeepCnt(5);
53+
54+
assertEquals("TCP keep idle should be 60", 60, conf.getTcpKeepIdle());
55+
assertEquals("TCP keep interval should be 30", 30, conf.getTcpKeepIntvl());
56+
assertEquals("TCP keep count should be 5", 5, conf.getTcpKeepCnt());
57+
58+
// Test that -1 means use system default (as per implementation comments)
59+
conf.setTcpKeepIdle(-1);
60+
conf.setTcpKeepIntvl(-1);
61+
conf.setTcpKeepCnt(-1);
62+
63+
assertEquals("TCP keep idle should be -1 for system default", -1, conf.getTcpKeepIdle());
64+
assertEquals("TCP keep interval should be -1 for system default", -1, conf.getTcpKeepIntvl());
65+
assertEquals("TCP keep count should be -1 for system default", -1, conf.getTcpKeepCnt());
66+
}
67+
68+
/**
69+
* Test TCP keepalive configuration conditional logic.
70+
* This test focuses on the conditional logic in PerChannelBookieClient
71+
* without actually creating network connections.
72+
*/
73+
@Test
74+
public void testTcpKeepaliveConditionalLogic() {
75+
// Test various configuration scenarios
76+
ClientConfiguration conf = new ClientConfiguration();
77+
78+
// Test with positive values
79+
conf.setTcpKeepIdle(60);
80+
conf.setTcpKeepIntvl(30);
81+
conf.setTcpKeepCnt(5);
82+
83+
assertTrue("TCP keep idle should be > 0", conf.getTcpKeepIdle() > 0);
84+
assertTrue("TCP keep interval should be > 0", conf.getTcpKeepIntvl() > 0);
85+
assertTrue("TCP keep count should be > 0", conf.getTcpKeepCnt() > 0);
86+
87+
// Test with zero values
88+
conf.setTcpKeepIdle(0);
89+
conf.setTcpKeepIntvl(0);
90+
conf.setTcpKeepCnt(0);
91+
92+
assertFalse("TCP keep idle should not be configured for zero", conf.getTcpKeepIdle() > 0);
93+
assertFalse("TCP keep interval should not be configured for zero", conf.getTcpKeepIntvl() > 0);
94+
assertFalse("TCP keep count should not be configured for zero", conf.getTcpKeepCnt() > 0);
95+
96+
// Test with negative values (system default)
97+
conf.setTcpKeepIdle(-1);
98+
conf.setTcpKeepIntvl(-1);
99+
conf.setTcpKeepCnt(-1);
100+
101+
assertFalse("TCP keep idle should not be configured for negative", conf.getTcpKeepIdle() > 0);
102+
assertFalse("TCP keep interval should not be configured for negative", conf.getTcpKeepIntvl() > 0);
103+
assertFalse("TCP keep count should not be configured for negative", conf.getTcpKeepCnt() > 0);
104+
105+
// Test partial configuration
106+
conf.setTcpKeepIdle(60);
107+
conf.setTcpKeepIntvl(0);
108+
conf.setTcpKeepCnt(5);
109+
110+
assertTrue("TCP keep idle should be configured", conf.getTcpKeepIdle() > 0);
111+
assertFalse("TCP keep interval should not be configured", conf.getTcpKeepIntvl() > 0);
112+
assertTrue("TCP keep count should be configured", conf.getTcpKeepCnt() > 0);
113+
}
114+
115+
/**
116+
* Test Epoll support detection logic.
117+
*/
118+
@Test
119+
public void testEpollSupportDetection() {
120+
// Test Epoll support detection
121+
boolean isEpollSupported = false;
122+
try {
123+
Class.forName("io.netty.channel.epoll.EpollEventLoopGroup");
124+
isEpollSupported = true;
125+
} catch (ClassNotFoundException e) {
126+
// Epoll not supported on this platform
127+
}
128+
129+
// Test NIO event loop group detection
130+
NioEventLoopGroup nioGroup = new NioEventLoopGroup(1);
131+
assertFalse("NIO event loop group should not be detected as Epoll",
132+
nioGroup.getClass().getName().contains("Epoll"));
133+
134+
nioGroup.shutdownGracefully(0, 0, TimeUnit.MILLISECONDS);
135+
136+
// The important thing is that the detection logic works correctly
137+
// The actual result depends on the platform
138+
System.out.println("Epoll support detected: " + isEpollSupported);
139+
System.out.println("Operating system: " + System.getProperty("os.name"));
140+
}
141+
}

0 commit comments

Comments
 (0)