Skip to content

Commit d536349

Browse files
Merge pull request #110 from rabbitmq/rabbitmq-perf-test-106-topology-recovery
Handle connection and topology recovery
2 parents 758c029 + 18545c5 commit d536349

21 files changed

+1250
-145
lines changed

.travis.yml

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,19 @@ addons:
1414
sources:
1515
- sourceline: deb https://packages.erlang-solutions.com/ubuntu trusty contrib
1616
key_url: https://packages.erlang-solutions.com/ubuntu/erlang_solutions.asc
17-
- sourceline: deb https://dl.bintray.com/rabbitmq/debian trusty main
18-
key_url: https://dl.bintray.com/rabbitmq/Keys/rabbitmq-release-signing-key.asc
1917
packages:
20-
- esl-erlang=1:20.1
18+
- esl-erlang=1:20.3
2119
# because of https://github.com/travis-ci/travis-ci/issues/8906
2220
before_install:
2321
- sudo mv /opt/jdk_switcher/jdk_switcher.sh /tmp
24-
- sudo apt-get install rabbitmq-server=3.7.0-1
2522
- sudo mv /tmp/jdk_switcher.sh /opt/jdk_switcher/
26-
services:
27-
- rabbitmq
2823
branches:
2924
only:
3025
- master
31-
script: ./mvnw clean verify -Dtravis=true
26+
before_script:
27+
- ./bin/before_build.sh
28+
29+
script: ./mvnw clean verify -Dtravis=true -Drabbitmqctl.bin='rabbitmq/sbin/rabbitmqctl'
3230
cache:
3331
directories:
3432
- $HOME/.m2

bin/before_build.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/bin/sh
2+
3+
# Fetch the RabbitMQ 3.7.7 generic-unix tarball from the GitHub releases page.
wget https://github.com/rabbitmq/rabbitmq-server/releases/download/v3.7.7/rabbitmq-server-generic-unix-3.7.7.tar.xz
4+
# Unpack it in the current directory.
tar xf rabbitmq-server-generic-unix-3.7.7.tar.xz
5+
# Rename the versioned directory so later steps can use the stable path rabbitmq/sbin/...
mv rabbitmq_server-3.7.7 rabbitmq
6+
7+
# Start the broker with the -detached flag.
rabbitmq/sbin/rabbitmq-server -detached
8+
9+
# Fixed 3-second pause; presumably meant to give the broker time to boot before the build runs.
# NOTE(review): nothing here checks that the broker is actually up - confirm 3s is enough on CI.
sleep 3
10+
11+
# Last command of the script, so the script's exit status is 0 whatever the commands above returned.
true

pom.xml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
<maven.jar.plugin.version>3.0.2</maven.jar.plugin.version>
8383
<buildnumber.plugin.version>1.4</buildnumber.plugin.version>
8484
<maven.surefire.plugin.version>2.21.0</maven.surefire.plugin.version>
85+
<maven.failsafe.plugin.version>2.21.0</maven.failsafe.plugin.version>
8586

8687
<!-- because of https://issues.apache.org/jira/browse/MRESOURCES-99 -->
8788
<build.timestamp>${maven.build.timestamp}</build.timestamp>
@@ -250,6 +251,11 @@
250251
<groupId>org.apache.maven.plugins</groupId>
251252
<artifactId>maven-surefire-plugin</artifactId>
252253
<version>${maven.surefire.plugin.version}</version>
254+
<configuration>
255+
<includes>
256+
<include>**/*Test.java</include>
257+
</includes>
258+
</configuration>
253259
<dependencies>
254260
<dependency>
255261
<groupId>org.junit.platform</groupId>
@@ -259,6 +265,33 @@
259265
</dependencies>
260266
</plugin>
261267

268+
<plugin>
269+
<artifactId>maven-failsafe-plugin</artifactId>
270+
<version>${maven.failsafe.plugin.version}</version>
271+
<configuration>
272+
<includes>
273+
<include>**/*IT.java</include>
274+
</includes>
275+
<encoding>UTF-8</encoding>
276+
</configuration>
277+
<dependencies>
278+
<dependency>
279+
<groupId>org.junit.platform</groupId>
280+
<artifactId>junit-platform-surefire-provider</artifactId>
281+
<version>${junit.platform.version}</version>
282+
</dependency>
283+
</dependencies>
284+
285+
<executions>
286+
<execution>
287+
<goals>
288+
<goal>integration-test</goal>
289+
<goal>verify</goal>
290+
</goals>
291+
</execution>
292+
</executions>
293+
</plugin>
294+
262295
<plugin>
263296
<groupId>org.codehaus.mojo</groupId>
264297
<artifactId>versions-maven-plugin</artifactId>

src/docs/asciidoc/usage-advanced.adoc

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,13 +188,17 @@ to handle the I/O. That's what the
188188
--producer-scheduler-threads 10
189189
--nio-threads 10
190190

191-
PerfTest will use 2 extra threads for internal bookkeeping needs, so the total will be 12 threads
192-
for I/O over all the connections. With the default blocking I/O mode, each producer (or consumer)
191+
This way PerfTest will use 12 threads for I/O over all the connections.
192+
With the default blocking I/O mode, each producer (or consumer)
193193
uses a thread for the I/O loop, that is 2000 threads to simulate 1000 producers and
194-
1000 consumers.
194+
1000 consumers. With appropriate tuning, using NIO in PerfTest can dramatically reduce
195+
the resources used to simulate workloads with a large number of connections.
196+
197+
Note that in NIO mode the number of threads used can increase temporarily when connections close
198+
unexpectedly and connection recovery kicks in. This is due to the NIO mode dispatching
199+
connection closing to non-I/O threads to avoid deadlocks. Connection recovery can be disabled
200+
with the `--disable-connection-recovery` flag.
195201

196-
Using NIO in PerfTest can dramatically reduce the resources used to simulate workloads with a large
197-
number of connections with appropriate tuning.
198202

199203
== Running Producers and Consumers on Different Machines
200204

src/main/java/com/rabbitmq/perf/AgentBase.java

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,27 @@
1515

1616
package com.rabbitmq.perf;
1717

18+
import com.rabbitmq.client.MissedHeartbeatException;
19+
import com.rabbitmq.client.ShutdownSignalException;
20+
import org.slf4j.Logger;
21+
import org.slf4j.LoggerFactory;
22+
23+
import java.io.IOException;
24+
import java.net.SocketException;
25+
import java.util.function.Predicate;
26+
1827
/**
1928
*
2029
*/
2130
public abstract class AgentBase {
2231

32+
private static final Logger LOGGER = LoggerFactory.getLogger(AgentBase.class);
33+
34+
// FIXME this is the condition to start connection recovery
35+
// ensure it's the appropriate condition and get it from the Java client code
36+
static final Predicate<ShutdownSignalException> CONNECTION_RECOVERY_TRIGGERED =
37+
e -> !e.isInitiatedByApplication() || (e.getCause() instanceof MissedHeartbeatException);
38+
2339
protected void delay(long now, AgentState state) {
2440

2541
long elapsed = now - state.getLastStatsTime();
@@ -38,6 +54,51 @@ protected void delay(long now, AgentState state) {
3854
}
3955
}
4056

57+
/**
 * Tells whether the given shutdown signal satisfies {@code CONNECTION_RECOVERY_TRIGGERED}.
 *
 * @param e the shutdown signal to test
 * @return the result of applying the {@code CONNECTION_RECOVERY_TRIGGERED} predicate to {@code e}
 */
protected boolean isConnectionRecoveryTriggered(ShutdownSignalException e) {
58+
return CONNECTION_RECOVERY_TRIGGERED.test(e);
59+
}
60+
61+
/**
 * Decides what to do with a {@code ShutdownSignalException} raised by a write operation.
 * <p>
 * The exception is rethrown when {@code shouldStop(recoveryProcess, e)} returns {@code true};
 * otherwise it is dropped and the method returns normally.
 *
 * @param recoveryProcess the recovery process whose enabled state drives the decision
 * @param e the shutdown signal caught during the write
 * @throws ShutdownSignalException the same {@code e}, when {@code shouldStop} returns {@code true}
 */
protected void handleShutdownSignalExceptionOnWrite(Recovery.RecoveryProcess recoveryProcess, ShutdownSignalException e) {
62+
if (LOGGER.isDebugEnabled()) {
63+
LOGGER.debug(
64+
"Handling write error, recovery process enabled? {}, condition to trigger connection recovery? {}",
65+
recoveryProcess.isEnabled(), isConnectionRecoveryTriggered(e)
66+
);
67+
}
68+
// propagate only when the error is not going to be handled by recovery
if (shouldStop(recoveryProcess, e)) {
69+
throw e;
70+
}
71+
}
72+
73+
/**
 * Tells whether a shutdown signal should be treated as fatal (i.e. propagated) by the caller.
 *
 * @param recoveryProcess the recovery process to consult
 * @param e the shutdown signal being evaluated
 * @return {@code true} when the recovery process is disabled, or when it is enabled but
 *         {@code isConnectionRecoveryTriggered(e)} is {@code false}; {@code false} otherwise
 */
protected boolean shouldStop(Recovery.RecoveryProcess recoveryProcess, ShutdownSignalException e) {
74+
if (recoveryProcess.isEnabled()) {
75+
// we stop only if the error isn't likely to trigger connection recovery
76+
return !isConnectionRecoveryTriggered(e);
77+
} else {
78+
// recovery is disabled: any shutdown signal means stop
return true;
79+
}
80+
}
81+
82+
/**
 * Runs a write operation and filters the errors it raises according to the recovery process state.
 * <ul>
 *   <li>A {@code ShutdownSignalException} is passed to
 *       {@code handleShutdownSignalExceptionOnWrite}, which may rethrow it.</li>
 *   <li>A {@code SocketException} is ignored (with a debug log) when the recovery process is
 *       enabled, and rethrown when it is disabled.</li>
 *   <li>Any other exception thrown by the operation is not caught here.</li>
 * </ul>
 *
 * @param writeOperation the operation to execute
 * @param recoveryProcess the recovery process whose enabled state decides whether errors are ignored
 * @throws IOException when the operation fails with an I/O error that is not ignored as described above
 */
protected void dealWithWriteOperation(WriteOperation writeOperation, Recovery.RecoveryProcess recoveryProcess) throws IOException {
83+
try {
84+
writeOperation.call();
85+
} catch (ShutdownSignalException e) {
86+
handleShutdownSignalExceptionOnWrite(recoveryProcess, e);
87+
} catch (SocketException e) {
88+
if (recoveryProcess.isEnabled()) {
89+
if (LOGGER.isDebugEnabled()) {
90+
LOGGER.debug(
91+
"Socket exception in write, recovery process is enabled, ignoring to let connection recovery carry on"
92+
);
93+
}
94+
} else {
95+
throw e;
96+
}
97+
}
98+
}
99+
100+
public abstract void recover(TopologyRecording topologyRecording);
101+
41102
protected interface AgentState {
42103

43104
float getRateLimit();
@@ -48,4 +109,9 @@ protected interface AgentState {
48109

49110
int incrementMessageCount();
50111
}
112+
113+
/**
 * An action that takes no argument, returns nothing and may fail with an {@code IOException}.
 * It is the type of operation accepted by {@code dealWithWriteOperation}.
 */
@FunctionalInterface
114+
interface WriteOperation {
115+
/**
 * Executes the operation.
 *
 * @throws IOException if the operation fails
 */
void call() throws IOException;
116+
}
51117
}

src/main/java/com/rabbitmq/perf/Consumer.java

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import com.rabbitmq.client.DefaultConsumer;
2121
import com.rabbitmq.client.Envelope;
2222
import com.rabbitmq.client.ShutdownSignalException;
23+
import org.slf4j.Logger;
24+
import org.slf4j.LoggerFactory;
2325

2426
import java.io.ByteArrayInputStream;
2527
import java.io.DataInputStream;
@@ -34,7 +36,9 @@
3436

3537
public class Consumer extends AgentBase implements Runnable {
3638

37-
private ConsumerImpl q;
39+
private static final Logger LOGGER = LoggerFactory.getLogger(Consumer.class);
40+
41+
private volatile ConsumerImpl q;
3842
private final Channel channel;
3943
private final String id;
4044
private final List<String> queueNames;
@@ -52,12 +56,15 @@ public class Consumer extends AgentBase implements Runnable {
5256

5357
private final ConsumerState state;
5458

59+
private final Recovery.RecoveryProcess recoveryProcess;
60+
5561
public Consumer(Channel channel, String id,
5662
List<String> queueNames, int txSize, boolean autoAck,
5763
int multiAckEvery, Stats stats, float rateLimit, int msgLimit,
5864
int consumerLatencyInMicroSeconds,
5965
TimestampProvider timestampProvider,
60-
MulticastSet.CompletionHandler completionHandler) {
66+
MulticastSet.CompletionHandler completionHandler,
67+
Recovery.RecoveryProcess recoveryProcess) {
6168

6269
this.channel = channel;
6370
this.id = id;
@@ -97,6 +104,8 @@ public Consumer(Channel channel, String id,
97104
}
98105

99106
this.state = new ConsumerState(rateLimit);
107+
this.recoveryProcess = recoveryProcess;
108+
this.recoveryProcess.init(this);
100109
}
101110

102111
public void run() {
@@ -124,21 +133,22 @@ private ConsumerImpl(Channel channel) {
124133
@Override
125134
public void handleDelivery(String consumerTag, Envelope envelope, BasicProperties properties, byte[] body) throws IOException {
126135
int currentMessageCount = state.incrementMessageCount();
127-
128136
if (msgLimit == 0 || currentMessageCount <= msgLimit) {
129137
long messageTimestamp = timestampExtractor.apply(properties, body);
130138
long nowTimestamp = timestampProvider.getCurrentTime();
131139

132140
if (!autoAck) {
133-
if (multiAckEvery == 0) {
134-
channel.basicAck(envelope.getDeliveryTag(), false);
135-
} else if (currentMessageCount % multiAckEvery == 0) {
136-
channel.basicAck(envelope.getDeliveryTag(), true);
137-
}
141+
dealWithWriteOperation(() -> {
142+
if (multiAckEvery == 0) {
143+
channel.basicAck(envelope.getDeliveryTag(), false);
144+
} else if (currentMessageCount % multiAckEvery == 0) {
145+
channel.basicAck(envelope.getDeliveryTag(), true);
146+
}
147+
}, recoveryProcess);
138148
}
139149

140150
if (txSize != 0 && currentMessageCount % txSize == 0) {
141-
channel.txCommit();
151+
dealWithWriteOperation(() -> channel.txCommit(), recoveryProcess);
142152
}
143153

144154
long diff_time = timestampProvider.getDifference(nowTimestamp, messageTimestamp);
@@ -157,7 +167,14 @@ public void handleDelivery(String consumerTag, Envelope envelope, BasicPropertie
157167

158168
/**
 * Handles the shutdown signal delivered to this consumer.
 * <p>
 * This hunk shows both sides of the change: the line marked as removed ({@code -}) is the
 * previous unconditional {@code countDown()}; the lines marked as added ({@code +}) log the
 * signal at debug level and call {@code countDown()} only when the recovery process is disabled.
 *
 * @param consumerTag the consumer tag passed with the signal (not used in this method)
 * @param sig the shutdown signal; only used for the debug log
 */
@Override
159169
public void handleShutdownSignal(String consumerTag, ShutdownSignalException sig) {
160-
countDown();
170+
LOGGER.debug(
171+
"Consumer received shutdown signal, recovery process enabled? {}, condition to trigger connection recovery? {}",
172+
recoveryProcess.isEnabled(), isConnectionRecoveryTriggered(sig)
173+
);
174+
// with recovery enabled the consumer is not counted down here
if (!recoveryProcess.isEnabled()) {
175+
LOGGER.debug("Counting down for consumer");
176+
countDown();
177+
}
161178
}
162179

163180
@Override
@@ -179,6 +196,21 @@ private void countDown() {
179196
}
180197
}
181198

199+
/**
 * Re-registers this agent's consumers using the given topology recording.
 * <p>
 * For each entry of {@code consumerTagBranchMap}, the entry value is looked up with
 * {@code topologyRecording.queue(...)} and {@code basicConsume} is called on the channel with
 * the resulting queue name, the {@code autoAck} setting, the entry key and the current
 * {@code q} callback. An {@code IOException} on one entry is logged at warn level and the loop
 * moves on to the next entry.
 *
 * @param topologyRecording the recording used to resolve the queue of each consumer
 */
@Override
200+
public void recover(TopologyRecording topologyRecording) {
201+
for (Map.Entry<String, String> entry : consumerTagBranchMap.entrySet()) {
202+
// NOTE(review): the lookup result is dereferenced without a null check, both in the try and
// in the catch block - confirm queue(...) cannot return null for a recorded consumer
TopologyRecording.RecordedQueue queue = topologyRecording.queue(entry.getValue());
203+
try {
204+
// the entry key is passed as the third argument - presumably the consumer tag, given the map's name
channel.basicConsume(queue.name(), autoAck, entry.getKey(), q);
205+
} catch (IOException e) {
206+
LOGGER.warn(
207+
"Error while recovering consumer {} on queue {} on connection {}",
208+
entry.getKey(), queue.name(), channel.getConnection().getClientProvidedName(), e
209+
);
210+
}
211+
}
212+
}
213+
182214
private static class ConsumerState implements AgentState {
183215

184216
private final float rateLimit;

0 commit comments

Comments
 (0)