Skip to content

Commit 08c1b71

Browse files
JeremyDahlgren authored and omricohenn committed
Disable logging in ClusterFormationFailureHelper on shutdown. (elastic#125244)
Modifies Coordinator to enable logging in ClusterFormationFailureHelper when started and disables logging in ClusterFormationFailureHelper when stopped. The warning scheduler handling and invariant check in the Coordinator are left as is, with the logging boolean set independently, eliminating the need to hold the mutex in doStop() when Coordinator.stop() is called when the Node is shut down. Closes elastic#105559.
* Update docs/changelog/125244.yaml
1 parent 6ed0d63 commit 08c1b71

File tree

4 files changed

+55
-2
lines changed

4 files changed

+55
-2
lines changed

docs/changelog/125244.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 125244
2+
summary: Disable logging in `ClusterFormationFailureHelper` on shutdown
3+
area: Cluster Coordination
4+
type: bug
5+
issues:
6+
- 105559

server/src/main/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelper.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ public class ClusterFormationFailureHelper {
6868
private final Runnable logLastFailedJoinAttempt;
6969
@Nullable // if no warning is scheduled
7070
private volatile WarningScheduler warningScheduler;
71+
private volatile boolean loggingEnabled;
7172

7273
/**
7374
* Works with the {@link JoinHelper} to log the latest node-join attempt failure and cluster state debug information. Must call
@@ -90,6 +91,11 @@ public ClusterFormationFailureHelper(
9091
this.clusterCoordinationExecutor = threadPool.executor(Names.CLUSTER_COORDINATION);
9192
this.clusterFormationWarningTimeout = DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING.get(settings);
9293
this.logLastFailedJoinAttempt = logLastFailedJoinAttempt;
94+
this.loggingEnabled = true;
95+
}
96+
97+
public void setLoggingEnabled(boolean enabled) {
98+
this.loggingEnabled = enabled;
9399
}
94100

95101
public boolean isRunning() {
@@ -98,7 +104,7 @@ public boolean isRunning() {
98104

99105
/**
100106
* Schedules a warning debug message to be logged in 'clusterFormationWarningTimeout' time, and periodically thereafter, until
101-
* {@link ClusterFormationState#stop()} has been called.
107+
* {@link ClusterFormationFailureHelper#stop()} has been called.
102108
*/
103109
public void start() {
104110
assert warningScheduler == null;
@@ -125,7 +131,7 @@ public void onFailure(Exception e) {
125131

126132
@Override
127133
protected void doRun() {
128-
if (isActive()) {
134+
if (isActive() && loggingEnabled) {
129135
logLastFailedJoinAttempt.run();
130136
logger.warn(
131137
"{}; for troubleshooting guidance, see {}",

server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,6 +1104,7 @@ protected void doStart() {
11041104
applierState = initialState;
11051105
clusterApplier.setInitialState(initialState);
11061106
}
1107+
clusterFormationFailureHelper.setLoggingEnabled(true);
11071108
}
11081109

11091110
public DiscoveryStats stats() {
@@ -1126,6 +1127,7 @@ public void startInitialJoin() {
11261127
protected void doStop() {
11271128
configuredHostsResolver.stop();
11281129
joinValidationService.stop();
1130+
clusterFormationFailureHelper.setLoggingEnabled(false);
11291131
}
11301132

11311133
@Override

server/src/test/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelperTests.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,45 @@ public void testScheduling() {
171171

172172
assertThat(warningCount.get(), is(5L));
173173
assertThat(logLastFailedJoinAttemptWarningCount.get(), is(5L));
174+
175+
// Temporarily disable logging and verify we don't get incremented logging counts.
176+
clusterFormationFailureHelper.setLoggingEnabled(false);
177+
warningCount.set(0);
178+
logLastFailedJoinAttemptWarningCount.set(0);
179+
clusterFormationFailureHelper.start();
180+
clusterFormationFailureHelper.stop();
181+
clusterFormationFailureHelper.start();
182+
final long thirdStartTimeMillis = deterministicTaskQueue.getCurrentTimeMillis();
183+
184+
while (deterministicTaskQueue.getCurrentTimeMillis() - thirdStartTimeMillis < 5 * expectedDelayMillis) {
185+
assertTrue(clusterFormationFailureHelper.isRunning());
186+
if (deterministicTaskQueue.hasRunnableTasks()) {
187+
deterministicTaskQueue.runRandomTask();
188+
} else {
189+
deterministicTaskQueue.advanceTime();
190+
}
191+
}
192+
193+
assertThat(warningCount.get(), is(0L));
194+
assertThat(logLastFailedJoinAttemptWarningCount.get(), is(0L));
195+
196+
// Re-enable logging and verify the logging counts again.
197+
clusterFormationFailureHelper.stop();
198+
clusterFormationFailureHelper.start();
199+
clusterFormationFailureHelper.setLoggingEnabled(true);
200+
final long fourthStartTimeMillis = deterministicTaskQueue.getCurrentTimeMillis();
201+
202+
while (warningCount.get() < 5) {
203+
assertTrue(clusterFormationFailureHelper.isRunning());
204+
if (deterministicTaskQueue.hasRunnableTasks()) {
205+
deterministicTaskQueue.runRandomTask();
206+
} else {
207+
deterministicTaskQueue.advanceTime();
208+
}
209+
}
210+
assertThat(deterministicTaskQueue.getCurrentTimeMillis() - fourthStartTimeMillis, equalTo(5 * expectedDelayMillis));
211+
assertThat(warningCount.get(), is(5L));
212+
assertThat(logLastFailedJoinAttemptWarningCount.get(), is(5L));
174213
}
175214

176215
public void testDescriptionOnMasterIneligibleNodes() {

0 commit comments

Comments
 (0)