Skip to content

Commit 8981fa2

Browse files
authored
IGNITE-27324 Stop the node in a separate thread in failure handler (#7235)
1 parent 92b0c32 commit 8981fa2

File tree

7 files changed

+64
-21
lines changed

7 files changed

+64
-21
lines changed

modules/failure-handler/src/integrationTest/java/org/apache/ignite/internal/failure/handlers/FailureHandlerTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ protected int initialNodes() {
4646

4747
@Test
4848
void testStopNodeFailureHandler() {
49-
testFailureHandler(node -> new StopNodeFailureHandler(node::shutdown));
49+
testFailureHandler(node -> new StopNodeFailureHandler("test-node", node::shutdown));
5050
}
5151

5252
@Test

modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/FailureManager.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ public class FailureManager implements FailureProcessor, IgniteComponent {
6565
private static final String IGNORED_FAILURE_LOG_MSG = "Possible failure suppressed according to a configured handler "
6666
+ "[hnd={}, failureCtx={}, failureCtxId={}]";
6767

68+
/** Ignite node name. */
69+
private final String nodeName;
70+
6871
/** Failure processor configuration. */
6972
private final FailureProcessorConfiguration configuration;
7073

@@ -100,7 +103,9 @@ public class FailureManager implements FailureProcessor, IgniteComponent {
100103
*
101104
* @param handler Handler.
102105
*/
106+
@TestOnly
103107
public FailureManager(FailureHandler handler) {
108+
this.nodeName = "test-node";
104109
this.nodeStopper = () -> {};
105110
this.handler = handler;
106111
this.configuration = null;
@@ -109,10 +114,12 @@ public FailureManager(FailureHandler handler) {
109114
/**
110115
* Creates a new instance of a failure processor.
111116
*
117+
* @param nodeName Node name.
112118
* @param nodeStopper Node stopper.
113119
* @param configuration Failure processor configuration.
114120
*/
115-
public FailureManager(NodeStopper nodeStopper, FailureProcessorConfiguration configuration) {
121+
public FailureManager(String nodeName, NodeStopper nodeStopper, FailureProcessorConfiguration configuration) {
122+
this.nodeName = nodeName;
116123
this.nodeStopper = nodeStopper;
117124
this.configuration = configuration;
118125
}
@@ -263,11 +270,11 @@ private synchronized void reconfigure(FailureProcessorView newConfiguration) {
263270
break;
264271

265272
case StopNodeFailureHandlerConfigurationSchema.TYPE:
266-
hnd = new StopNodeFailureHandler(nodeStopper);
273+
hnd = new StopNodeFailureHandler(nodeName, nodeStopper);
267274
break;
268275

269276
case StopNodeOrHaltFailureHandlerConfigurationSchema.TYPE:
270-
hnd = new StopNodeOrHaltFailureHandler(nodeStopper, (StopNodeOrHaltFailureHandlerView) handlerView);
277+
hnd = new StopNodeOrHaltFailureHandler(nodeName, nodeStopper, (StopNodeOrHaltFailureHandlerView) handlerView);
271278
break;
272279

273280
default:

modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeFailureHandler.java

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,23 +19,37 @@
1919

2020
import org.apache.ignite.internal.failure.FailureContext;
2121
import org.apache.ignite.internal.failure.NodeStopper;
22+
import org.apache.ignite.internal.logger.IgniteLogger;
23+
import org.apache.ignite.internal.logger.Loggers;
24+
import org.apache.ignite.internal.thread.IgniteThreadFactory;
25+
import org.apache.ignite.internal.thread.ThreadOperation;
2226
import org.apache.ignite.internal.tostring.IgniteToStringExclude;
2327
import org.apache.ignite.internal.tostring.S;
2428

2529
/**
2630
* Handler will stop node in case of critical error using provided {@link NodeStopper}.
2731
*/
2832
public class StopNodeFailureHandler extends AbstractFailureHandler {
33+
private static final IgniteLogger LOG = Loggers.forClass(StopNodeFailureHandler.class);
34+
35+
/** Ignite node name. */
36+
private final String nodeName;
37+
2938
@IgniteToStringExclude
3039
private final NodeStopper nodeStopper;
3140

32-
public StopNodeFailureHandler(NodeStopper nodeStopper) {
41+
public StopNodeFailureHandler(String nodeName, NodeStopper nodeStopper) {
42+
this.nodeName = nodeName;
3343
this.nodeStopper = nodeStopper;
3444
}
3545

3646
@Override
3747
protected boolean handle(FailureContext failureCtx) {
38-
nodeStopper.stopNode();
48+
IgniteThreadFactory threadFactory = IgniteThreadFactory.create(nodeName, "node-stopper", true, LOG, ThreadOperation.values());
49+
50+
Thread nodeStopperThread = threadFactory.newThread(nodeStopper::stopNode);
51+
52+
nodeStopperThread.start();
3953

4054
return true;
4155
}

modules/failure-handler/src/main/java/org/apache/ignite/internal/failure/handlers/StopNodeOrHaltFailureHandler.java

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,11 @@
2424
import org.apache.ignite.internal.failure.handlers.configuration.StopNodeOrHaltFailureHandlerView;
2525
import org.apache.ignite.internal.logger.IgniteLogger;
2626
import org.apache.ignite.internal.logger.Loggers;
27-
import org.apache.ignite.internal.thread.LogUncaughtExceptionHandler;
27+
import org.apache.ignite.internal.thread.IgniteThreadFactory;
28+
import org.apache.ignite.internal.thread.ThreadOperation;
2829
import org.apache.ignite.internal.tostring.IgniteToStringExclude;
2930
import org.apache.ignite.internal.tostring.S;
31+
import org.jetbrains.annotations.TestOnly;
3032

3133
/**
3234
* Handler will try to stop node if {@code tryStop} value is {@code true}.
@@ -42,6 +44,9 @@ public class StopNodeOrHaltFailureHandler extends AbstractFailureHandler {
4244
*/
4345
private static final int KILL_EXIT_CODE = 130;
4446

47+
/** Ignite node name. */
48+
private String nodeName;
49+
4550
/** Node stopper. */
4651
@IgniteToStringExclude
4752
private final NodeStopper nodeStopper;
@@ -59,6 +64,7 @@ public class StopNodeOrHaltFailureHandler extends AbstractFailureHandler {
5964
* @param tryStop Try stop.
6065
* @param timeout Stop node timeout in milliseconds.
6166
*/
67+
@TestOnly
6268
public StopNodeOrHaltFailureHandler(NodeStopper nodeStopper, boolean tryStop, long timeout) {
6369
this.nodeStopper = nodeStopper;
6470
this.tryStop = tryStop;
@@ -68,10 +74,12 @@ public StopNodeOrHaltFailureHandler(NodeStopper nodeStopper, boolean tryStop, lo
6874
/**
6975
* Creates a new instance of a failure processor.
7076
*
77+
* @param nodeName Node name.
7178
* @param nodeStopper Node stopper.
7279
* @param view Configuration view.
7380
*/
74-
public StopNodeOrHaltFailureHandler(NodeStopper nodeStopper, StopNodeOrHaltFailureHandlerView view) {
81+
public StopNodeOrHaltFailureHandler(String nodeName, NodeStopper nodeStopper, StopNodeOrHaltFailureHandlerView view) {
82+
this.nodeName = nodeName;
7583
this.nodeStopper = nodeStopper;
7684
tryStop = view.tryStop();
7785
timeout = view.timeoutMillis();
@@ -82,18 +90,33 @@ protected boolean handle(FailureContext failureCtx) {
8290
if (tryStop) {
8391
CountDownLatch latch = new CountDownLatch(1);
8492

85-
Thread stopperThread = new Thread(
93+
IgniteThreadFactory stopThreadFactory = IgniteThreadFactory.create(
94+
nodeName,
95+
"node-stopper",
96+
true,
97+
LOG,
98+
ThreadOperation.values()
99+
);
100+
101+
Thread stopperThread = stopThreadFactory.newThread(
86102
() -> {
87103
nodeStopper.stopNode();
88104

89105
latch.countDown();
90-
},
91-
"node-stopper"
106+
}
92107
);
93-
stopperThread.setUncaughtExceptionHandler(new LogUncaughtExceptionHandler(LOG));
108+
94109
stopperThread.start();
95110

96-
Thread haltOnStopTimeoutThread = new Thread(
111+
IgniteThreadFactory haltThreadFactory = IgniteThreadFactory.create(
112+
nodeName,
113+
"jvm-halt-on-stop-timeout",
114+
true,
115+
LOG,
116+
ThreadOperation.values()
117+
);
118+
119+
Thread haltOnStopTimeoutThread = haltThreadFactory.newThread(
97120
() -> {
98121
try {
99122
if (!latch.await(timeout, TimeUnit.MILLISECONDS)) {
@@ -102,10 +125,9 @@ protected boolean handle(FailureContext failureCtx) {
102125
} catch (InterruptedException e) {
103126
// No-op.
104127
}
105-
},
106-
"jvm-halt-on-stop-timeout"
128+
}
107129
);
108-
haltOnStopTimeoutThread.setUncaughtExceptionHandler(new LogUncaughtExceptionHandler(LOG));
130+
109131
haltOnStopTimeoutThread.start();
110132
} else {
111133
Runtime.getRuntime().halt(KILL_EXIT_CODE);

modules/failure-handler/src/test/java/org/apache/ignite/internal/failure/FailureProcessorTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ public void testIgnoredFailureTypes() {
9494

9595
@Test
9696
public void testDefaultFailureHandlerConfiguration() {
97-
FailureManager failureManager = new FailureManager(() -> {}, failureProcessorConfiguration);
97+
FailureManager failureManager = new FailureManager("test-node", () -> {}, failureProcessorConfiguration);
9898

9999
try {
100100
assertThat(failureManager.startAsync(new ComponentContext()), willSucceedFast());
@@ -114,7 +114,7 @@ public void testDefaultFailureHandlerConfiguration() {
114114

115115
@Test
116116
public void testFailureProcessorReconfiguration() {
117-
FailureManager failureManager = new FailureManager(() -> {}, failureProcessorConfiguration);
117+
FailureManager failureManager = new FailureManager("test-node", () -> {}, failureProcessorConfiguration);
118118

119119
try {
120120
assertThat(failureManager.startAsync(new ComponentContext()), willSucceedFast());

modules/failure-handler/src/test/java/org/apache/ignite/internal/failure/FailureProcessorThreadDumpThrottlingTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ public void testNoThreadDumps() {
7272

7373
logInspector.start();
7474
try {
75-
FailureManager failureManager = new FailureManager(() -> {}, disabledThreadDumpConfiguration);
75+
FailureManager failureManager = new FailureManager("test-node", () -> {}, disabledThreadDumpConfiguration);
7676

7777
try {
7878
assertThat(failureManager.startAsync(new ComponentContext()), willSucceedFast());
@@ -217,7 +217,7 @@ public void testThrottlingPerFailureType() {
217217
* Creates a new instance of {@link FailureManager} with the given configuration and runs the test represented by {@code test} closure.
218218
*/
219219
static void testFailureProcessing(FailureProcessorConfiguration configuration, Consumer<FailureProcessor> test) {
220-
FailureManager failureManager = new FailureManager(() -> {}, configuration);
220+
FailureManager failureManager = new FailureManager("test-node", () -> {}, configuration);
221221

222222
try {
223223
assertThat(failureManager.startAsync(new ComponentContext()), willSucceedFast());

modules/runner/src/main/java/org/apache/ignite/internal/app/IgniteImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -598,7 +598,7 @@ public class IgniteImpl implements Ignite {
598598

599599
FailureProcessorConfiguration failureProcessorConfiguration = nodeConfigRegistry.getConfiguration(
600600
FailureProcessorExtensionConfiguration.KEY).failureHandler();
601-
failureManager = new FailureManager(node::shutdown, failureProcessorConfiguration);
601+
failureManager = new FailureManager(name, node::shutdown, failureProcessorConfiguration);
602602

603603
SystemLocalConfiguration systemConfiguration = nodeConfigRegistry.getConfiguration(SystemLocalExtensionConfiguration.KEY).system();
604604

0 commit comments

Comments
 (0)