Skip to content

Commit 628bdea

Browse files
author
Marcelo Vanzin
committed
[SPARK-17742][CORE] Fail launcher app handle if child process exits with error.
This is a follow up to cba826d; that commit set the app handle state to "LOST" when the child process exited, but that can be ambiguous. This change sets the state to "FAILED" if the exit code was non-zero and the handle state wasn't a failure state, or "LOST" if the exit status was zero. Author: Marcelo Vanzin <[email protected]> Closes apache#19012 from vanzin/SPARK-17742.
1 parent 1813c4a commit 628bdea

File tree

2 files changed

+41
-7
lines changed

2 files changed

+41
-7
lines changed

launcher/src/main/java/org/apache/spark/launcher/ChildProcAppHandle.java

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -156,9 +156,15 @@ synchronized void setAppId(String appId) {
156156
* the exit code.
157157
*/
158158
void monitorChild() {
159-
while (childProc.isAlive()) {
159+
Process proc = childProc;
160+
if (proc == null) {
161+
// Process may have already been disposed of, e.g. by calling kill().
162+
return;
163+
}
164+
165+
while (proc.isAlive()) {
160166
try {
161-
childProc.waitFor();
167+
proc.waitFor();
162168
} catch (Exception e) {
163169
LOG.log(Level.WARNING, "Exception waiting for child process to exit.", e);
164170
}
@@ -173,15 +179,24 @@ void monitorChild() {
173179

174180
int ec;
175181
try {
176-
ec = childProc.exitValue();
182+
ec = proc.exitValue();
177183
} catch (Exception e) {
178184
LOG.log(Level.WARNING, "Exception getting child process exit code, assuming failure.", e);
179185
ec = 1;
180186
}
181187

182-
// Only override the success state; leave other fail states alone.
183-
if (!state.isFinal() || (ec != 0 && state == State.FINISHED)) {
184-
state = State.LOST;
188+
State newState = null;
189+
if (ec != 0) {
190+
// Override state with failure if the current state is not final, or is success.
191+
if (!state.isFinal() || state == State.FINISHED) {
192+
newState = State.FAILED;
193+
}
194+
} else if (!state.isFinal()) {
195+
newState = State.LOST;
196+
}
197+
198+
if (newState != null) {
199+
state = newState;
185200
fireEvent(false);
186201
}
187202
}

launcher/src/test/java/org/apache/spark/launcher/ChildProcAppHandleSuite.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@ public class ChildProcAppHandleSuite extends BaseSuite {
4646
private static final List<String> TEST_SCRIPT = Arrays.asList(
4747
"#!/bin/sh",
4848
"echo \"output\"",
49-
"echo \"error\" 1>&2");
49+
"echo \"error\" 1>&2",
50+
"while [ -n \"$1\" ]; do EC=$1; shift; done",
51+
"exit $EC");
5052

5153
private static File TEST_SCRIPT_PATH;
5254

@@ -176,6 +178,7 @@ public void testRedirectErrorTwiceFails() throws Exception {
176178

177179
@Test
178180
public void testProcMonitorWithOutputRedirection() throws Exception {
181+
assumeFalse(isWindows());
179182
File err = Files.createTempFile("out", "txt").toFile();
180183
SparkAppHandle handle = new TestSparkLauncher()
181184
.redirectError()
@@ -187,13 +190,24 @@ public void testProcMonitorWithOutputRedirection() throws Exception {
187190

188191
@Test
189192
public void testProcMonitorWithLogRedirection() throws Exception {
193+
assumeFalse(isWindows());
190194
SparkAppHandle handle = new TestSparkLauncher()
191195
.redirectToLog(getClass().getName())
192196
.startApplication();
193197
waitFor(handle);
194198
assertEquals(SparkAppHandle.State.LOST, handle.getState());
195199
}
196200

201+
@Test
202+
public void testFailedChildProc() throws Exception {
203+
assumeFalse(isWindows());
204+
SparkAppHandle handle = new TestSparkLauncher(1)
205+
.redirectToLog(getClass().getName())
206+
.startApplication();
207+
waitFor(handle);
208+
assertEquals(SparkAppHandle.State.FAILED, handle.getState());
209+
}
210+
197211
private void waitFor(SparkAppHandle handle) throws Exception {
198212
long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(10);
199213
try {
@@ -212,7 +226,12 @@ private void waitFor(SparkAppHandle handle) throws Exception {
212226
private static class TestSparkLauncher extends SparkLauncher {
213227

214228
TestSparkLauncher() {
229+
this(0);
230+
}
231+
232+
TestSparkLauncher(int ec) {
215233
setAppResource("outputredirtest");
234+
addAppArgs(String.valueOf(ec));
216235
}
217236

218237
@Override

0 commit comments

Comments
 (0)