Skip to content

Commit be5a61b

Browse files
committed
Fix ContainerExecProc joinWithTimeout
This change updates the `ContainerExecProc#kill` method to force the finished countdown latch to decrement. It has been observed in some high load clusters where the `joinWithTimeout` timeout is reached but the proc continues to be blocked. When `joinWithTimeout` is called, the `kill` method is called if the task does not complete in time. https://github.com/jenkinsci/jenkins/blob/368f1ccbc967a85c0ff801f3729cb77a269afd41/core/src/main/java/hudson/Proc.java#L165 But if `kill` fails to trigger the `finished` countdown latch then the `join` method will continue to wait indefinitely. https://github.com/jenkinsci/kubernetes-plugin/blob/676ab933d12ad8b25e4d7f78594a32066aad2569/src/main/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/ContainerExecProc.java#L100 By forcing `finished.countDown()` after `close` the join should be unblocked even if the `ctl-c` command didn't trigger the exec listener. `countDown` is a no-op if the latch is already zero.
1 parent 676ab93 commit be5a61b

File tree

2 files changed

+49
-1
lines changed

2 files changed

+49
-1
lines changed

src/main/java/org/csanchez/jenkins/plugins/kubernetes/pipeline/ContainerExecProc.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,15 @@ public void kill() throws IOException, InterruptedException {
8989
} catch (IOException e) {
9090
LOGGER.log(Level.FINE, "Proc kill failed, ignoring", e);
9191
} finally {
92-
close();
92+
try {
93+
close();
94+
} finally {
95+
// Force finish finished latch count down to ensure join unblocks.
96+
// In the wild it has been observed that sometimes the countdown
97+
// latch is not triggered resulting in joinWithTimeout never
98+
// completing even after the timeout interruption has been triggered.
99+
finished.countDown();
100+
}
93101
}
94102
}
95103

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
package org.csanchez.jenkins.plugins.kubernetes.pipeline;
2+
3+
import hudson.model.TaskListener;
4+
import io.fabric8.kubernetes.client.dsl.ExecWatch;
5+
import java.io.ByteArrayOutputStream;
6+
import java.io.PrintStream;
7+
import java.util.concurrent.CountDownLatch;
8+
import java.util.concurrent.TimeUnit;
9+
import java.util.concurrent.atomic.AtomicBoolean;
10+
import org.junit.Rule;
11+
import org.junit.Test;
12+
import org.junit.runner.RunWith;
13+
import org.jvnet.hudson.test.JenkinsRule;
14+
import org.mockito.Mock;
15+
import org.mockito.junit.MockitoJUnitRunner;
16+
17+
@RunWith(MockitoJUnitRunner.class)
18+
public class ContainerExecProcTest {
19+
20+
@Mock
21+
private ExecWatch execWatch;
22+
23+
@Rule
24+
public JenkinsRule j = new JenkinsRule();
25+
26+
/**
27+
* This test validates that {@link hudson.Proc#joinWithTimeout(long, TimeUnit, TaskListener)} is
28+
* terminated properly when the finished count down latch is never triggered by the watch.
29+
*/
30+
@Test(timeout = 20000)
31+
public void testJoinWithTimeoutFinishCountDown() throws Exception {
32+
AtomicBoolean alive = new AtomicBoolean(true);
33+
CountDownLatch finished = new CountDownLatch(1);
34+
ByteArrayOutputStream stdin = new ByteArrayOutputStream();
35+
PrintStream ps = new PrintStream(new ByteArrayOutputStream());
36+
TaskListener listener = TaskListener.NULL;
37+
ContainerExecProc proc = new ContainerExecProc(execWatch, alive, finished, stdin, ps);
38+
proc.joinWithTimeout(1, TimeUnit.SECONDS, listener);
39+
}
40+
}

0 commit comments

Comments
 (0)