Skip to content

Commit 432ef1e

Browse files
acrocagithub-actions[bot]
authored andcommitted
Terminate app process group on exit (#1602)
* Kill app process group when stopping Signed-off-by: Albert Callarisa <albert@diagrid.io> * Address copilot comments Signed-off-by: Albert Callarisa <albert@diagrid.io> * Addressed copilot comments Signed-off-by: Albert Callarisa <albert@diagrid.io> * Addressed copilot comments Signed-off-by: Albert Callarisa <albert@diagrid.io> * Addressed copilot comments Signed-off-by: Albert Callarisa <albert@diagrid.io> * Addressed copilot comments Signed-off-by: Albert Callarisa <albert@diagrid.io> * Addressed copilot comments Signed-off-by: Albert Callarisa <albert@diagrid.io> --------- Signed-off-by: Albert Callarisa <albert@diagrid.io> (cherry picked from commit 35cba49)
1 parent 172fc69 commit 432ef1e

File tree

4 files changed

+186
-9
lines changed

4 files changed

+186
-9
lines changed

cmd/run.go

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -443,8 +443,8 @@ dapr run --run-file /path/to/directory -k
443443
if output.AppErr != nil {
444444
exitWithError = true
445445
print.FailureStatusEvent(os.Stderr, fmt.Sprintf("Error exiting App: %s", output.AppErr))
446-
} else if output.AppCMD != nil && output.AppCMD.Process != nil && (output.AppCMD.ProcessState == nil || !output.AppCMD.ProcessState.Exited()) {
447-
err = output.AppCMD.Process.Kill()
446+
} else if output.AppCMD != nil && output.AppCMD.Process != nil {
447+
err = killProcessGroup(output.AppCMD.Process)
448448
if err != nil {
449449
// If the process already exited on its own, treat this as a clean shutdown.
450450
if errors.Is(err, os.ErrProcessDone) {
@@ -826,7 +826,7 @@ func stopDaprdAndAppProcesses(runState *runExec.RunExec) bool {
826826
if appErr != nil {
827827
exitWithError = true
828828
print.StatusEvent(runState.AppCMD.ErrorWriter, print.LogFailure, "Error exiting App: %s", appErr)
829-
} else if runState.AppCMD.Command != nil && runState.AppCMD.Command.Process != nil && (runState.AppCMD.Command.ProcessState == nil || !runState.AppCMD.Command.ProcessState.Exited()) {
829+
} else if runState.AppCMD.Command != nil && runState.AppCMD.Command.Process != nil {
830830
err = killAppProcess(runState)
831831
if err != nil {
832832
exitWithError = true
@@ -1002,12 +1002,7 @@ func killAppProcess(runE *runExec.RunExec) error {
10021002
if runE.AppCMD.Command == nil || runE.AppCMD.Command.Process == nil {
10031003
return nil
10041004
}
1005-
// Check if the process has already exited on its own.
1006-
if runE.AppCMD.Command.ProcessState != nil && runE.AppCMD.Command.ProcessState.Exited() {
1007-
// Process already exited, no need to kill it.
1008-
return nil
1009-
}
1010-
err := runE.AppCMD.Command.Process.Kill()
1005+
err := killProcessGroup(runE.AppCMD.Command.Process)
10111006
if err != nil {
10121007
// If the process already exited on its own
10131008
if errors.Is(err, os.ErrProcessDone) {

cmd/run_unix.go

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,77 @@ limitations under the License.
1616
package cmd
1717

1818
import (
19+
"errors"
1920
"fmt"
2021
"os"
2122
"os/exec"
2223
"syscall"
24+
"time"
2325

2426
"github.com/dapr/cli/pkg/print"
2527
runExec "github.com/dapr/cli/pkg/runexec"
2628
)
2729

30+
// killProcessGroup kills the entire process group of the given process so that
31+
// grandchild processes (e.g. the compiled binary spawned by `go run`) are also
32+
// terminated. It sends SIGTERM first; if the process group is still alive after
33+
// a 5-second grace period, it sends SIGKILL.
34+
func killProcessGroup(process *os.Process) error {
35+
var (
36+
pgid int
37+
err error
38+
)
39+
40+
pgid, err = syscall.Getpgid(process.Pid)
41+
if err != nil {
42+
if errors.Is(err, syscall.ESRCH) {
43+
// The group leader may have already exited (e.g. when using `go run`),
44+
// but other processes in the same process group can still be alive.
45+
// Since the app is started with Setpgid=true, the PGID equals the leader
46+
// PID, so fall back to using process.Pid as the PGID.
47+
pgid = process.Pid
48+
} else {
49+
// Can't determine pgid for some other reason — fall back to single-process kill.
50+
killErr := process.Kill()
51+
if errors.Is(killErr, os.ErrProcessDone) {
52+
return nil
53+
}
54+
return killErr
55+
}
56+
}
57+
58+
err = syscall.Kill(-pgid, syscall.SIGTERM)
59+
if err != nil {
60+
if err == syscall.ESRCH {
61+
return nil // process group already gone
62+
}
63+
return fmt.Errorf("failed to send SIGTERM to process group %d: %w", pgid, err)
64+
}
65+
66+
const gracePeriod = 5 * time.Second
67+
deadline := time.Now().Add(gracePeriod)
68+
for time.Now().Before(deadline) {
69+
err = syscall.Kill(-pgid, 0)
70+
if err == nil {
71+
time.Sleep(100 * time.Millisecond)
72+
continue
73+
}
74+
if errors.Is(err, syscall.ESRCH) {
75+
return nil // process group gone
76+
}
77+
return fmt.Errorf("failed to check status of process group %d: %w", pgid, err)
78+
}
79+
// Grace period elapsed — force kill.
80+
err = syscall.Kill(-pgid, syscall.SIGKILL)
81+
if err == syscall.ESRCH {
82+
return nil
83+
}
84+
if err != nil {
85+
return fmt.Errorf("failed to send SIGKILL to process group %d: %w", pgid, err)
86+
}
87+
return nil
88+
}
89+
2890
// setDaprProcessGroupForRun sets the process group on the daprd command so the
2991
// sidecar can be managed independently (e.g. when the app is started via exec).
3092
func setDaprProcessGroupForRun(cmd *exec.Cmd) {

cmd/run_unix_test.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
//go:build !windows
2+
3+
/*
4+
Copyright 2026 The Dapr Authors
5+
Licensed under the Apache License, Version 2.0 (the "License");
6+
you may not use this file except in compliance with the License.
7+
You may obtain a copy of the License at
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
*/
15+
16+
package cmd
17+
18+
import (
19+
"os/exec"
20+
"syscall"
21+
"testing"
22+
"time"
23+
24+
"github.com/stretchr/testify/assert"
25+
"github.com/stretchr/testify/require"
26+
)
27+
28+
// TestKillProcessGroup_KillsEntireGroup verifies that killProcessGroup terminates all
29+
// processes in the group, not just the leader. A second process is explicitly placed
30+
// into the leader's process group using SysProcAttr.Pgid; both must be gone after
31+
// killProcessGroup returns.
32+
func TestKillProcessGroup_KillsEntireGroup(t *testing.T) {
33+
leader := exec.Command("sleep", "100")
34+
leader.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
35+
require.NoError(t, leader.Start())
36+
37+
// Explicitly place a second process in the same process group as the leader.
38+
member := exec.Command("sleep", "100")
39+
member.SysProcAttr = &syscall.SysProcAttr{Setpgid: true, Pgid: leader.Process.Pid}
40+
require.NoError(t, member.Start())
41+
42+
// Wait goroutines reap the processes once killed, so Kill(-pgid, 0) eventually
43+
// returns ESRCH rather than seeing zombies that still count as group members.
44+
go func() { _ = leader.Wait() }()
45+
go func() { _ = member.Wait() }()
46+
47+
require.NoError(t, killProcessGroup(leader.Process))
48+
49+
assert.Eventually(t, func() bool {
50+
return syscall.Kill(-leader.Process.Pid, 0) == syscall.ESRCH
51+
}, 2*time.Second, 50*time.Millisecond, "process group should be gone after killProcessGroup")
52+
}
53+
54+
// TestKillProcessGroup_AlreadyGone verifies that killProcessGroup returns nil when
55+
// the process has already exited cleanly before kill is attempted.
56+
func TestKillProcessGroup_AlreadyGone(t *testing.T) {
57+
cmd := exec.Command("true")
58+
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
59+
require.NoError(t, cmd.Start())
60+
proc := cmd.Process
61+
require.NoError(t, cmd.Wait())
62+
63+
assert.NoError(t, killProcessGroup(proc))
64+
}
65+
66+
// TestKillProcessGroup_LeaderExitedGroupAlive verifies that killProcessGroup still
67+
// kills remaining group members when the leader has already exited — the typical
68+
// `go run` scenario where the wrapper process exits but the compiled binary lives on
69+
// in the same process group.
70+
func TestKillProcessGroup_LeaderExitedGroupAlive(t *testing.T) {
71+
leader := exec.Command("sleep", "100")
72+
leader.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
73+
require.NoError(t, leader.Start())
74+
leaderPGID := leader.Process.Pid
75+
76+
// Place a second process in the same group as the leader.
77+
member := exec.Command("sleep", "100")
78+
member.SysProcAttr = &syscall.SysProcAttr{Setpgid: true, Pgid: leaderPGID}
79+
require.NoError(t, member.Start())
80+
81+
// Kill only the group leader; member stays alive in the same group.
82+
require.NoError(t, leader.Process.Kill())
83+
_ = leader.Wait() // reap immediately so it doesn't linger as a zombie
84+
85+
// Confirm the group still has a live member before we call killProcessGroup.
86+
require.NoError(t, syscall.Kill(-leaderPGID, 0), "member should still be alive in the group")
87+
88+
// killProcessGroup falls back to process.Pid as PGID since Getpgid returns ESRCH
89+
// for the dead leader, then signals and terminates the remaining member.
90+
go func() { _ = member.Wait() }()
91+
92+
require.NoError(t, killProcessGroup(leader.Process))
93+
94+
assert.Eventually(t, func() bool {
95+
return syscall.Kill(-leaderPGID, 0) == syscall.ESRCH
96+
}, 2*time.Second, 50*time.Millisecond, "process group should be gone after leader exits")
97+
}

cmd/run_windows.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,33 @@ import (
1919
"fmt"
2020
"os"
2121
"os/exec"
22+
"strconv"
23+
24+
"github.com/kolesnikovae/go-winjob"
2225

2326
"github.com/dapr/cli/pkg/print"
2427
runExec "github.com/dapr/cli/pkg/runexec"
28+
"github.com/dapr/cli/utils"
2529
)
2630

31+
// killProcessGroup on Windows terminates the entire process tree by terminating the
32+
// job object associated with the current CLI process (keyed by os.Getpid()). The
33+
// process argument is only used for the fallback path when no job object can be
34+
// opened (e.g. the process was never attached to one).
35+
func killProcessGroup(process *os.Process) error {
36+
jobName := utils.GetJobObjectNameFromPID(strconv.Itoa(os.Getpid()))
37+
jbobj, err := winjob.Open(jobName)
38+
if err != nil {
39+
if os.IsNotExist(err) {
40+
// Job object not found — process was never attached. Fall back to single-process kill.
41+
return process.Kill()
42+
}
43+
return fmt.Errorf("failed to open job object %q: %w", jobName, err)
44+
}
45+
defer jbobj.Close()
46+
return jbobj.TerminateWithExitCode(0)
47+
}
48+
2749
// setDaprProcessGroupForRun is a no-op on Windows (SysProcAttr.Setpgid does not exist).
2850
func setDaprProcessGroupForRun(cmd *exec.Cmd) {
2951
// no-op on Windows
@@ -48,6 +70,7 @@ func startAppProcessInBackground(output *runExec.RunOutput, binary string, args
4870
if err := output.AppCMD.Start(); err != nil {
4971
return fmt.Errorf("failed to start app: %w", err)
5072
}
73+
utils.AttachJobObjectToProcess(strconv.Itoa(os.Getpid()), output.AppCMD.Process)
5174

5275
go func() {
5376
waitErr := output.AppCMD.Wait()

0 commit comments

Comments
 (0)