Skip to content

Commit 9f44a4f

Browse files
committed
Fix slow docker warning logic
1 parent fbf257d commit 9f44a4f

File tree

4 files changed

+183
-48
lines changed

4 files changed

+183
-48
lines changed

pkg/drivers/kic/oci/cli_runner.go

Lines changed: 137 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -145,24 +145,38 @@ func runCmd(cmd *exec.Cmd, warnSlow ...bool) (*RunResult, error) {
145145
killTime := 19 * time.Second // this will be applied only if warnSlow is true
146146
warnTime := 2 * time.Second
147147

148-
if cmd.Args[1] == "volume" || cmd.Args[1] == "ps" { // volume and ps requires more time than inspect
149-
killTime = 30 * time.Second
150-
warnTime = 3 * time.Second
151-
}
152-
153-
ctx, cancel := context.WithTimeout(context.Background(), killTime)
154-
defer cancel()
155-
156-
if warn { // convert exec.Command to with context
157-
cmdWithCtx := exec.CommandContext(ctx, cmd.Args[0], cmd.Args[1:]...)
158-
cmdWithCtx.Stdout = cmd.Stdout // copying the original command
159-
cmdWithCtx.Stderr = cmd.Stderr
160-
cmd = cmdWithCtx
148+
// Increase timeout for operations that typically take longer
149+
if len(cmd.Args) > 1 {
150+
switch cmd.Args[1] {
151+
case "volume", "ps":
152+
// volume and ps requires more time than inspect
153+
killTime = 30 * time.Second
154+
warnTime = 3 * time.Second
155+
case "rm", "remove":
156+
// Container/volume removal can take longer, especially if containers are stuck
157+
killTime = 60 * time.Second // CHANGE: Increase from 45 to 60 seconds
158+
warnTime = 5 * time.Second
159+
case "stop", "kill":
160+
// Stop/kill operations can take time if containers are unresponsive
161+
killTime = 30 * time.Second
162+
warnTime = 3 * time.Second
163+
case "network":
164+
// Network operations can be slow during cleanup
165+
killTime = 30 * time.Second
166+
warnTime = 3 * time.Second
167+
case "prune":
168+
// Prune operations can take a very long time
169+
killTime = 120 * time.Second
170+
warnTime = 10 * time.Second
171+
}
161172
}
162173

163174
rr := &RunResult{Args: cmd.Args}
164175
klog.Infof("Run: %v", rr.Command())
165176

177+
// Determine the actual OCI binary name for user-facing messages
178+
actualBinaryName := getActualBinaryName(cmd.Args)
179+
166180
var outb, errb io.Writer
167181
if cmd.Stdout == nil {
168182
var so bytes.Buffer
@@ -182,31 +196,98 @@ func runCmd(cmd *exec.Cmd, warnSlow ...bool) (*RunResult, error) {
182196
cmd.Stderr = errb
183197

184198
start := time.Now()
185-
err := cmd.Run()
186-
elapsed := time.Since(start)
187-
if warn && !out.JSON && !suppressDockerMessage() {
188-
if elapsed > warnTime {
189-
warnLock.Lock()
190-
_, ok := alreadyWarnedCmds[rr.Command()]
191-
if !ok {
192-
alreadyWarnedCmds[rr.Command()] = true
193-
}
194-
warnLock.Unlock()
199+
var err error
200+
201+
if warn {
202+
// Use context with timeout for proper cancellation
203+
ctx, cancel := context.WithTimeout(context.Background(), killTime)
204+
defer cancel()
195205

196-
if !ok {
197-
out.WarningT(`Executing "{{.command}}" took an unusually long time: {{.duration}}`, out.V{"command": rr.Command(), "duration": elapsed})
198-
// Don't show any restarting hint, when running podman locally (on linux, with sudo). Only when having a service.
199-
if cmd.Args[0] != "sudo" {
200-
out.ErrT(style.Tip, `Restarting the {{.name}} service may improve performance.`, out.V{"name": cmd.Args[0]})
206+
// Create command with context
207+
cmdWithCtx := exec.CommandContext(ctx, cmd.Args[0], cmd.Args[1:]...)
208+
cmdWithCtx.Stdout = cmd.Stdout
209+
cmdWithCtx.Stderr = cmd.Stderr
210+
cmdWithCtx.Env = cmd.Env
211+
cmdWithCtx.Dir = cmd.Dir
212+
213+
// Channel to track completion
214+
done := make(chan error, 1)
215+
var warnTimer *time.Timer
216+
217+
// Start the command in a goroutine
218+
go func() {
219+
done <- cmdWithCtx.Run()
220+
}()
221+
222+
// Set up warning timer
223+
if !out.JSON && !suppressDockerMessage() {
224+
warnTimer = time.AfterFunc(warnTime, func() {
225+
warnLock.Lock()
226+
_, ok := alreadyWarnedCmds[rr.Command()]
227+
if !ok {
228+
alreadyWarnedCmds[rr.Command()] = true
229+
warnLock.Unlock()
230+
231+
// Show operation-specific warning messages
232+
if len(cmd.Args) > 1 {
233+
switch cmd.Args[1] {
234+
case "ps":
235+
out.WarningT(`"{{.command}}" is taking longer than expected. This may indicate {{.name}} is hanging.`, out.V{"command": rr.Command(), "name": actualBinaryName})
236+
case "rm", "remove":
237+
out.WarningT(`"{{.command}}" is taking longer than expected. Container may be stuck - please be patient.`, out.V{"command": rr.Command()})
238+
case "volume":
239+
out.WarningT(`"{{.command}}" is taking longer than expected. Volume operations can be slow.`, out.V{"command": rr.Command()})
240+
case "network":
241+
out.WarningT(`"{{.command}}" is taking longer than expected. Network cleanup can be slow.`, out.V{"command": rr.Command()})
242+
case "prune":
243+
out.WarningT(`"{{.command}}" is taking longer than expected. Prune operations can take several minutes.`, out.V{"command": rr.Command()})
244+
default:
245+
out.WarningT(`"{{.command}}" is taking an unusually long time to respond, please be patient.`, out.V{"command": rr.Command()})
246+
}
247+
} else {
248+
out.WarningT(`"{{.command}}" is taking an unusually long time to respond, please be patient.`, out.V{"command": rr.Command()})
249+
}
250+
251+
// Show restart hint using the actual binary name
252+
out.ErrT(style.Tip, `If this continues to hang, consider restarting the {{.name}} service.`, out.V{"name": actualBinaryName})
253+
} else {
254+
warnLock.Unlock()
201255
}
202-
}
256+
})
203257
}
204258

205-
if ctx.Err() == context.DeadlineExceeded {
206-
return rr, context.DeadlineExceeded
259+
// Wait for completion or timeout
260+
select {
261+
case err = <-done:
262+
// Command completed normally
263+
if warnTimer != nil {
264+
warnTimer.Stop()
265+
}
266+
case <-ctx.Done():
267+
// Command timed out
268+
if warnTimer != nil {
269+
warnTimer.Stop()
270+
}
271+
272+
// Kill the process if it's still running
273+
if cmdWithCtx.Process != nil {
274+
klog.Warningf("Killing slow %s process after %v timeout", rr.Command(), killTime)
275+
cmdWithCtx.Process.Kill()
276+
}
277+
278+
out.WarningT(`"{{.command}}" took too long to respond (>{{.duration}}) and was terminated.`, out.V{"command": rr.Command(), "duration": killTime})
279+
out.ErrT(style.Tip, `Consider restarting the {{.name}} service if this problem persists.`, out.V{"name": actualBinaryName})
280+
281+
return rr, fmt.Errorf("command timed out after %v: %s", killTime, rr.Command())
207282
}
283+
} else {
284+
// Run without timeout for non-critical operations
285+
err = cmd.Run()
208286
}
209287

288+
elapsed := time.Since(start)
289+
290+
// Log completion information
210291
if ex, ok := err.(*exec.ExitError); ok {
211292
klog.Warningf("%s returned with exit code %d", rr.Command(), ex.ExitCode())
212293
rr.ExitCode = ex.ExitCode()
@@ -222,3 +303,28 @@ func runCmd(cmd *exec.Cmd, warnSlow ...bool) (*RunResult, error) {
222303

223304
return rr, fmt.Errorf("%s: %v\nstdout:\n%s\nstderr:\n%s", rr.Command(), err, rr.Stdout.String(), rr.Stderr.String())
224305
}
306+
307+
// Helper function to extract the actual OCI binary name from a command
308+
func getActualBinaryName(cmdArgs []string) string {
309+
if len(cmdArgs) == 0 {
310+
return ""
311+
}
312+
313+
// If not using sudo, return the first argument
314+
// Note: This checks for "sudo" specifically because PrefixCmd()
315+
// explicitly adds "sudo" as a string literal when needed
316+
if cmdArgs[0] != "sudo" {
317+
return cmdArgs[0]
318+
}
319+
320+
// Parse sudo command to find the actual binary
321+
for i := 1; i < len(cmdArgs); i++ {
322+
arg := cmdArgs[i]
323+
// Skip sudo flags (those starting with -)
324+
if !strings.HasPrefix(arg, "-") {
325+
return arg
326+
}
327+
}
328+
329+
return cmdArgs[0] // fallback to sudo if we can't parse
330+
}

pkg/drivers/kic/oci/cli_runner_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ func TestRunCmdWarnSlowOnce(t *testing.T) {
4040
t.Errorf("runCmd has error: %v", err)
4141
}
4242

43-
if !strings.Contains(f1.String(), "Executing \"sleep 3\" took an unusually long time") {
43+
if !strings.Contains(f1.String(), "is taking an unusually long time to respond") {
4444
t.Errorf("runCmd does not print the correct log, instead print :%v", f1.String())
4545
}
4646

@@ -54,7 +54,7 @@ func TestRunCmdWarnSlowOnce(t *testing.T) {
5454
t.Errorf("runCmd has error: %v", err)
5555
}
5656

57-
if strings.Contains(f2.String(), "Executing \"sleep 3\" took an unusually long time") {
57+
if strings.Contains(f2.String(), "is taking an unusually long time to respond") {
5858
t.Errorf("runCmd does not print the correct log, instead print :%v", f2.String())
5959
}
6060
}

pkg/drivers/kic/oci/oci.go

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -41,54 +41,61 @@ import (
4141
)
4242

4343
// DeleteContainersByLabel deletes all containers that have a specific label
44-
// if there no containers found with the given label, it will return nil
44+
// if there no containers found with the given label, it will return nil
4545
func DeleteContainersByLabel(ociBin string, label string) []error {
4646
var deleteErrs []error
4747
ctx := context.Background()
48-
cs, err := ListContainersByLabel(ctx, ociBin, label)
48+
49+
// Enable slow warning for container listing during delete operations
50+
cs, err := ListContainersByLabel(ctx, ociBin, label, true)
4951
if err != nil {
50-
return []error{fmt.Errorf("listing containers by label %q", label)}
52+
return []error{fmt.Errorf("listing containers by label %q: %v", label, err)}
5153
}
5254

5355
if len(cs) == 0 {
5456
return nil
5557
}
5658

5759
for _, c := range cs {
58-
_, err := ContainerStatus(ociBin, c)
60+
// Enable slow warning for container status check
61+
_, err := ContainerStatus(ociBin, c, true)
5962
// only try to delete if docker/podman inspect returns
6063
// if it doesn't it means docker daemon is stuck and needs restart
6164
if err != nil {
6265
deleteErrs = append(deleteErrs, errors.Wrapf(err, "delete container %s: %s daemon is stuck. please try again", c, ociBin))
6366
klog.Errorf("%s daemon seems to be stuck. please try restarting your %s :%v", ociBin, ociBin, err)
6467
continue
6568
}
69+
6670
if err := ShutDown(ociBin, c); err != nil {
6771
klog.Infof("couldn't shut down %s (might be okay): %v ", c, err)
6872
}
6973

70-
if _, err := runCmd(exec.Command(ociBin, "rm", "-f", "-v", c)); err != nil {
74+
// Enable slow warning for container removal
75+
if _, err := runCmd(exec.Command(ociBin, "rm", "-f", "-v", c), true); err != nil {
7176
deleteErrs = append(deleteErrs, errors.Wrapf(err, "delete container %s: output %s", c, err))
7277
}
73-
7478
}
7579
return deleteErrs
7680
}
7781

7882
// DeleteContainer deletes a container by ID or Name
7983
func DeleteContainer(ctx context.Context, ociBin string, name string) error {
80-
_, err := ContainerStatus(ociBin, name)
84+
// Enable slow warning for container status check
85+
_, err := ContainerStatus(ociBin, name, true)
8186
if err == context.DeadlineExceeded {
82-
out.WarningT("{{.ocibin}} is taking an unusually long time to respond, consider restarting {{.ocibin}}", out.V{"ociBin": ociBin})
87+
out.WarningT("{{.ocibin}} is taking an unusually long time to respond, consider restarting {{.ocibin}}", out.V{"ocibin": ociBin})
8388
} else if err != nil {
8489
klog.Warningf("error getting container status, will try to delete anyways: %v", err)
8590
}
91+
8692
// try to delete anyways
8793
if err := ShutDown(ociBin, name); err != nil {
8894
klog.Infof("couldn't shut down %s (might be okay): %v ", name, err)
8995
}
9096

91-
if _, err := runCmd(exec.CommandContext(ctx, ociBin, "rm", "-f", "-v", name)); err != nil {
97+
// Enable slow warning for container removal
98+
if _, err := runCmd(exec.CommandContext(ctx, ociBin, "rm", "-f", "-v", name), true); err != nil {
9299
return errors.Wrapf(err, "delete %s", name)
93100
}
94101
return nil
@@ -368,7 +375,12 @@ func ContainerID(ociBin string, nameOrID string) (string, error) {
368375

369376
// ContainerExists checks if container name exists (either running or exited)
370377
func ContainerExists(ociBin string, name string, warnSlow ...bool) (bool, error) {
371-
rr, err := runCmd(exec.Command(ociBin, "ps", "-a", "--format", "{{.Names}}"), warnSlow...)
378+
shouldWarn := false
379+
if len(warnSlow) > 0 {
380+
shouldWarn = warnSlow[0]
381+
}
382+
383+
rr, err := runCmd(exec.Command(ociBin, "ps", "-a", "--format", "{{.Names}}"), shouldWarn)
372384
if err != nil {
373385
return false, err
374386
}
@@ -540,7 +552,13 @@ func withPortMappings(portMappings []PortMapping) createOpt {
540552

541553
// ListContainersByLabel returns all the container names with a specified label
542554
func ListContainersByLabel(ctx context.Context, ociBin string, label string, warnSlow ...bool) ([]string, error) {
543-
rr, err := runCmd(exec.CommandContext(ctx, ociBin, "ps", "-a", "--filter", fmt.Sprintf("label=%s", label), "--format", "{{.Names}}"), warnSlow...)
555+
// Default to no warnings, but enable if explicitly requested
556+
shouldWarn := false
557+
if len(warnSlow) > 0 {
558+
shouldWarn = warnSlow[0]
559+
}
560+
561+
rr, err := runCmd(exec.CommandContext(ctx, ociBin, "ps", "-a", "--filter", fmt.Sprintf("label=%s", label), "--format", "{{.Names}}"), shouldWarn)
544562
if err != nil {
545563
return nil, err
546564
}
@@ -619,7 +637,12 @@ func PointToHostPodman() error {
619637

620638
// ContainerRunning returns running state of a container
621639
func ContainerRunning(ociBin string, name string, warnSlow ...bool) (bool, error) {
622-
rr, err := runCmd(exec.Command(ociBin, "container", "inspect", name, "--format={{.State.Running}}"), warnSlow...)
640+
shouldWarn := false
641+
if len(warnSlow) > 0 {
642+
shouldWarn = warnSlow[0]
643+
}
644+
645+
rr, err := runCmd(exec.Command(ociBin, "container", "inspect", name, "--format={{.State.Running}}"), shouldWarn)
623646
if err != nil {
624647
return false, err
625648
}
@@ -628,8 +651,13 @@ func ContainerRunning(ociBin string, name string, warnSlow ...bool) (bool, error
628651

629652
// ContainerStatus returns status of a container running,exited,...
630653
func ContainerStatus(ociBin string, name string, warnSlow ...bool) (state.State, error) {
654+
shouldWarn := false
655+
if len(warnSlow) > 0 {
656+
shouldWarn = warnSlow[0]
657+
}
658+
631659
cmd := exec.Command(ociBin, "container", "inspect", name, "--format={{.State.Status}}")
632-
rr, err := runCmd(cmd, warnSlow...)
660+
rr, err := runCmd(cmd, shouldWarn)
633661
o := strings.TrimSpace(rr.Stdout.String())
634662
switch o {
635663
case "configured":

pkg/minikube/delete/delete.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,16 @@ func PossibleLeftOvers(ctx context.Context, cname string, driverName string) {
4747

4848
klog.Infof("deleting possible leftovers for %s (driver=%s) ...", cname, driverName)
4949
delLabel := fmt.Sprintf("%s=%s", oci.ProfileLabelKey, cname)
50-
cs, err := oci.ListContainersByLabel(ctx, bin, delLabel)
50+
51+
// Enable slow warning for container listing during delete operations
52+
cs, err := oci.ListContainersByLabel(ctx, bin, delLabel, true)
5153
if err == nil && len(cs) > 0 {
5254
for _, c := range cs {
5355
out.Step(style.DeletingHost, `Deleting container "{{.name}}" ...`, out.V{"name": cname})
5456
err := oci.DeleteContainer(ctx, bin, c)
5557
if err != nil { // it will error if there is no container to delete
5658
klog.Errorf("error deleting container %q. You may want to delete it manually :\n%v", cname, err)
5759
}
58-
5960
}
6061
}
6162

0 commit comments

Comments
 (0)