diff --git a/pkg/drivers/kic/oci/cli_runner.go b/pkg/drivers/kic/oci/cli_runner.go index 9c39d6168204..ebe126b36466 100644 --- a/pkg/drivers/kic/oci/cli_runner.go +++ b/pkg/drivers/kic/oci/cli_runner.go @@ -145,24 +145,38 @@ func runCmd(cmd *exec.Cmd, warnSlow ...bool) (*RunResult, error) { killTime := 19 * time.Second // this will be applied only if warnSlow is true warnTime := 2 * time.Second - if cmd.Args[1] == "volume" || cmd.Args[1] == "ps" { // volume and ps requires more time than inspect - killTime = 30 * time.Second - warnTime = 3 * time.Second - } - - ctx, cancel := context.WithTimeout(context.Background(), killTime) - defer cancel() - - if warn { // convert exec.Command to with context - cmdWithCtx := exec.CommandContext(ctx, cmd.Args[0], cmd.Args[1:]...) - cmdWithCtx.Stdout = cmd.Stdout // copying the original command - cmdWithCtx.Stderr = cmd.Stderr - cmd = cmdWithCtx + // Increase timeout for operations that typically take longer + if len(cmd.Args) > 1 { + switch cmd.Args[1] { + case "volume", "ps": + // volume and ps requires more time than inspect + killTime = 30 * time.Second + warnTime = 3 * time.Second + case "rm", "remove": + // Container/volume removal can take longer, especially if containers are stuck + killTime = 60 * time.Second // CHANGE: Increase from 45 to 60 seconds + warnTime = 5 * time.Second + case "stop", "kill": + // Stop/kill operations can take time if containers are unresponsive + killTime = 30 * time.Second + warnTime = 3 * time.Second + case "network": + // Network operations can be slow during cleanup + killTime = 30 * time.Second + warnTime = 3 * time.Second + case "prune": + // Prune operations can take a very long time + killTime = 120 * time.Second + warnTime = 10 * time.Second + } } rr := &RunResult{Args: cmd.Args} klog.Infof("Run: %v", rr.Command()) + // Determine the actual OCI binary name for user-facing messages + actualBinaryName := getActualBinaryName(cmd.Args) + var outb, errb io.Writer if cmd.Stdout == nil { var so bytes.Buffer @@ -182,31 +196,98 @@ func runCmd(cmd *exec.Cmd, warnSlow ...bool) (*RunResult, error) { cmd.Stderr = errb start := time.Now() - err := cmd.Run() - elapsed := time.Since(start) - if warn && !out.JSON && !suppressDockerMessage() { - if elapsed > warnTime { - warnLock.Lock() - _, ok := alreadyWarnedCmds[rr.Command()] - if !ok { - alreadyWarnedCmds[rr.Command()] = true - } - warnLock.Unlock() + var err error + + if warn { + // Use context with timeout for proper cancellation + ctx, cancel := context.WithTimeout(context.Background(), killTime) + defer cancel() - if !ok { - out.WarningT(`Executing "{{.command}}" took an unusually long time: {{.duration}}`, out.V{"command": rr.Command(), "duration": elapsed}) - // Don't show any restarting hint, when running podman locally (on linux, with sudo). Only when having a service. - if cmd.Args[0] != "sudo" { - out.ErrT(style.Tip, `Restarting the {{.name}} service may improve performance.`, out.V{"name": cmd.Args[0]}) + // Create command with context + cmdWithCtx := exec.CommandContext(ctx, cmd.Args[0], cmd.Args[1:]...) + cmdWithCtx.Stdout = cmd.Stdout + cmdWithCtx.Stderr = cmd.Stderr + cmdWithCtx.Env = cmd.Env + cmdWithCtx.Dir = cmd.Dir + + // Channel to track completion + done := make(chan error, 1) + var warnTimer *time.Timer + + // Start the command in a goroutine + go func() { + done <- cmdWithCtx.Run() + }() + + // Set up warning timer + if !out.JSON && !suppressDockerMessage() { + warnTimer = time.AfterFunc(warnTime, func() { + warnLock.Lock() + _, ok := alreadyWarnedCmds[rr.Command()] + if !ok { + alreadyWarnedCmds[rr.Command()] = true + warnLock.Unlock() + + // Show operation-specific warning messages + if len(cmd.Args) > 1 { + switch cmd.Args[1] { + case "ps": + out.WarningT(`"{{.command}}" is taking longer than expected. This may indicate {{.name}} is hanging.`, out.V{"command": rr.Command(), "name": actualBinaryName}) + case "rm", "remove": + out.WarningT(`"{{.command}}" is taking longer than expected. Container may be stuck - please be patient.`, out.V{"command": rr.Command()}) + case "volume": + out.WarningT(`"{{.command}}" is taking longer than expected. Volume operations can be slow.`, out.V{"command": rr.Command()}) + case "network": + out.WarningT(`"{{.command}}" is taking longer than expected. Network cleanup can be slow.`, out.V{"command": rr.Command()}) + case "prune": + out.WarningT(`"{{.command}}" is taking longer than expected. Prune operations can take several minutes.`, out.V{"command": rr.Command()}) + default: + out.WarningT(`"{{.command}}" is taking an unusually long time to respond, please be patient.`, out.V{"command": rr.Command()}) + } + } else { + out.WarningT(`"{{.command}}" is taking an unusually long time to respond, please be patient.`, out.V{"command": rr.Command()}) + } + + // Show restart hint using the actual binary name + out.ErrT(style.Tip, `If this continues to hang, consider restarting the {{.name}} service.`, out.V{"name": actualBinaryName}) + } else { + warnLock.Unlock() } - } + }) } - if ctx.Err() == context.DeadlineExceeded { - return rr, context.DeadlineExceeded + // Wait for completion or timeout + select { + case err = <-done: + // Command completed normally + if warnTimer != nil { + warnTimer.Stop() + } + case <-ctx.Done(): + // Command timed out + if warnTimer != nil { + warnTimer.Stop() + } + + // Kill the process if it's still running + if cmdWithCtx.Process != nil { + klog.Warningf("Killing slow %s process after %v timeout", rr.Command(), killTime) + cmdWithCtx.Process.Kill() + } + + out.WarningT(`"{{.command}}" took too long to respond (>{{.duration}}) and was terminated.`, out.V{"command": rr.Command(), "duration": killTime}) + out.ErrT(style.Tip, `Consider restarting the {{.name}} service if this problem persists.`, out.V{"name": actualBinaryName}) + + return rr, fmt.Errorf("command timed out after %v: %s", killTime, rr.Command()) } + } else { + // Run without timeout for non-critical operations + err = cmd.Run() } + elapsed := time.Since(start) + + // Log completion information if ex, ok := err.(*exec.ExitError); ok { klog.Warningf("%s returned with exit code %d", rr.Command(), ex.ExitCode()) rr.ExitCode = ex.ExitCode() @@ -222,3 +303,28 @@ func runCmd(cmd *exec.Cmd, warnSlow ...bool) (*RunResult, error) { return rr, fmt.Errorf("%s: %v\nstdout:\n%s\nstderr:\n%s", rr.Command(), err, rr.Stdout.String(), rr.Stderr.String()) } + +// Helper function to extract the actual OCI binary name from a command +func getActualBinaryName(cmdArgs []string) string { + if len(cmdArgs) == 0 { + return "" + } + + // If not using sudo, return the first argument + // Note: This checks for "sudo" specifically because PrefixCmd() + // explicitly adds "sudo" as a string literal when needed + if cmdArgs[0] != "sudo" { + return cmdArgs[0] + } + + // Parse sudo command to find the actual binary + for i := 1; i < len(cmdArgs); i++ { + arg := cmdArgs[i] + // Skip sudo flags (those starting with -) + if !strings.HasPrefix(arg, "-") { + return arg + } + } + + return cmdArgs[0] // fallback to sudo if we can't parse +} diff --git a/pkg/drivers/kic/oci/cli_runner_test.go b/pkg/drivers/kic/oci/cli_runner_test.go index 0ad18894ed1e..915a6c1fc44f 100644 --- a/pkg/drivers/kic/oci/cli_runner_test.go +++ b/pkg/drivers/kic/oci/cli_runner_test.go @@ -40,7 +40,7 @@ func TestRunCmdWarnSlowOnce(t *testing.T) { t.Errorf("runCmd has error: %v", err) } - if !strings.Contains(f1.String(), "Executing \"sleep 3\" took an unusually long time") { + if !strings.Contains(f1.String(), "is taking an unusually long time to respond") { t.Errorf("runCmd does not print the correct log, instead print :%v", f1.String()) } @@ -54,7 +54,7 @@ func TestRunCmdWarnSlowOnce(t *testing.T) { t.Errorf("runCmd has error: %v", err) } - if strings.Contains(f2.String(), "Executing \"sleep 3\" took an unusually long time") { + if strings.Contains(f2.String(), "is taking an unusually long time to respond") { t.Errorf("runCmd does not print the correct log, instead print :%v", f2.String()) } } diff --git a/pkg/drivers/kic/oci/oci.go b/pkg/drivers/kic/oci/oci.go index b02fb22397a0..64a4583621eb 100644 --- a/pkg/drivers/kic/oci/oci.go +++ b/pkg/drivers/kic/oci/oci.go @@ -41,13 +41,15 @@ import ( ) // DeleteContainersByLabel deletes all containers that have a specific label -// if there no containers found with the given label, it will return nil +// if there no containers found with the given label, it will return nil func DeleteContainersByLabel(ociBin string, label string) []error { var deleteErrs []error ctx := context.Background() - cs, err := ListContainersByLabel(ctx, ociBin, label) + + // Enable slow warning for container listing during delete operations + cs, err := ListContainersByLabel(ctx, ociBin, label, true) if err != nil { - return []error{fmt.Errorf("listing containers by label %q", label)} + return []error{fmt.Errorf("listing containers by label %q: %v", label, err)} } if len(cs) == 0 { @@ -55,7 +57,8 @@ func DeleteContainersByLabel(ociBin string, label string) []error { } for _, c := range cs { - _, err := ContainerStatus(ociBin, c) + // Enable slow warning for container status check + _, err := ContainerStatus(ociBin, c, true) // only try to delete if docker/podman inspect returns // if it doesn't it means docker daemon is stuck and needs restart if err != nil { @@ -63,32 +66,36 @@ func DeleteContainersByLabel(ociBin string, label string) []error { klog.Errorf("%s daemon seems to be stuck. please try restarting your %s :%v", ociBin, ociBin, err) continue } + if err := ShutDown(ociBin, c); err != nil { klog.Infof("couldn't shut down %s (might be okay): %v ", c, err) } - if _, err := runCmd(exec.Command(ociBin, "rm", "-f", "-v", c)); err != nil { + // Enable slow warning for container removal + if _, err := runCmd(exec.Command(ociBin, "rm", "-f", "-v", c), true); err != nil { deleteErrs = append(deleteErrs, errors.Wrapf(err, "delete container %s: output %s", c, err)) } - } return deleteErrs } // DeleteContainer deletes a container by ID or Name func DeleteContainer(ctx context.Context, ociBin string, name string) error { - _, err := ContainerStatus(ociBin, name) + // Enable slow warning for container status check + _, err := ContainerStatus(ociBin, name, true) if err == context.DeadlineExceeded { - out.WarningT("{{.ocibin}} is taking an unusually long time to respond, consider restarting {{.ocibin}}", out.V{"ociBin": ociBin}) + out.WarningT("{{.ocibin}} is taking an unusually long time to respond, consider restarting {{.ocibin}}", out.V{"ocibin": ociBin}) } else if err != nil { klog.Warningf("error getting container status, will try to delete anyways: %v", err) } + // try to delete anyways if err := ShutDown(ociBin, name); err != nil { klog.Infof("couldn't shut down %s (might be okay): %v ", name, err) } - if _, err := runCmd(exec.CommandContext(ctx, ociBin, "rm", "-f", "-v", name)); err != nil { + // Enable slow warning for container removal + if _, err := runCmd(exec.CommandContext(ctx, ociBin, "rm", "-f", "-v", name), true); err != nil { return errors.Wrapf(err, "delete %s", name) } return nil @@ -368,7 +375,12 @@ func ContainerID(ociBin string, nameOrID string) (string, error) { // ContainerExists checks if container name exists (either running or exited) func ContainerExists(ociBin string, name string, warnSlow ...bool) (bool, error) { - rr, err := runCmd(exec.Command(ociBin, "ps", "-a", "--format", "{{.Names}}"), warnSlow...) + shouldWarn := false + if len(warnSlow) > 0 { + shouldWarn = warnSlow[0] + } + + rr, err := runCmd(exec.Command(ociBin, "ps", "-a", "--format", "{{.Names}}"), shouldWarn) if err != nil { return false, err } @@ -540,7 +552,13 @@ func withPortMappings(portMappings []PortMapping) createOpt { // ListContainersByLabel returns all the container names with a specified label func ListContainersByLabel(ctx context.Context, ociBin string, label string, warnSlow ...bool) ([]string, error) { - rr, err := runCmd(exec.CommandContext(ctx, ociBin, "ps", "-a", "--filter", fmt.Sprintf("label=%s", label), "--format", "{{.Names}}"), warnSlow...) + // Default to no warnings, but enable if explicitly requested + shouldWarn := false + if len(warnSlow) > 0 { + shouldWarn = warnSlow[0] + } + + rr, err := runCmd(exec.CommandContext(ctx, ociBin, "ps", "-a", "--filter", fmt.Sprintf("label=%s", label), "--format", "{{.Names}}"), shouldWarn) if err != nil { return nil, err } @@ -619,7 +637,12 @@ func PointToHostPodman() error { // ContainerRunning returns running state of a container func ContainerRunning(ociBin string, name string, warnSlow ...bool) (bool, error) { - rr, err := runCmd(exec.Command(ociBin, "container", "inspect", name, "--format={{.State.Running}}"), warnSlow...) + shouldWarn := false + if len(warnSlow) > 0 { + shouldWarn = warnSlow[0] + } + + rr, err := runCmd(exec.Command(ociBin, "container", "inspect", name, "--format={{.State.Running}}"), shouldWarn) if err != nil { return false, err } @@ -628,8 +651,13 @@ func ContainerRunning(ociBin string, name string, warnSlow ...bool) (bool, error // ContainerStatus returns status of a container running,exited,... func ContainerStatus(ociBin string, name string, warnSlow ...bool) (state.State, error) { + shouldWarn := false + if len(warnSlow) > 0 { + shouldWarn = warnSlow[0] + } + cmd := exec.Command(ociBin, "container", "inspect", name, "--format={{.State.Status}}") - rr, err := runCmd(cmd, warnSlow...) + rr, err := runCmd(cmd, shouldWarn) o := strings.TrimSpace(rr.Stdout.String()) switch o { case "configured": diff --git a/pkg/minikube/delete/delete.go b/pkg/minikube/delete/delete.go index 95dc688f8e53..d4255ea03d91 100644 --- a/pkg/minikube/delete/delete.go +++ b/pkg/minikube/delete/delete.go @@ -47,7 +47,9 @@ func PossibleLeftOvers(ctx context.Context, cname string, driverName string) { klog.Infof("deleting possible leftovers for %s (driver=%s) ...", cname, driverName) delLabel := fmt.Sprintf("%s=%s", oci.ProfileLabelKey, cname) - cs, err := oci.ListContainersByLabel(ctx, bin, delLabel) + + // Enable slow warning for container listing during delete operations + cs, err := oci.ListContainersByLabel(ctx, bin, delLabel, true) if err == nil && len(cs) > 0 { for _, c := range cs { out.Step(style.DeletingHost, `Deleting container "{{.name}}" ...`, out.V{"name": cname}) @@ -55,7 +57,6 @@ func PossibleLeftOvers(ctx context.Context, cname string, driverName string) { if err != nil { // it will error if there is no container to delete klog.Errorf("error deleting container %q. You may want to delete it manually :\n%v", cname, err) } - } }