Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 137 additions & 31 deletions pkg/drivers/kic/oci/cli_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,24 +145,38 @@ func runCmd(cmd *exec.Cmd, warnSlow ...bool) (*RunResult, error) {
killTime := 19 * time.Second // this will be applied only if warnSlow is true
warnTime := 2 * time.Second

if cmd.Args[1] == "volume" || cmd.Args[1] == "ps" { // volume and ps requires more time than inspect
killTime = 30 * time.Second
warnTime = 3 * time.Second
}

ctx, cancel := context.WithTimeout(context.Background(), killTime)
defer cancel()

if warn { // convert exec.Command to with context
cmdWithCtx := exec.CommandContext(ctx, cmd.Args[0], cmd.Args[1:]...)
cmdWithCtx.Stdout = cmd.Stdout // copying the original command
cmdWithCtx.Stderr = cmd.Stderr
cmd = cmdWithCtx
// Increase timeout for operations that typically take longer
if len(cmd.Args) > 1 {
switch cmd.Args[1] {
case "volume", "ps":
// volume and ps requires more time than inspect
killTime = 30 * time.Second
warnTime = 3 * time.Second
case "rm", "remove":
// Container/volume removal can take longer, especially if containers are stuck
killTime = 60 * time.Second // CHANGE: Increase from 45 to 60 seconds
warnTime = 5 * time.Second
case "stop", "kill":
// Stop/kill operations can take time if containers are unresponsive
killTime = 30 * time.Second
warnTime = 3 * time.Second
case "network":
// Network operations can be slow during cleanup
killTime = 30 * time.Second
warnTime = 3 * time.Second
case "prune":
// Prune operations can take a very long time
killTime = 120 * time.Second
warnTime = 10 * time.Second
}
}

rr := &RunResult{Args: cmd.Args}
klog.Infof("Run: %v", rr.Command())

// Determine the actual OCI binary name for user-facing messages
actualBinaryName := getActualBinaryName(cmd.Args)

var outb, errb io.Writer
if cmd.Stdout == nil {
var so bytes.Buffer
Expand All @@ -182,31 +196,98 @@ func runCmd(cmd *exec.Cmd, warnSlow ...bool) (*RunResult, error) {
cmd.Stderr = errb

start := time.Now()
err := cmd.Run()
elapsed := time.Since(start)
if warn && !out.JSON && !suppressDockerMessage() {
if elapsed > warnTime {
warnLock.Lock()
_, ok := alreadyWarnedCmds[rr.Command()]
if !ok {
alreadyWarnedCmds[rr.Command()] = true
}
warnLock.Unlock()
var err error

if warn {
// Use context with timeout for proper cancellation
ctx, cancel := context.WithTimeout(context.Background(), killTime)
defer cancel()

if !ok {
out.WarningT(`Executing "{{.command}}" took an unusually long time: {{.duration}}`, out.V{"command": rr.Command(), "duration": elapsed})
// Don't show any restarting hint, when running podman locally (on linux, with sudo). Only when having a service.
if cmd.Args[0] != "sudo" {
out.ErrT(style.Tip, `Restarting the {{.name}} service may improve performance.`, out.V{"name": cmd.Args[0]})
// Create command with context
cmdWithCtx := exec.CommandContext(ctx, cmd.Args[0], cmd.Args[1:]...)
cmdWithCtx.Stdout = cmd.Stdout
cmdWithCtx.Stderr = cmd.Stderr
cmdWithCtx.Env = cmd.Env
cmdWithCtx.Dir = cmd.Dir

// Channel to track completion
done := make(chan error, 1)
var warnTimer *time.Timer

// Start the command in a goroutine
go func() {
done <- cmdWithCtx.Run()
}()

// Set up warning timer
if !out.JSON && !suppressDockerMessage() {
warnTimer = time.AfterFunc(warnTime, func() {
warnLock.Lock()
_, ok := alreadyWarnedCmds[rr.Command()]
if !ok {
alreadyWarnedCmds[rr.Command()] = true
warnLock.Unlock()

// Show operation-specific warning messages
if len(cmd.Args) > 1 {
switch cmd.Args[1] {
case "ps":
out.WarningT(`"{{.command}}" is taking longer than expected. This may indicate {{.name}} is hanging.`, out.V{"command": rr.Command(), "name": actualBinaryName})
case "rm", "remove":
out.WarningT(`"{{.command}}" is taking longer than expected. Container may be stuck - please be patient.`, out.V{"command": rr.Command()})
case "volume":
out.WarningT(`"{{.command}}" is taking longer than expected. Volume operations can be slow.`, out.V{"command": rr.Command()})
case "network":
out.WarningT(`"{{.command}}" is taking longer than expected. Network cleanup can be slow.`, out.V{"command": rr.Command()})
case "prune":
out.WarningT(`"{{.command}}" is taking longer than expected. Prune operations can take several minutes.`, out.V{"command": rr.Command()})
default:
out.WarningT(`"{{.command}}" is taking an unusually long time to respond, please be patient.`, out.V{"command": rr.Command()})
}
} else {
out.WarningT(`"{{.command}}" is taking an unusually long time to respond, please be patient.`, out.V{"command": rr.Command()})
}

// Show restart hint using the actual binary name
out.ErrT(style.Tip, `If this continues to hang, consider restarting the {{.name}} service.`, out.V{"name": actualBinaryName})
} else {
warnLock.Unlock()
}
}
})
}

if ctx.Err() == context.DeadlineExceeded {
return rr, context.DeadlineExceeded
// Wait for completion or timeout
select {
case err = <-done:
// Command completed normally
if warnTimer != nil {
warnTimer.Stop()
}
case <-ctx.Done():
// Command timed out
if warnTimer != nil {
warnTimer.Stop()
}

// Kill the process if it's still running
if cmdWithCtx.Process != nil {
klog.Warningf("Killing slow %s process after %v timeout", rr.Command(), killTime)
cmdWithCtx.Process.Kill()
}

out.WarningT(`"{{.command}}" took too long to respond (>{{.duration}}) and was terminated.`, out.V{"command": rr.Command(), "duration": killTime})
out.ErrT(style.Tip, `Consider restarting the {{.name}} service if this problem persists.`, out.V{"name": actualBinaryName})

return rr, fmt.Errorf("command timed out after %v: %s", killTime, rr.Command())
}
} else {
// Run without timeout for non-critical operations
err = cmd.Run()
}

elapsed := time.Since(start)

// Log completion information
if ex, ok := err.(*exec.ExitError); ok {
klog.Warningf("%s returned with exit code %d", rr.Command(), ex.ExitCode())
rr.ExitCode = ex.ExitCode()
Expand All @@ -222,3 +303,28 @@ func runCmd(cmd *exec.Cmd, warnSlow ...bool) (*RunResult, error) {

return rr, fmt.Errorf("%s: %v\nstdout:\n%s\nstderr:\n%s", rr.Command(), err, rr.Stdout.String(), rr.Stderr.String())
}

// Helper function to extract the actual OCI binary name from a command
func getActualBinaryName(cmdArgs []string) string {
if len(cmdArgs) == 0 {
return ""
}

// If not using sudo, return the first argument
// Note: This checks for "sudo" specifically because PrefixCmd()
// explicitly adds "sudo" as a string literal when needed
if cmdArgs[0] != "sudo" {
return cmdArgs[0]
}

// Parse sudo command to find the actual binary
for i := 1; i < len(cmdArgs); i++ {
arg := cmdArgs[i]
// Skip sudo flags (those starting with -)
if !strings.HasPrefix(arg, "-") {
return arg
}
}

return cmdArgs[0] // fallback to sudo if we can't parse
}
4 changes: 2 additions & 2 deletions pkg/drivers/kic/oci/cli_runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func TestRunCmdWarnSlowOnce(t *testing.T) {
t.Errorf("runCmd has error: %v", err)
}

if !strings.Contains(f1.String(), "Executing \"sleep 3\" took an unusually long time") {
if !strings.Contains(f1.String(), "is taking an unusually long time to respond") {
t.Errorf("runCmd does not print the correct log, instead print :%v", f1.String())
}

Expand All @@ -54,7 +54,7 @@ func TestRunCmdWarnSlowOnce(t *testing.T) {
t.Errorf("runCmd has error: %v", err)
}

if strings.Contains(f2.String(), "Executing \"sleep 3\" took an unusually long time") {
if strings.Contains(f2.String(), "is taking an unusually long time to respond") {
t.Errorf("runCmd does not print the correct log, instead print :%v", f2.String())
}
}
54 changes: 41 additions & 13 deletions pkg/drivers/kic/oci/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,54 +41,61 @@ import (
)

// DeleteContainersByLabel deletes all containers that have a specific label
// if there no containers found with the given label, it will return nil
// if there no containers found with the given label, it will return nil
func DeleteContainersByLabel(ociBin string, label string) []error {
var deleteErrs []error
ctx := context.Background()
cs, err := ListContainersByLabel(ctx, ociBin, label)

// Enable slow warning for container listing during delete operations
cs, err := ListContainersByLabel(ctx, ociBin, label, true)
if err != nil {
return []error{fmt.Errorf("listing containers by label %q", label)}
return []error{fmt.Errorf("listing containers by label %q: %v", label, err)}
}

if len(cs) == 0 {
return nil
}

for _, c := range cs {
_, err := ContainerStatus(ociBin, c)
// Enable slow warning for container status check
_, err := ContainerStatus(ociBin, c, true)
// only try to delete if docker/podman inspect returns
// if it doesn't it means docker daemon is stuck and needs restart
if err != nil {
deleteErrs = append(deleteErrs, errors.Wrapf(err, "delete container %s: %s daemon is stuck. please try again", c, ociBin))
klog.Errorf("%s daemon seems to be stuck. please try restarting your %s :%v", ociBin, ociBin, err)
continue
}

if err := ShutDown(ociBin, c); err != nil {
klog.Infof("couldn't shut down %s (might be okay): %v ", c, err)
}

if _, err := runCmd(exec.Command(ociBin, "rm", "-f", "-v", c)); err != nil {
// Enable slow warning for container removal
if _, err := runCmd(exec.Command(ociBin, "rm", "-f", "-v", c), true); err != nil {
deleteErrs = append(deleteErrs, errors.Wrapf(err, "delete container %s: output %s", c, err))
}

}
return deleteErrs
}

// DeleteContainer deletes a container by ID or Name
func DeleteContainer(ctx context.Context, ociBin string, name string) error {
_, err := ContainerStatus(ociBin, name)
// Enable slow warning for container status check
_, err := ContainerStatus(ociBin, name, true)
if err == context.DeadlineExceeded {
out.WarningT("{{.ocibin}} is taking an unusually long time to respond, consider restarting {{.ocibin}}", out.V{"ociBin": ociBin})
out.WarningT("{{.ocibin}} is taking an unusually long time to respond, consider restarting {{.ocibin}}", out.V{"ocibin": ociBin})
} else if err != nil {
klog.Warningf("error getting container status, will try to delete anyways: %v", err)
}

// try to delete anyways
if err := ShutDown(ociBin, name); err != nil {
klog.Infof("couldn't shut down %s (might be okay): %v ", name, err)
}

if _, err := runCmd(exec.CommandContext(ctx, ociBin, "rm", "-f", "-v", name)); err != nil {
// Enable slow warning for container removal
if _, err := runCmd(exec.CommandContext(ctx, ociBin, "rm", "-f", "-v", name), true); err != nil {
return errors.Wrapf(err, "delete %s", name)
}
return nil
Expand Down Expand Up @@ -368,7 +375,12 @@ func ContainerID(ociBin string, nameOrID string) (string, error) {

// ContainerExists checks if container name exists (either running or exited)
func ContainerExists(ociBin string, name string, warnSlow ...bool) (bool, error) {
rr, err := runCmd(exec.Command(ociBin, "ps", "-a", "--format", "{{.Names}}"), warnSlow...)
shouldWarn := false
if len(warnSlow) > 0 {
shouldWarn = warnSlow[0]
}

rr, err := runCmd(exec.Command(ociBin, "ps", "-a", "--format", "{{.Names}}"), shouldWarn)
if err != nil {
return false, err
}
Expand Down Expand Up @@ -540,7 +552,13 @@ func withPortMappings(portMappings []PortMapping) createOpt {

// ListContainersByLabel returns all the container names with a specified label
func ListContainersByLabel(ctx context.Context, ociBin string, label string, warnSlow ...bool) ([]string, error) {
rr, err := runCmd(exec.CommandContext(ctx, ociBin, "ps", "-a", "--filter", fmt.Sprintf("label=%s", label), "--format", "{{.Names}}"), warnSlow...)
// Default to no warnings, but enable if explicitly requested
shouldWarn := false
if len(warnSlow) > 0 {
shouldWarn = warnSlow[0]
}

rr, err := runCmd(exec.CommandContext(ctx, ociBin, "ps", "-a", "--filter", fmt.Sprintf("label=%s", label), "--format", "{{.Names}}"), shouldWarn)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -619,7 +637,12 @@ func PointToHostPodman() error {

// ContainerRunning returns running state of a container
func ContainerRunning(ociBin string, name string, warnSlow ...bool) (bool, error) {
rr, err := runCmd(exec.Command(ociBin, "container", "inspect", name, "--format={{.State.Running}}"), warnSlow...)
shouldWarn := false
if len(warnSlow) > 0 {
shouldWarn = warnSlow[0]
}

rr, err := runCmd(exec.Command(ociBin, "container", "inspect", name, "--format={{.State.Running}}"), shouldWarn)
if err != nil {
return false, err
}
Expand All @@ -628,8 +651,13 @@ func ContainerRunning(ociBin string, name string, warnSlow ...bool) (bool, error

// ContainerStatus returns status of a container running,exited,...
func ContainerStatus(ociBin string, name string, warnSlow ...bool) (state.State, error) {
shouldWarn := false
if len(warnSlow) > 0 {
shouldWarn = warnSlow[0]
}

cmd := exec.Command(ociBin, "container", "inspect", name, "--format={{.State.Status}}")
rr, err := runCmd(cmd, warnSlow...)
rr, err := runCmd(cmd, shouldWarn)
o := strings.TrimSpace(rr.Stdout.String())
switch o {
case "configured":
Expand Down
5 changes: 3 additions & 2 deletions pkg/minikube/delete/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,16 @@ func PossibleLeftOvers(ctx context.Context, cname string, driverName string) {

klog.Infof("deleting possible leftovers for %s (driver=%s) ...", cname, driverName)
delLabel := fmt.Sprintf("%s=%s", oci.ProfileLabelKey, cname)
cs, err := oci.ListContainersByLabel(ctx, bin, delLabel)

// Enable slow warning for container listing during delete operations
cs, err := oci.ListContainersByLabel(ctx, bin, delLabel, true)
if err == nil && len(cs) > 0 {
for _, c := range cs {
out.Step(style.DeletingHost, `Deleting container "{{.name}}" ...`, out.V{"name": cname})
err := oci.DeleteContainer(ctx, bin, c)
if err != nil { // it will error if there is no container to delete
klog.Errorf("error deleting container %q. You may want to delete it manually :\n%v", cname, err)
}

}
}

Expand Down