Skip to content

Commit 18e9f33

Browse files
committed
Remove unhealthy symlink only for dead containers
Signed-off-by: Ted Yu <[email protected]>
1 parent 84dc704 commit 18e9f33

File tree

3 files changed

+47
-1
lines changed

3 files changed

+47
-1
lines changed

pkg/kubelet/kuberuntime/kuberuntime_gc.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,9 +356,35 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
356356
logSymlinks, _ := osInterface.Glob(filepath.Join(legacyContainerLogsDir, fmt.Sprintf("*.%s", legacyLogSuffix)))
357357
for _, logSymlink := range logSymlinks {
358358
if _, err := osInterface.Stat(logSymlink); os.IsNotExist(err) {
359+
if containerID, err := getContainerIDFromLegacyLogSymlink(logSymlink); err == nil {
360+
status, err := cgc.manager.runtimeService.ContainerStatus(containerID)
361+
if err != nil {
362+
// TODO: we should handle container not found (i.e. container was deleted) case differently
363+
// once https://github.com/kubernetes/kubernetes/issues/63336 is resolved
364+
klog.Infof("Error getting ContainerStatus for containerID %q: %v", containerID, err)
365+
} else if status.State != runtimeapi.ContainerState_CONTAINER_EXITED {
366+
// Here is how container log rotation works (see containerLogManager#rotateLatestLog):
367+
//
368+
// 1. rename current log to rotated log file whose filename contains current timestamp (fmt.Sprintf("%s.%s", log, timestamp))
369+
// 2. reopen the container log
370+
// 3. if #2 fails, rename rotated log file back to container log
371+
//
372+
// There is small but indeterministic amount of time during which log file doesn't exist (between steps #1 and #2, between #1 and #3).
373+
// Hence the symlink may be deemed unhealthy during that period.
374+
// See https://github.com/kubernetes/kubernetes/issues/52172
375+
//
376+
// We only remove unhealthy symlink for dead containers
377+
klog.V(5).Infof("Container %q is still running, not removing symlink %q.", containerID, logSymlink)
378+
continue
379+
}
380+
} else {
381+
klog.V(4).Infof("unable to obtain container Id: %v", err)
382+
}
359383
err := osInterface.Remove(logSymlink)
360384
if err != nil {
361385
klog.Errorf("Failed to remove container log dead symlink %q: %v", logSymlink, err)
386+
} else {
387+
klog.V(4).Infof("removed symlink %s", logSymlink)
362388
}
363389
}
364390
}

pkg/kubelet/kuberuntime/legacy.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package kuberuntime
1919
import (
2020
"fmt"
2121
"path"
22+
"strings"
2223

2324
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
2425
)
@@ -44,6 +45,25 @@ func legacyLogSymlink(containerID string, containerName, podName, podNamespace s
4445
containerName, containerID)
4546
}
4647

48+
// getContainerIDFromLegacyLogSymlink returns error if container Id cannot be parsed
49+
func getContainerIDFromLegacyLogSymlink(logSymlink string) (string, error) {
50+
parts := strings.Split(logSymlink, "-")
51+
if len(parts) == 0 {
52+
return "", fmt.Errorf("unable to find separator in %q", logSymlink)
53+
}
54+
containerIDWithSuffix := parts[len(parts)-1]
55+
suffix := fmt.Sprintf(".%s", legacyLogSuffix)
56+
if !strings.HasSuffix(containerIDWithSuffix, suffix) {
57+
return "", fmt.Errorf("%q doesn't end with %q", logSymlink, suffix)
58+
}
59+
containerIDWithoutSuffix := strings.TrimSuffix(containerIDWithSuffix, suffix)
60+
// container can be retrieved with container Id as short as 6 characters
61+
if len(containerIDWithoutSuffix) < 6 {
62+
return "", fmt.Errorf("container Id %q is too short", containerIDWithoutSuffix)
63+
}
64+
return containerIDWithoutSuffix, nil
65+
}
66+
4767
func logSymlink(containerLogsDir, podFullName, containerName, containerID string) string {
4868
suffix := fmt.Sprintf(".%s", legacyLogSuffix)
4969
logPath := fmt.Sprintf("%s_%s-%s", podFullName, containerName, containerID)

pkg/kubelet/remote/remote_runtime.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -473,7 +473,7 @@ func (r *RemoteRuntimeService) ContainerStats(containerID string) (*runtimeapi.C
473473
})
474474
if err != nil {
475475
if r.logReduction.ShouldMessageBePrinted(err.Error(), containerID) {
476-
klog.Errorf("ContainerStatus %q from runtime service failed: %v", containerID, err)
476+
klog.Errorf("ContainerStats %q from runtime service failed: %v", containerID, err)
477477
}
478478
return nil, err
479479
}

0 commit comments

Comments
 (0)