Skip to content

Commit c7a4f0b

Browse files
committed
fix: retry log streaming after 3.5 hours to work around the 4h kubelet limit
Signed-off-by: Ilya Lesikov <ilya@lesikov.com>
1 parent befdedb commit c7a4f0b

File tree

1 file changed

+27
-11
lines changed

1 file changed

+27
-11
lines changed

pkg/tracker/pod/tracker.go

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package pod
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
67
"io"
78
"os"
@@ -25,6 +26,8 @@ import (
2526
"github.com/werf/kubedog/pkg/trackers/dyntracker/util"
2627
)
2728

29+
// errLogStreamingTimeout is the sentinel passed as the cause to the timeout
// context created in followContainerLogs; that function also checks for it
// before re-invoking itself to resume log streaming.
var errLogStreamingTimeout = errors.New("log streaming timeout reached")
30+
2831
type ContainerError struct {
2932
Message string
3033
ContainerName string
@@ -425,21 +428,17 @@ func (pod *Tracker) handleContainersState(object *corev1.Pod) error {
425428
return nil
426429
}
427430

428-
func (pod *Tracker) followContainerLogs(ctx context.Context, containerName string) error {
429-
if pod.ignoreLogs {
430-
return nil
431-
}
431+
func (pod *Tracker) followContainerLogs(ctx context.Context, containerName string, sinceTime *metav1.Time) error {
432+
// See: https://github.com/kubernetes/kubernetes/issues/104580#issuecomment-905744137
433+
ctx, _ = context.WithTimeoutCause(ctx, 3*time.Hour, errLogStreamingTimeout)
432434

433435
logOpts := &corev1.PodLogOptions{
434436
Container: containerName,
435437
Timestamps: true,
436438
Follow: true,
439+
SinceTime: sinceTime,
437440
}
438-
if !pod.LogsFromTime.IsZero() {
439-
logOpts.SinceTime = &metav1.Time{
440-
Time: pod.LogsFromTime,
441-
}
442-
}
441+
443442
req := pod.Kube.CoreV1().
444443
Pods(pod.Namespace).
445444
GetLogs(pod.ResourceName, logOpts)
@@ -496,6 +495,12 @@ func (pod *Tracker) followContainerLogs(ctx context.Context, containerName strin
496495
fmt.Printf("Follow container logs for pod %q context canceled: %s\n", pod.ResourceName, context.Cause(ctx))
497496
}
498497

498+
if errors.Is(ctx.Err(), errLogStreamingTimeout) {
499+
return pod.followContainerLogs(ctx, containerName, &metav1.Time{
500+
Time: time.Now(),
501+
})
502+
}
503+
499504
return nil
500505
default:
501506
}
@@ -510,12 +515,23 @@ func (pod *Tracker) trackContainer(ctx context.Context, containerName string, co
510515
case state := <-containerTrackerStateChanges:
511516
switch state {
512517
case tracker.FollowingContainerLogs:
513-
err := pod.followContainerLogs(ctx, containerName)
514-
if err != nil {
518+
if pod.ignoreLogs {
519+
return nil
520+
}
521+
522+
var sinceTime *metav1.Time
523+
if !pod.LogsFromTime.IsZero() {
524+
sinceTime = &metav1.Time{
525+
Time: pod.LogsFromTime,
526+
}
527+
}
528+
529+
if err := pod.followContainerLogs(ctx, containerName, sinceTime); err != nil {
515530
if debug.Debug() {
516531
fmt.Fprintf(os.Stderr, "pod/%s container/%s logs streaming error: %s\n", pod.ResourceName, containerName, err)
517532
}
518533
}
534+
519535
return nil
520536

521537
case tracker.ContainerTrackerDone:

0 commit comments

Comments
 (0)