Skip to content

Commit 7160ef4

Browse files
committed
e2e: node: add code to track the pod creator code
e2e_node tests depend on very specific shared state (the node state). Pods leaked between tests often silently corrupt the test preconditions, causing hard-to-debug CI failures. We add the option to annotate pods with the code location (source file:line) which triggered the pod creation, so it becomes easier to track down which test needs better cleanup. The relevant e2e framework code is used in all the e2e suites, so to minimize any unwanted consequences we make the feature opt-in, planning to enable it initially (and likely only) in the e2e_node tests. Signed-off-by: Francesco Romani <[email protected]>
1 parent 9d63e57 commit 7160ef4

File tree

1 file changed

+52
-7
lines changed

1 file changed

+52
-7
lines changed

test/e2e/framework/pod/pod_client.go

Lines changed: 52 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import (
3636
"k8s.io/kubectl/pkg/util/podutils"
3737

3838
"github.com/onsi/ginkgo/v2"
39+
ginkgotypes "github.com/onsi/ginkgo/v2/types"
3940
"github.com/onsi/gomega"
4041

4142
"k8s.io/kubernetes/test/e2e/framework"
@@ -56,6 +57,19 @@ const (
5657

5758
// it is copied from k8s.io/kubernetes/pkg/kubelet/sysctl
5859
forbiddenReason = "SysctlForbidden"
60+
61+
// AnnotationTestOwner is the annotation key recording which test created this pod.
62+
AnnotationTestOwner = "owner.test"
63+
)
64+
65+
// global flags so we can enable features per-suite instead of per-client.
66+
var (
67+
// GlobalOwnerTracking controls if newly created PodClients should automatically annotate
68+
// the pod with the owner test. The owner test is identified by "sourcecodepath:linenumber".
69+
// Annotating the pods this way is useful to troubleshoot tests which do insufficient cleanup.
70+
// Default is false to maximize backward compatibility.
71+
// See also: WithOwnerTracking, AnnotationTestOwner
72+
GlobalOwnerTracking bool
5973
)
6074

6175
// ImagePrePullList is the images used in the current test suite. It should be initialized in test suite and
@@ -68,9 +82,10 @@ var ImagePrePullList sets.String
6882
// node e2e pod scheduling.
6983
func NewPodClient(f *framework.Framework) *PodClient {
7084
return &PodClient{
71-
f: f,
72-
PodInterface: f.ClientSet.CoreV1().Pods(f.Namespace.Name),
73-
namespace: f.Namespace.Name,
85+
f: f,
86+
PodInterface: f.ClientSet.CoreV1().Pods(f.Namespace.Name),
87+
namespace: f.Namespace.Name,
88+
ownerTracking: GlobalOwnerTracking,
7489
}
7590
}
7691

@@ -79,29 +94,45 @@ func NewPodClient(f *framework.Framework) *PodClient {
7994
// node e2e pod scheduling.
8095
func PodClientNS(f *framework.Framework, namespace string) *PodClient {
8196
return &PodClient{
82-
f: f,
83-
PodInterface: f.ClientSet.CoreV1().Pods(namespace),
84-
namespace: namespace,
97+
f: f,
98+
PodInterface: f.ClientSet.CoreV1().Pods(namespace),
99+
namespace: namespace,
100+
ownerTracking: GlobalOwnerTracking,
85101
}
86102
}
87103

88104
// PodClient is a struct for pod client.
89105
type PodClient struct {
90106
f *framework.Framework
91107
v1core.PodInterface
92-
namespace string
108+
namespace string
109+
ownerTracking bool
110+
}
111+
112+
// WithOwnerTracking controls the automatic addition of annotations recording the code location
113+
// which created a pod. This is helpful when troubleshooting e2e tests (like e2e_node)
114+
// which leak pods because of insufficient cleanup.
115+
// Note the value receiver: we work on a shallow clone to avoid mutating the original client.
116+
// The default is the value of GlobalOwnerTracking *when the client was created*.
117+
func (c PodClient) WithOwnerTracking(value bool) *PodClient {
118+
c.ownerTracking = value
119+
return &c
93120
}
94121

95122
// Create creates a new pod according to the framework specifications (don't wait for it to start).
96123
func (c *PodClient) Create(ctx context.Context, pod *v1.Pod) *v1.Pod {
124+
ginkgo.GinkgoHelper()
97125
c.mungeSpec(pod)
126+
c.setOwnerAnnotation(pod)
98127
p, err := c.PodInterface.Create(ctx, pod, metav1.CreateOptions{})
99128
framework.ExpectNoError(err, "Error creating Pod")
100129
return p
130+
101131
}
102132

103133
// CreateSync creates a new pod according to the framework specifications, and waits for it to start and be running and ready.
104134
func (c *PodClient) CreateSync(ctx context.Context, pod *v1.Pod) *v1.Pod {
135+
ginkgo.GinkgoHelper()
105136
p := c.Create(ctx, pod)
106137
framework.ExpectNoError(WaitTimeoutForPodReadyInNamespace(ctx, c.f.ClientSet, p.Name, c.namespace, framework.PodStartTimeout))
107138
// Get the newest pod after it becomes running and ready, some status may change after pod created, such as pod ip.
@@ -112,6 +143,7 @@ func (c *PodClient) CreateSync(ctx context.Context, pod *v1.Pod) *v1.Pod {
112143

113144
// CreateBatch creates a batch of pods. All pods are created before waiting.
114145
func (c *PodClient) CreateBatch(ctx context.Context, pods []*v1.Pod) []*v1.Pod {
146+
ginkgo.GinkgoHelper()
115147
ps := make([]*v1.Pod, len(pods))
116148
var wg sync.WaitGroup
117149
for i, pod := range pods {
@@ -192,6 +224,19 @@ func (c *PodClient) DeleteSync(ctx context.Context, name string, options metav1.
192224
framework.ExpectNoError(WaitForPodNotFoundInNamespace(ctx, c.f.ClientSet, name, c.namespace, timeout), "wait for pod %q to disappear", name)
193225
}
194226

227+
// setOwnerAnnotation adds an annotation to help identify tests which incorrectly leak pods because of insufficient cleanup.
228+
func (c *PodClient) setOwnerAnnotation(pod *v1.Pod) {
229+
if !c.ownerTracking {
230+
return
231+
}
232+
ginkgo.GinkgoHelper()
233+
location := ginkgotypes.NewCodeLocation(0)
234+
if pod.Annotations == nil {
235+
pod.Annotations = make(map[string]string)
236+
}
237+
pod.Annotations[AnnotationTestOwner] = fmt.Sprintf("%s:%d", location.FileName, location.LineNumber)
238+
}
239+
195240
// mungeSpec applies test-suite specific transformations to the pod spec.
196241
func (c *PodClient) mungeSpec(pod *v1.Pod) {
197242
if !framework.TestContext.NodeE2E {

0 commit comments

Comments
 (0)