@@ -19,8 +19,13 @@ package node
 import (
 	"context"
 	"encoding/json"
+	"fmt"
+	"math/rand"
 	"net/http"
+	"regexp"
 	"strconv"
+	"strings"
+	"sync"
 	"time"
 
 	v1 "k8s.io/api/core/v1"
@@ -29,6 +34,7 @@ import (
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/util/uuid"
 	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/apimachinery/pkg/watch"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
 	imageutils "k8s.io/kubernetes/test/utils/image"
@@ -197,4 +203,227 @@ var _ = SIGDescribe("Pods Extended", func() {
 			framework.ExpectEqual(pod.Status.QOSClass, v1.PodQOSGuaranteed)
 		})
 	})
+
+	framework.KubeDescribe("Pod Container Status", func() {
+		var podClient *framework.PodClient
+		ginkgo.BeforeEach(func() {
+			podClient = f.PodClient()
+		})
+
+		ginkgo.It("should never report success for a pending container", func() {
+			ginkgo.By("creating pods that should always exit 1 and terminating the pod after a random delay")
+
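+			// matches the symptom of https://github.com/kubernetes/kubernetes/issues/88766,
+			// where pod volume teardown races with container start in CRI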
+			var reBug88766 = regexp.MustCompile(`ContainerCannotRun.*rootfs_linux\.go.*kubernetes\.io~secret.*no such file or directory`)
+
+			var (
+				lock sync.Mutex
+				errs []error
+
+				wg sync.WaitGroup
+			)
+
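+			// several workers each create a series of pods that always exit 1,
+			// deleting each pod after a random delay of up to delay milliseconds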
+			const delay = 2000
+			const workers = 3
+			const pods = 15
+			var min, max time.Duration
+			for i := 0; i < workers; i++ {
+				wg.Add(1)
+				go func(i int) {
+					defer wg.Done()
+					for retries := 0; retries < pods; retries++ {
+						name := fmt.Sprintf("pod-submit-status-%d-%d", i, retries)
+						value := strconv.Itoa(time.Now().Nanosecond())
+						one := int64(1)
+						pod := &v1.Pod{
+							ObjectMeta: metav1.ObjectMeta{
+								Name: name,
+								Labels: map[string]string{
+									"name": "foo",
+									"time": value,
+								},
+							},
+							Spec: v1.PodSpec{
+								RestartPolicy:                 v1.RestartPolicyNever,
+								TerminationGracePeriodSeconds: &one,
+								Containers: []v1.Container{
+									{
+										Name:  "busybox",
+										Image: imageutils.GetE2EImage(imageutils.BusyBox),
+										Command: []string{
+											"/bin/false",
+										},
+										Resources: v1.ResourceRequirements{
+											Requests: v1.ResourceList{
+												v1.ResourceCPU:    resource.MustParse("5m"),
+												v1.ResourceMemory: resource.MustParse("10Mi"),
+											},
+										},
+									},
+								},
+							},
+						}
+
+						// create the pod, capture the change events, then delete the pod
+						start := time.Now()
+						created := podClient.Create(pod)
+						ch := make(chan []watch.Event)
+						go func() {
+							defer close(ch)
+							w, err := podClient.Watch(context.TODO(), metav1.ListOptions{
+								ResourceVersion: created.ResourceVersion,
+								FieldSelector:   fmt.Sprintf("metadata.name=%s", pod.Name),
+							})
+							if err != nil {
+								framework.Logf("Unable to watch pod %s: %v", pod.Name, err)
+								return
+							}
+							defer w.Stop()
+							events := []watch.Event{
+								{Type: watch.Added, Object: created},
+							}
+							for event := range w.ResultChan() {
+								events = append(events, event)
+								if event.Type == watch.Deleted {
+									break
+								}
+							}
+							ch <- events
+						}()
+
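+						// sleep a random fraction of delay before deleting, so deletion
+						// can land at any point in the container lifecycle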
+						t := time.Duration(rand.Intn(delay)) * time.Millisecond
+						time.Sleep(t)
+						err := podClient.Delete(context.TODO(), pod.Name, nil)
+						framework.ExpectNoError(err, "failed to delete pod")
+
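+						// the watch goroutine closes ch without sending if the watch could
+						// not be established; otherwise it delivers every event observed up
+						// to and including the pod's deletion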
+						events, ok := <-ch
+						if !ok {
+							continue
+						}
+						if len(events) < 2 {
+							framework.Fail("only got a single event")
+						}
+
+						end := time.Now()
+
+						// check the returned events for consistency
+						var duration, completeDuration time.Duration
+						var hasContainers, hasTerminated, hasTerminalPhase, hasRunningContainers bool
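+						// verifyFn rejects illegal status transitions: init containers
+						// appearing, the container status list changing size, a terminated
+						// container restarting or clearing its termination, or a terminal
+						// phase reverting; exit code 0 (success) is never acceptable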
+						verifyFn := func(event watch.Event) error {
+							var ok bool
+							pod, ok = event.Object.(*v1.Pod)
+							if !ok {
+								framework.Logf("Unexpected event object: %s %#v", event.Type, event.Object)
+								return nil
+							}
+
+							if len(pod.Status.InitContainerStatuses) != 0 {
+								return fmt.Errorf("pod %s on node %s had incorrect init containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.InitContainerStatuses)
+							}
+							if len(pod.Status.ContainerStatuses) == 0 {
+								if hasContainers {
+									return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.ContainerStatuses)
+								}
+								return nil
+							}
+							hasContainers = true
+							if len(pod.Status.ContainerStatuses) != 1 {
+								return fmt.Errorf("pod %s on node %s had incorrect containers: %#v", pod.Name, pod.Spec.NodeName, pod.Status.ContainerStatuses)
+							}
+							status := pod.Status.ContainerStatuses[0]
+							t := status.State.Terminated
+							if hasTerminated {
+								if status.State.Waiting != nil || status.State.Running != nil {
+									return fmt.Errorf("pod %s on node %s was terminated and then changed state: %#v", pod.Name, pod.Spec.NodeName, status)
+								}
+								if t == nil {
+									return fmt.Errorf("pod %s on node %s was terminated and then had termination cleared: %#v", pod.Name, pod.Spec.NodeName, status)
+								}
+							}
+							hasRunningContainers = status.State.Waiting == nil && status.State.Terminated == nil
+							if t != nil {
+								if !t.FinishedAt.Time.IsZero() {
+									duration = t.FinishedAt.Sub(t.StartedAt.Time)
+									completeDuration = t.FinishedAt.Sub(pod.CreationTimestamp.Time)
+								}
+
+								defer func() { hasTerminated = true }()
+								switch {
+								case t.ExitCode == 1:
+									// expected
+								case t.ExitCode == 128 && reBug88766.MatchString(t.Message):
+									// pod volume teardown races with container start in CRI, which reports a failure
+									framework.Logf("pod %s on node %s failed with the symptoms of https://github.com/kubernetes/kubernetes/issues/88766", pod.Name, pod.Spec.NodeName)
+								default:
+									return fmt.Errorf("pod %s on node %s container unexpected exit code %d: start=%s end=%s reason=%s message=%s", pod.Name, pod.Spec.NodeName, t.ExitCode, t.StartedAt, t.FinishedAt, t.Reason, t.Message)
+								}
+							}
+							if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded {
+								hasTerminalPhase = true
+							} else {
+								if hasTerminalPhase {
+									return fmt.Errorf("pod %s on node %s was in a terminal phase and then reverted: %#v", pod.Name, pod.Spec.NodeName, pod.Status)
+								}
+							}
+							return nil
+						}
+
+						var eventErr error
+						for _, event := range events[1:] {
+							if err := verifyFn(event); err != nil {
+								eventErr = err
+								break
+							}
+						}
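+						// record results while holding the lock; errs is shared by all workers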
+						func() {
+							lock.Lock()
+							defer lock.Unlock()
+
+							if eventErr != nil {
+								errs = append(errs, eventErr)
+								return
+							}
+
+							if !hasTerminalPhase {
+								var names []string
+								for _, status := range pod.Status.ContainerStatuses {
+									if status.State.Terminated != nil || status.State.Running != nil {
+										names = append(names, status.Name)
+									}
+								}
+								switch {
+								case len(names) > 0:
+									errs = append(errs, fmt.Errorf("pod %s on node %s did not reach a terminal phase before being deleted but had running containers: phase=%s, running-containers=%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase, strings.Join(names, ",")))
+								case pod.Status.Phase != v1.PodPending:
+									errs = append(errs, fmt.Errorf("pod %s on node %s was not Pending but has no running containers: phase=%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase))
+								}
+							}
+							if hasRunningContainers {
+								data, _ := json.MarshalIndent(pod.Status.ContainerStatuses, "", "  ")
+								errs = append(errs, fmt.Errorf("pod %s on node %s had running or unknown container status before being deleted:\n%s", pod.Name, pod.Spec.NodeName, string(data)))
+							}
+						}()
+
+						if duration < min {
+							min = duration
+						}
+						if duration > max || max == 0 {
+							max = duration
+						}
+						framework.Logf("Pod %s on node %s timings total=%s t=%s run=%s execute=%s", pod.Name, pod.Spec.NodeName, end.Sub(start), t, completeDuration, duration)
+					}
+
+				}(i)
+			}
+
+			wg.Wait()
+
+			if len(errs) > 0 {
+				var messages []string
+				for _, err := range errs {
+					messages = append(messages, err.Error())
+				}
+				framework.Failf("%d errors:\n%v", len(errs), strings.Join(messages, "\n"))
+			}
+		})
+	})
 })