@@ -538,6 +538,23 @@ func logPodStates(pods []v1.Pod) {
538
538
Logf ("" ) // Final empty line helps for readability.
539
539
}
540
540
541
+ // logPodTerminationMessages logs termination messages for failing pods. It's a short snippet (much smaller than full logs), but it often shows
542
+ // why pods crashed and since it is in the API, it's fast to retrieve.
543
+ func logPodTerminationMessages (pods []v1.Pod ) {
544
+ for _ , pod := range pods {
545
+ for _ , status := range pod .Status .InitContainerStatuses {
546
+ if status .LastTerminationState .Terminated != nil && len (status .LastTerminationState .Terminated .Message ) > 0 {
547
+ Logf ("%s[%s].initContainer[%s]=%s" , pod .Name , pod .Namespace , status .Name , status .LastTerminationState .Terminated .Message )
548
+ }
549
+ }
550
+ for _ , status := range pod .Status .ContainerStatuses {
551
+ if status .LastTerminationState .Terminated != nil && len (status .LastTerminationState .Terminated .Message ) > 0 {
552
+ Logf ("%s[%s].container[%s]=%s" , pod .Name , pod .Namespace , status .Name , status .LastTerminationState .Terminated .Message )
553
+ }
554
+ }
555
+ }
556
+ }
557
+
541
558
// errorBadPodsStates creates an error message with basic info about the bad pods, for debugging.
542
559
func errorBadPodsStates (badPods []v1.Pod , desiredPods int , ns , desiredState string , timeout time.Duration ) string {
543
560
errStr := fmt .Sprintf ("%d / %d pods in namespace %q are NOT in %s state in %v\n " , len (badPods ), desiredPods , ns , desiredState , timeout )
@@ -2422,14 +2439,15 @@ func DumpAllNamespaceInfo(c clientset.Interface, namespace string) {
2422
2439
return c .CoreV1 ().Events (ns ).List (opts )
2423
2440
}, namespace )
2424
2441
2442
+ dumpAllPodInfoForNamespace (c , namespace )
2443
+
2425
2444
// If cluster is large, then the following logs are basically useless, because:
2426
2445
// 1. it takes tens of minutes or hours to grab all of them
2427
2446
// 2. there are so many of them that working with them is mostly impossible
2428
2447
// So we dump them only if the cluster is relatively small.
2429
2448
maxNodesForDump := TestContext .MaxNodesToGather
2430
2449
if nodes , err := c .CoreV1 ().Nodes ().List (metav1.ListOptions {}); err == nil {
2431
2450
if len (nodes .Items ) <= maxNodesForDump {
2432
- dumpAllPodInfo (c )
2433
2451
dumpAllNodeInfo (c )
2434
2452
} else {
2435
2453
Logf ("skipping dumping cluster info - cluster too large" )
@@ -2452,12 +2470,13 @@ func (o byFirstTimestamp) Less(i, j int) bool {
2452
2470
return o [i ].FirstTimestamp .Before (& o [j ].FirstTimestamp )
2453
2471
}
2454
2472
2455
- func dumpAllPodInfo (c clientset.Interface ) {
2456
- pods , err := c .CoreV1 ().Pods ("" ).List (metav1.ListOptions {})
2473
+ func dumpAllPodInfoForNamespace (c clientset.Interface , namespace string ) {
2474
+ pods , err := c .CoreV1 ().Pods (namespace ).List (metav1.ListOptions {})
2457
2475
if err != nil {
2458
2476
Logf ("unable to fetch pod debug info: %v" , err )
2459
2477
}
2460
2478
logPodStates (pods .Items )
2479
+ logPodTerminationMessages (pods .Items )
2461
2480
}
2462
2481
2463
2482
func dumpAllNodeInfo (c clientset.Interface ) {
0 commit comments