@@ -31,12 +31,13 @@ import (
 )
 
 type CollectCopyFromHost struct {
-	Collector    *troubleshootv1beta2.CopyFromHost
-	BundlePath   string
-	Namespace    string
-	ClientConfig *rest.Config
-	Client       kubernetes.Interface
-	Context      context.Context
+	Collector        *troubleshootv1beta2.CopyFromHost
+	BundlePath       string
+	Namespace        string
+	ClientConfig     *rest.Config
+	Client           kubernetes.Interface
+	Context          context.Context
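+	// RetryFailedMount keeps the collector waiting on pods that report
+	// FailedMount events instead of failing fast (see checkDaemonPodStatus).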
+	RetryFailedMount bool
 	RBACErrors
 }
 
@@ -73,7 +74,7 @@ func (c *CollectCopyFromHost) Collect(progressChan chan<- interface{}) (Collecto
 		namespace, _, _ = kubeconfig.Namespace()
 	}
 
-	_, cleanup, err := copyFromHostCreateDaemonSet(c.Context, c.Client, c.Collector, hostDir, namespace, "troubleshoot-copyfromhost-", labels)
+	_, cleanup, err := c.copyFromHostCreateDaemonSet(c.Context, c.Client, c.Collector, hostDir, namespace, "troubleshoot-copyfromhost-", labels)
 	defer cleanup()
 	if err != nil {
 		return nil, errors.Wrap(err, "create daemonset")
@@ -125,7 +126,7 @@ func (c *CollectCopyFromHost) Collect(progressChan chan<- interface{}) (Collecto
 	}
 }
 
-func copyFromHostCreateDaemonSet(ctx context.Context, client kubernetes.Interface, collector *troubleshootv1beta2.CopyFromHost, hostPath string, namespace string, generateName string, labels map[string]string) (name string, cleanup func(), err error) {
+func (c *CollectCopyFromHost) copyFromHostCreateDaemonSet(ctx context.Context, client kubernetes.Interface, collector *troubleshootv1beta2.CopyFromHost, hostPath string, namespace string, generateName string, labels map[string]string) (name string, cleanup func(), err error) {
 	pullPolicy := corev1.PullIfNotPresent
 	volumeType := corev1.HostPathDirectory
 	if collector.ImagePullPolicy != "" {
@@ -229,6 +230,11 @@ func copyFromHostCreateDaemonSet(ctx context.Context, client kubernetes.Interfac
 	for {
 		select {
 		case <-time.After(1 * time.Second):
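+			// Check the daemonset pods on each tick so a mount failure surfaces
+			// immediately instead of waiting out the timeout.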
+			err = checkDaemonPodStatus(client, ctx, labels, namespace, c.RetryFailedMount)
+			if err != nil {
+				return createdDS.Name, cleanup, err
+			}
+
 		case <-childCtx.Done():
 			klog.V(2).Infof("Timed out waiting for daemonset %s to be ready", createdDS.Name)
 			return createdDS.Name, cleanup, errors.Wrap(ctx.Err(), "wait for daemonset")
@@ -373,7 +379,14 @@ func copyFilesFromHost(ctx context.Context, dstPath string, clientConfig *restcl
 
 func deleteDaemonSet(client kubernetes.Interface, ctx context.Context, createdDS *appsv1.DaemonSet, namespace string, labels map[string]string) {
 	klog.V(2).Infof("Daemonset %s has been scheduled for deletion", createdDS.Name)
-	if err := client.AppsV1().DaemonSets(namespace).Delete(context.Background(), createdDS.Name, metav1.DeleteOptions{}); err != nil {
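+	// A zero-second grace period terminates the pods immediately rather than
+	// waiting for a graceful shutdown.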
+	zeroGracePeriod := int64(0)
+	// Foreground is used to delete the DaemonSet pods before deleting the DaemonSet
+	deletePropagationForeground := metav1.DeletePropagationForeground
+
+	if err := client.AppsV1().DaemonSets(namespace).Delete(ctx, createdDS.Name, metav1.DeleteOptions{
+		GracePeriodSeconds: &zeroGracePeriod,
+		PropagationPolicy:  &deletePropagationForeground,
+	}); err != nil {
 		klog.Errorf("Failed to delete daemonset %s: %v", createdDS.Name, err)
 		return
 	}
@@ -383,10 +396,24 @@ func deleteDaemonSet(client kubernetes.Interface, ctx context.Context, createdDS
 		labelSelector = append(labelSelector, fmt.Sprintf("%s=%s", k, v))
 	}
 
-	dsPods := &corev1.PodList{}
+	dsPods, err := client.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: strings.Join(labelSelector, ",")})
+	if err != nil {
+		klog.Errorf("Failed to list pods for DaemonSet %s: %v", createdDS.Name, err)
+		return
+	}
+
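+	// Delete each daemonset pod explicitly so cleanup does not block on slow
+	// pod terminations.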
+	for _, pod := range dsPods.Items {
+		klog.V(2).Infof("Deleting pod %s", pod.Name)
+		if err := client.CoreV1().Pods(namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{
+			GracePeriodSeconds: &zeroGracePeriod,
+		}); err != nil {
+			klog.Errorf("Failed to delete pod %s: %v", pod.Name, err)
+		}
+	}
+
 	klog.V(2).Infof("Continuously poll each second for Pod deletion of DaemonSet %s for maximum %d seconds", createdDS.Name, constants.MAX_TIME_TO_WAIT_FOR_POD_DELETION/time.Second)
 
-	err := wait.PollUntilContextTimeout(ctx, time.Second, constants.MAX_TIME_TO_WAIT_FOR_POD_DELETION, true, func(ctx context.Context) (bool, error) {
+	err = wait.PollUntilContextTimeout(ctx, time.Second, constants.MAX_TIME_TO_WAIT_FOR_POD_DELETION, true, func(ctx context.Context) (bool, error) {
 		pods, listErr := client.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
 			LabelSelector: strings.Join(labelSelector, ","),
 		})
@@ -410,7 +437,6 @@ func deleteDaemonSet(client kubernetes.Interface, ctx context.Context, createdDS
 	// If there was an error from the polling (e.g., the context deadline was exceeded before all pods were deleted),
 	// delete each remaining pod with a zero-second grace period
 	if err != nil {
-		zeroGracePeriod := int64(0)
 		for _, pod := range dsPods.Items {
 			klog.V(2).Infof("Pod %s forcefully deleted after reaching the maximum wait time of %d seconds", pod.Name, constants.MAX_TIME_TO_WAIT_FOR_POD_DELETION/time.Second)
 			err := client.CoreV1().Pods(namespace).Delete(context.TODO(), pod.Name, metav1.DeleteOptions{
@@ -424,3 +450,34 @@ func deleteDaemonSet(client kubernetes.Interface, ctx context.Context, createdDS
 		}
 	}
 }
+
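+// checkDaemonPodStatus returns an error if any daemonset pod that is not yet
+// Running has a FailedMount event, unless retryFailedMount is set.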
+func checkDaemonPodStatus(client kubernetes.Interface, ctx context.Context, labels map[string]string, namespace string, retryFailedMount bool) error {
+	var labelSelector []string
+	for k, v := range labels {
+		labelSelector = append(labelSelector, fmt.Sprintf("%s=%s", k, v))
+	}
+	pods, err := client.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
+		LabelSelector: strings.Join(labelSelector, ","),
+	})
+	if err != nil {
+		return errors.Wrap(err, "get daemonset pods")
+	}
+
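+	// A pod stuck outside the Running phase may have failed to mount the host
+	// path; its events tell us why.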
+	for _, pod := range pods.Items {
+		if pod.Status.Phase != corev1.PodRunning {
+			events, _ := client.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{
+				FieldSelector: fmt.Sprintf("involvedObject.uid=%s", pod.UID),
+			})
+
+			for _, event := range events.Items {
+				// A FailedMount event means the pod could not mount the volume and will stay stuck in the Pending state.
+				// In that case, return an error to the caller to indicate that the path does not exist.
+				if event.Reason == "FailedMount" && !retryFailedMount {
+					klog.V(2).Infof("pod %s has a FailedMount event: %s", pod.Name, event.Message)
+					return errors.Errorf("path does not exist")
+				}
+			}
+		}
+	}
+	return nil
+}