@@ -216,6 +216,66 @@ func GetClusterSecret(ctx context.Context, kubeFactory kube.Factory, namespace s
216
216
return res , nil
217
217
}
218
218
219
+ func WaitForJob (ctx context.Context , f kube.Factory , ns , jobName string ) error {
220
+ var attempt int32
221
+ return f .Wait (ctx , & kube.WaitOptions {
222
+ Interval : time .Second * 10 ,
223
+ Timeout : time .Minute * 11 , // BackOffLimit of 6 is a total of 630s, or 10m30s
224
+ Resources : []kube.Resource {
225
+ {
226
+ Name : jobName ,
227
+ Namespace : ns ,
228
+ WaitFunc : func (ctx context.Context , f kube.Factory , ns , name string ) (bool , error ) {
229
+ cs , err := f .KubernetesClientSet ()
230
+ if err != nil {
231
+ return false , err
232
+ }
233
+
234
+ j , err := cs .BatchV1 ().Jobs (ns ).Get (ctx , name , metav1.GetOptions {})
235
+ if err != nil {
236
+ return false , err
237
+ }
238
+
239
+ if j .Status .Failed > attempt {
240
+ attempt = j .Status .Failed
241
+ log .G (ctx ).Warnf ("Attempt #%d/%d failed:" , attempt , * j .Spec .BackoffLimit )
242
+ printJobLogs (ctx , cs , j )
243
+ } else if j .Status .Succeeded == 1 {
244
+ attempt += 1
245
+ log .G (ctx ).Infof ("Attempt #%d/%d succeeded:" , attempt , * j .Spec .BackoffLimit )
246
+ printJobLogs (ctx , cs , j )
247
+ }
248
+
249
+ for _ , cond := range j .Status .Conditions {
250
+ if cond .Type == batchv1 .JobFailed {
251
+ err = fmt .Errorf ("add-cluster-job failed after %d attempts" , j .Status .Failed )
252
+ break
253
+ }
254
+ }
255
+
256
+ return j .Status .Succeeded == 1 || j .Status .Failed == * j .Spec .BackoffLimit , err
257
+ },
258
+ },
259
+ },
260
+ })
261
+ }
262
+
263
+ func printJobLogs (ctx context.Context , client kubernetes.Interface , job * batchv1.Job ) {
264
+ p , err := getPodByJob (ctx , client , job )
265
+ if err != nil {
266
+ log .G (ctx ).Errorf ("Failed getting pod for job: $s" , err .Error ())
267
+ return
268
+ }
269
+
270
+ logs , err := getPodLogs (ctx , client , p .GetNamespace (), p .GetName ())
271
+ if err != nil {
272
+ log .G (ctx ).Errorf ("Failed getting logs for pod: $s" , err .Error ())
273
+ return
274
+ }
275
+
276
+ fmt .Printf ("=====\n %s\n =====\n \n " , logs )
277
+ }
278
+
219
279
func runNetworkTest (ctx context.Context , kubeFactory kube.Factory , urls ... string ) error {
220
280
const networkTestsTimeout = 120 * time .Second
221
281
@@ -426,8 +486,8 @@ func deleteJob(ctx context.Context, client kubernetes.Interface, job *batchv1.Jo
426
486
}
427
487
428
488
func getPodByJob (ctx context.Context , client kubernetes.Interface , job * batchv1.Job ) (* v1.Pod , error ) {
429
- pods , err := client .CoreV1 ().Pods (store . Get (). DefaultNamespace ).List (ctx , metav1.ListOptions {
430
- LabelSelector : "controller-uid=" + job .GetLabels () ["controller-uid" ],
489
+ pods , err := client .CoreV1 ().Pods (job . GetNamespace () ).List (ctx , metav1.ListOptions {
490
+ LabelSelector : "controller-uid=" + job .Spec . Selector . MatchLabels ["controller-uid" ],
431
491
})
432
492
if err != nil {
433
493
return nil , err
0 commit comments