@@ -271,5 +271,100 @@ var _ = Describe("TensorFusionConnection Controller", func() {
271271 connection .Status .ConnectionURL == "native+192.168.1.2+8081+test-worker-2-0"
272272 }, time .Second * 5 , time .Millisecond * 100 ).Should (BeTrue ())
273273 })
274+
275+ It ("should update status to WorkerPending when worker selection fails" , func () {
276+ By ("Creating a TensorFusionWorkload without worker status" )
277+
278+ // Create a workload with no workers (empty WorkerStatuses)
279+ failWorkloadName := "test-workload-no-workers"
280+ failWorkloadNamespacedName := types.NamespacedName {
281+ Name : failWorkloadName ,
282+ Namespace : "default" ,
283+ }
284+
285+ failWorkload := & tfv1.TensorFusionWorkload {
286+ ObjectMeta : metav1.ObjectMeta {
287+ Name : failWorkloadName ,
288+ Namespace : "default" ,
289+ },
290+ Spec : tfv1.TensorFusionWorkloadSpec {
291+ PoolName : "mock-empty" ,
292+ Resources : tfv1.Resources {
293+ Requests : tfv1.Resource {
294+ Tflops : resource .MustParse ("1" ),
295+ Vram : resource .MustParse ("1Gi" ),
296+ },
297+ Limits : tfv1.Resource {
298+ Tflops : resource .MustParse ("1" ),
299+ Vram : resource .MustParse ("1Gi" ),
300+ },
301+ },
302+ },
303+ Status : tfv1.TensorFusionWorkloadStatus {
304+ Replicas : 0 ,
305+ ReadyReplicas : 0 ,
306+ // Empty WorkerStatuses to force selection failure
307+ WorkerStatuses : []tfv1.WorkerStatus {},
308+ },
309+ }
310+ Expect (k8sClient .Create (ctx , failWorkload )).To (Succeed ())
311+ // Update status
312+ Expect (k8sClient .Status ().Update (ctx , failWorkload )).To (Succeed ())
313+
314+ // Verify workload was created properly
315+ createdWorkload := & tfv1.TensorFusionWorkload {}
316+ Eventually (func () bool {
317+ if err := k8sClient .Get (ctx , failWorkloadNamespacedName , createdWorkload ); err != nil {
318+ return false
319+ }
320+ return len (createdWorkload .Status .WorkerStatuses ) == 0
321+ }, time .Second * 5 , time .Millisecond * 100 ).Should (BeTrue ())
322+
323+ By ("Creating a connection to the workload with no workers" )
324+ failConnectionName := "test-connection-fail"
325+ failConnectionNamespacedName := types.NamespacedName {
326+ Name : failConnectionName ,
327+ Namespace : "default" ,
328+ }
329+
330+ failConnection := & tfv1.TensorFusionConnection {
331+ ObjectMeta : metav1.ObjectMeta {
332+ Name : failConnectionName ,
333+ Namespace : "default" ,
334+ Labels : map [string ]string {
335+ constants .WorkloadKey : failWorkloadName ,
336+ },
337+ },
338+ Spec : tfv1.TensorFusionConnectionSpec {
339+ WorkloadName : failWorkloadName ,
340+ },
341+ }
342+ Expect (k8sClient .Create (ctx , failConnection )).To (Succeed ())
343+
344+ By ("Reconciling the connection to trigger worker selection failure" )
345+ controllerReconciler := & TensorFusionConnectionReconciler {
346+ Client : k8sClient ,
347+ Scheme : k8sClient .Scheme (),
348+ Recorder : record .NewFakeRecorder (10 ),
349+ }
350+
351+ _ , err := controllerReconciler .Reconcile (ctx , reconcile.Request {
352+ NamespacedName : failConnectionNamespacedName ,
353+ })
354+ // We expect an error since worker selection should fail
355+ Expect (err ).To (HaveOccurred ())
356+
357+ By ("Verifying the connection status is updated to WorkerPending" )
358+ Eventually (func () bool {
359+ if err := k8sClient .Get (ctx , failConnectionNamespacedName , failConnection ); err != nil {
360+ return false
361+ }
362+ return failConnection .Status .Phase == tfv1 .WorkerPending
363+ }, time .Second * 5 , time .Millisecond * 100 ).Should (BeTrue ())
364+
365+ By ("Cleaning up test resources" )
366+ Expect (k8sClient .Delete (ctx , failConnection )).To (Succeed ())
367+ Expect (k8sClient .Delete (ctx , failWorkload )).To (Succeed ())
368+ })
274369 })
275370})
0 commit comments