@@ -525,3 +525,210 @@ def test_multi_dependency_graph_task(tmp_path):
525525 assert sorted (
526526 [e .id for e in entries [task_id ].current_status .dependencies ]
527527 ) == sorted (deps )
528+
529+
def test_has_more_jobs(tmp_path):
    """Verify ``has_more_jobs`` tracks remaining work across workers.

    Covers three stages:
    1. A worker that stops mid-run still reports pending jobs.
    2. A retry-enabled worker picks up the previously failed job.
    3. Once everything succeeded, no worker reports more jobs.
    """
    worker = Graphband(
        sequential_task(),
        db=f"sqlite:///{tmp_path}/graphband.sqlite",
        lock=Lock(f"{tmp_path}/graphband.lock"),
    )
    assert worker.has_more_jobs is True
    for item in worker:
        if item.id == "task_5":
            break
    # there are still 4 remaining tasks
    assert worker.has_more_jobs is True

    assert len(list(worker)) == 4
    # won't pick up the failed job
    assert worker.has_more_jobs is False

    w2 = Graphband(
        sequential_task(),
        db=f"sqlite:///{tmp_path}/graphband.sqlite",
        lock=Lock(f"{tmp_path}/graphband.lock"),
        max_failed_retries=2,
        identifier="retry-worker",
    )
    # will pick up the failed job
    assert w2.has_more_jobs is True
    assert len(list(w2)) == 1
    assert w2.has_more_jobs is False

    w3 = Graphband(
        sequential_task(),
        db=f"sqlite:///{tmp_path}/graphband.sqlite",
        lock=Lock(f"{tmp_path}/graphband.lock"),
        max_failed_retries=2,
        identifier="retry-worker-2",
    )
    assert w3.has_more_jobs is False  # all jobs are now completed successfully
567+
568+
def blocked_dependency_graph_task():
    """Create a graph where task b blocks task c due to label requirements.

    Graph structure:
        a --> b --> c
    Where b requires 'special-worker' label, but a and c require 'main' label.
    """
    digraph = nx.DiGraph()
    digraph.add_edges_from([("a", "b"), ("b", "c")])
    # Only node 'b' gets an explicit requirement; the rest fall back to 'main'.
    digraph.nodes["b"]["requirements"] = {"special-worker"}

    for node in nx.topological_sort(digraph):
        attrs = digraph.nodes[node]
        yield Task(
            id=node,
            data=node,
            dependencies=set(digraph.predecessors(node)),
            requirements=attrs.get("requirements", {"main"}),
        )
590+
591+
def test_has_more_jobs_with_blocked_dependencies(tmp_path):
    """Test has_more_jobs when dependencies are blocked by label mismatches.

    This test verifies the scenario where:
    - a --> b --> c dependency chain
    - b needs a different label than a, c
    - has_more_jobs for the a/c worker should be true until c is completed
    - but the worker won't be able to pick up c because b isn't completed
    """
    db_url = f"sqlite:///{tmp_path}/graphband.sqlite"
    lock_file = f"{tmp_path}/graphband.lock"

    # Worker that can process 'main' tasks (a and c) but not 'special-worker' tasks (b)
    main_worker = Graphband(
        blocked_dependency_graph_task(),
        db=db_url,
        lock=Lock(lock_file),
        labels={"main"},
        identifier="main-worker",
    )

    # Initially should have jobs available
    assert main_worker.has_more_jobs is True

    # Only task 'a' is unblocked and label-compatible at this point.
    first_batch = list(main_worker)
    assert len(first_batch) == 1
    assert first_batch[0].id == "a"

    # Key behavior: has_more_jobs stays True even though this worker cannot
    # make progress — 'c' depends on 'b', which needs a different label.
    assert main_worker.has_more_jobs is True

    # A second pass yields nothing: 'c' is still blocked behind 'b'.
    assert len(list(main_worker)) == 0

    # Task 'c' remains incomplete, so more work is still reported.
    assert main_worker.has_more_jobs is True

    # Bring up a worker whose labels match task 'b'.
    special_worker = Graphband(
        blocked_dependency_graph_task(),
        db=db_url,
        lock=Lock(lock_file),
        labels={"special-worker"},
        identifier="special-worker",
    )

    # Task 'b' is available to the special worker.
    assert special_worker.has_more_jobs is True

    special_batch = list(special_worker)
    assert len(special_batch) == 1
    assert special_batch[0].id == "b"

    # With 'b' done, the special worker has nothing left to do.
    assert special_worker.has_more_jobs is False

    # Completing 'b' unblocks 'c' for the main worker.
    assert main_worker.has_more_jobs is True

    final_batch = list(main_worker)
    assert len(final_batch) == 1
    assert final_batch[0].id == "c"

    # Finally, no more jobs for either worker
    assert main_worker.has_more_jobs is False
    assert special_worker.has_more_jobs is False
660+
661+
def test_has_more_jobs_with_killed_workers(tmp_path):
    """Test has_more_jobs behavior when workers are killed
    and tasks exceed retry limits."""
    # Test case where killed tasks cannot be retried (max_killed_retries=0)
    lock_path = f"{tmp_path}/graphband.lock"
    db = f"sqlite:///{tmp_path}/graphband.sqlite"
    file = f"{tmp_path}/output.txt"

    # Spawn a worker process, then kill it mid-task so the task is orphaned.
    proc = multiprocessing.Process(
        target=task_worker,
        args=(sequential_task, lock_path, db, file, 3),
        kwargs={
            "heartbeat_timeout": 1,
            "heartbeat_interval": 0.5,
            "max_killed_retries": 0,  # No retries allowed for killed tasks
            "identifier": "killed-worker",
        },
    )
    proc.start()
    time.sleep(2)  # Let the worker start one task
    proc.kill()
    proc.join()

    time.sleep(2)
    # need to start another worker to mark the job as killed in the db
    _ = Graphband(
        sequential_task(),
        db=db,
        lock=Lock(lock_path),
        heartbeat_timeout=2,
        heartbeat_interval=1,
        identifier="update-worker",
    )
    time.sleep(2)

    # Confirm exactly one task ended up in the KILLED state.
    engine = create_engine(db)
    with Session(engine) as session:
        all_tasks = session.query(TaskEntry).all()
        killed = [
            t for t in all_tasks if t.current_status.status == TaskStatusEnum.KILLED
        ]
        assert len(killed) == 1

    def _make_worker(identifier, killed_retries):
        # Build a worker with a fresh Lock and the given killed-retry budget.
        return Graphband(
            sequential_task(),
            db=db,
            lock=Lock(lock_path),
            heartbeat_timeout=2,
            heartbeat_interval=1,
            max_killed_retries=killed_retries,
            identifier=identifier,
        )

    no_retries_worker = _make_worker("no-retries-worker", 0)  # No retries allowed
    retries_worker = _make_worker("retries-worker", 2)  # Allow retries

    assert no_retries_worker.has_more_jobs is True
    assert retries_worker.has_more_jobs is True
    # The no-retry worker finishes the 9 untouched tasks but skips the killed one.
    assert len(list(no_retries_worker)) == 9
    assert no_retries_worker.has_more_jobs is False
    # The retry-enabled worker still sees (and then completes) the killed task.
    assert retries_worker.has_more_jobs is True
    assert len(list(retries_worker)) == 1
    assert retries_worker.has_more_jobs is False
0 commit comments