2424import numpy as np
2525import pandas
2626import pandas as pd
27+ import pandas .testing
2728import pytest
2829import requests
2930import shapely .geometry
@@ -745,7 +746,6 @@ def get_status(job_id, current_status):
745746 filled_running_start_time = final_df .iloc [0 ]["running_start_time" ]
746747 assert isinstance (rfc3339 .parse_datetime (filled_running_start_time ), datetime .datetime )
747748
748-
749749 def test_process_threadworker_updates (self , tmp_path , caplog ):
750750 pool = _JobManagerWorkerThreadPool (max_workers = 2 )
751751 stats = collections .defaultdict (int )
@@ -755,8 +755,6 @@ def test_process_threadworker_updates(self, tmp_path, caplog):
755755 pool .submit_task (DummyTask ("j-1" , df_idx = 1 , db_update = {"status" : "queued" }, stats_update = None ))
756756 pool .submit_task (DummyTask ("j-2" , df_idx = 2 , db_update = None , stats_update = {"queued" : 1 }))
757757 pool .submit_task (DummyTask ("j-3" , df_idx = 3 , db_update = None , stats_update = None ))
758- # Invalid index (not in DB)
759- pool .submit_task (DummyTask ("j-missing" , df_idx = 4 , db_update = {"status" : "created" }, stats_update = None ))
760758
761759 df_initial = pd .DataFrame (
762760 {
@@ -768,23 +766,62 @@ def test_process_threadworker_updates(self, tmp_path, caplog):
768766
769767 mgr = MultiBackendJobManager (root_dir = tmp_path / "jobs" )
770768
771- with caplog .at_level (logging .ERROR ):
772- mgr ._process_threadworker_updates (worker_pool = pool , job_db = job_db , stats = stats )
769+ mgr ._process_threadworker_updates (worker_pool = pool , job_db = job_db , stats = stats )
773770
774771 df_final = job_db .read ()
772+ pandas .testing .assert_frame_equal (
773+ df_final [["id" , "status" ]],
774+ pandas .DataFrame (
775+ {
776+ "id" : ["j-0" , "j-1" , "j-2" , "j-3" ],
777+ "status" : ["queued" , "queued" , "created" , "created" ],
778+ }
779+ ),
780+ )
781+ assert stats == dirty_equals .IsPartialDict (
782+ {
783+ "queued" : 2 ,
784+ "job_db persist" : 1 ,
785+ }
786+ )
787+ assert caplog .messages == []
788+
def test_process_threadworker_updates_unknown(self, tmp_path, caplog):
    """Check how worker results with an unknown dataframe index are handled.

    Two tasks are submitted: one targeting row 0 of a two-row job database
    and one targeting index 4, which does not exist in that database.
    The test expects that:

    - row 0 ("j-123") ends up with status "queued", row 1 ("j-456") is untouched
      and no extra rows appear;
    - the stats contain ``"queued": 1`` and ``"job_db persist": 1``;
    - exactly one log message is captured, mentioning the ignored index 4.
    """
    pool = _JobManagerWorkerThreadPool(max_workers=2)
    stats = collections.defaultdict(int)

    # Task for an index that exists in the job database (row 0).
    # NOTE(review): presumably `df_idx` is the row index the result applies to — confirm against DummyTask.
    pool.submit_task(DummyTask("j-123", df_idx=0, db_update={"status": "queued"}, stats_update={"queued": 1}))
    # Task for index 4, which is absent from the two-row dataframe below.
    pool.submit_task(DummyTask("j-unknown", df_idx=4, db_update={"status": "created"}, stats_update=None))

    df_initial = pd.DataFrame(
        {
            "id": ["j-123", "j-456"],
            "status": ["created", "created"],
        }
    )
    job_db = CsvJobDatabase(tmp_path / "jobs.csv").initialize_from_df(df_initial)

    mgr = MultiBackendJobManager(root_dir=tmp_path / "jobs")

    mgr._process_threadworker_updates(worker_pool=pool, job_db=job_db, stats=stats)

    # Only the known row may have changed; the frame must still have two rows.
    df_final = job_db.read()
    pandas.testing.assert_frame_equal(
        df_final[["id", "status"]],
        pandas.DataFrame(
            {
                "id": ["j-123", "j-456"],
                "status": ["queued", "created"],
            }
        ),
    )
    # Partial match: other stats keys may be present.
    assert stats == dirty_equals.IsPartialDict(
        {
            "queued": 1,
            "job_db persist": 1,
        }
    )
    # Exactly one captured message, and it must mention the unknown index.
    assert caplog.messages == [dirty_equals.IsStr(regex=".*Ignoring unknown.*indices.*4.*")]
788825
789826 def test_no_results_leaves_db_and_stats_untouched (self , tmp_path , caplog ):
790827 pool = _JobManagerWorkerThreadPool (max_workers = 2 )
0 commit comments