@@ -15,30 +15,44 @@ def test_dag_loads(self):
1515 """Ensure the DAG is registered with the expected ID and task."""
1616 self .assertEqual (DAG .dag_id , "ocrmypdf_batch" )
1717 task_ids = [task .task_id for task in DAG .tasks ]
18- self .assertEqual (task_ids , ["run_ocrmypdf" ])
18+ self .assertEqual (task_ids , ["run_ocrmypdf" , "move_processed_pdfs" ])
1919
2020 @mock .patch ("centralized_metadata.ocrmypdf_processing_dag.subprocess.run" )
2121 def test_process_pdfs_runs_command_per_file (self , mock_run ):
2222 """Touch PDFs and verify we issue an OCR command for each."""
2323 with tempfile .TemporaryDirectory () as tmp_dir :
24- pdf_one = os .path .join (tmp_dir , "file_one.pdf" )
25- pdf_two = os .path .join (tmp_dir , "file_two.pdf" )
24+ pdf_one = os .path .realpath ( os . path . join (tmp_dir , "file_one.pdf" ) )
25+ pdf_two = os .path .realpath ( os . path . join (tmp_dir , "file_two.pdf" ) )
2626 open (pdf_one , "wb" ).close ()
2727 open (pdf_two , "wb" ).close ()
2828
2929 result = process_pdfs (params = {"pdf_directory" : tmp_dir })
3030
31- self .assertEqual (result , "processed-2-pdfs" )
31+ self .assertEqual (
32+ result ,
33+ [
34+ {
35+ "original" : pdf_one ,
36+ "optimized" : os .path .realpath (
37+ os .path .join (tmp_dir , "file_one_opti.pdf" )
38+ ),
39+ },
40+ {
41+ "original" : pdf_two ,
42+ "optimized" : os .path .realpath (
43+ os .path .join (tmp_dir , "file_two_opti.pdf" )
44+ ),
45+ },
46+ ],
47+ )
3248 self .assertEqual (mock_run .call_count , 2 )
3349
3450 expected_files = [Path (pdf_one ), Path (pdf_two )]
3551 for call_args , expected_file in zip (mock_run .call_args_list , expected_files ):
3652 command = call_args .args [0 ]
3753 self .assertEqual (command [:3 ], ["ocrmypdf" , "--optimize" , "1" ])
3854 self .assertEqual (Path (command [3 ]).name , expected_file .name )
39- self .assertEqual (
40- Path (command [4 ]).name , f"{ expected_file .stem } _ocr.pdf"
41- )
55+ self .assertEqual (Path (command [4 ]).name , f"{ expected_file .stem } _opti.pdf" )
4256 self .assertTrue (call_args .kwargs .get ("check" ))
4357
4458 @mock .patch ("centralized_metadata.ocrmypdf_processing_dag.subprocess.run" )
@@ -47,30 +61,42 @@ def test_process_pdfs_handles_empty_directory(self, mock_run):
4761 with tempfile .TemporaryDirectory () as tmp_dir :
4862 result = process_pdfs (params = {"pdf_directory" : tmp_dir })
4963
50- self .assertEqual (result , "no-pdfs-found" )
64+ self .assertEqual (result , [] )
5165 mock_run .assert_not_called ()
5266
5367 @mock .patch ("centralized_metadata.ocrmypdf_processing_dag.subprocess.run" )
5468 def test_process_pdfs_prefers_dag_run_conf (self , mock_run ):
5569 """dag_run.conf should override params and defaults."""
5670 with tempfile .TemporaryDirectory () as tmp_dir :
57- pdf_path = os .path .join (tmp_dir , "file.pdf" )
71+ pdf_path = os .path .realpath ( os . path . join (tmp_dir , "file.pdf" ) )
5872 open (pdf_path , "wb" ).close ()
5973
6074 dag_run_mock = mock .Mock ()
6175 dag_run_mock .conf = {"pdf_directory" : tmp_dir }
6276
63- result = process_pdfs (dag_run = dag_run_mock , params = {"pdf_directory" : "/unused" })
64-
65- self .assertEqual (result , "processed-1-pdfs" )
77+ result = process_pdfs (
78+ dag_run = dag_run_mock , params = {"pdf_directory" : "/unused" }
79+ )
80+
81+ self .assertEqual (
82+ result ,
83+ [
84+ {
85+ "original" : pdf_path ,
86+ "optimized" : os .path .realpath (
87+ os .path .join (tmp_dir , "file_opti.pdf" )
88+ ),
89+ }
90+ ],
91+ )
6692 mock_run .assert_called_once ()
6793
6894 @mock .patch ("centralized_metadata.ocrmypdf_processing_dag.Variable.get" )
6995 @mock .patch ("centralized_metadata.ocrmypdf_processing_dag.subprocess.run" )
7096 def test_process_pdfs_uses_share_root_and_relative_path (self , mock_run , mock_variable_get ):
7197 """Variable-based root + relative path should resolve to final directory."""
7298 with tempfile .TemporaryDirectory () as tmp_dir :
73- share_root = tmp_dir
99+ share_root = os . path . realpath ( tmp_dir )
74100 relative_path = "incoming"
75101 target_dir = Path (share_root ) / relative_path
76102 target_dir .mkdir ()
@@ -88,7 +114,15 @@ def fake_variable_get(key, default_var=None):
88114
89115 result = process_pdfs ()
90116
91- self .assertEqual (result , "processed-1-pdfs" )
117+ self .assertEqual (
118+ result ,
119+ [
120+ {
121+ "original" : str (pdf_path .resolve ()),
122+ "optimized" : str ((target_dir / "variable_opti.pdf" ).resolve ()),
123+ }
124+ ],
125+ )
92126 command = mock_run .call_args .args [0 ]
93127 self .assertEqual (Path (command [3 ]).resolve (), pdf_path .resolve ())
94128
0 commit comments