@@ -17,8 +17,8 @@ def test_dag_loads(self):
1717 task_ids = [task .task_id for task in DAG .tasks ]
1818 self .assertEqual (task_ids , ["run_ocrmypdf" , "move_processed_pdfs" , "success" ])
1919
20- @mock .patch ("centralized_metadata.optimize_pdf_dag.subprocess.run " )
21- def test_process_pdfs_runs_command_per_file (self , mock_run ):
20+ @mock .patch ("centralized_metadata.optimize_pdf_dag.run_and_stream " )
21+ def test_process_pdfs_runs_command_per_file (self , mock_run_and_stream ):
2222 """Touch PDFs and verify we issue an OCR command for each."""
2323 with tempfile .TemporaryDirectory () as tmp_dir :
2424 pdf_one = os .path .realpath (os .path .join (tmp_dir , "file_one.pdf" ))
@@ -45,27 +45,33 @@ def test_process_pdfs_runs_command_per_file(self, mock_run):
4545 },
4646 ],
4747 )
48- self .assertEqual (mock_run .call_count , 2 )
48+ self .assertEqual (mock_run_and_stream .call_count , 2 )
4949
5050 expected_files = [Path (pdf_one ), Path (pdf_two )]
51- for call_args , expected_file in zip (mock_run .call_args_list , expected_files ):
51+ for call_args , expected_file in zip (
52+ mock_run_and_stream .call_args_list , expected_files
53+ ):
5254 command = call_args .args [0 ]
53- self .assertEqual (command [:3 ], ["ocrmypdf" , "--optimize" , "1" ])
54- self .assertEqual (Path (command [3 ]).name , expected_file .name )
55- self .assertEqual (Path (command [4 ]).name , f"{ expected_file .stem } _opti.pdf" )
56- self .assertTrue (call_args .kwargs .get ("check" ))
57-
58- @mock .patch ("centralized_metadata.optimize_pdf_dag.subprocess.run" )
59- def test_process_pdfs_handles_empty_directory (self , mock_run ):
55+ self .assertEqual (
56+ command [:4 ], ["ocrmypdf" , "--skip-text" , "--optimize" , "1" ]
57+ )
58+ self .assertEqual (Path (command [4 ]).name , expected_file .name )
59+ self .assertEqual (
60+ Path (command [5 ]).name , f"{ expected_file .stem } _opti.pdf"
61+ )
62+ self .assertEqual (call_args .kwargs .get ("prefix" ), expected_file .name )
63+
64+ @mock .patch ("centralized_metadata.optimize_pdf_dag.run_and_stream" )
65+ def test_process_pdfs_handles_empty_directory (self , mock_run_and_stream ):
6066 """Confirm we short-circuit gracefully when no PDFs exist."""
6167 with tempfile .TemporaryDirectory () as tmp_dir :
6268 result = process_pdfs (params = {"pdf_directory" : tmp_dir })
6369
6470 self .assertEqual (result , [])
65- mock_run .assert_not_called ()
71+ mock_run_and_stream .assert_not_called ()
6672
67- @mock .patch ("centralized_metadata.optimize_pdf_dag.subprocess.run " )
68- def test_process_pdfs_prefers_dag_run_conf (self , mock_run ):
73+ @mock .patch ("centralized_metadata.optimize_pdf_dag.run_and_stream " )
74+ def test_process_pdfs_prefers_dag_run_conf (self , mock_run_and_stream ):
6975 """dag_run.conf should override params and defaults."""
7076 with tempfile .TemporaryDirectory () as tmp_dir :
7177 pdf_path = os .path .realpath (os .path .join (tmp_dir , "file.pdf" ))
@@ -89,11 +95,13 @@ def test_process_pdfs_prefers_dag_run_conf(self, mock_run):
8995 }
9096 ],
9197 )
92- mock_run .assert_called_once ()
98+ mock_run_and_stream .assert_called_once ()
9399
94100 @mock .patch ("centralized_metadata.optimize_pdf_dag.Variable.get" )
95- @mock .patch ("centralized_metadata.optimize_pdf_dag.subprocess.run" )
96- def test_process_pdfs_uses_share_root_and_relative_path (self , mock_run , mock_variable_get ):
101+ @mock .patch ("centralized_metadata.optimize_pdf_dag.run_and_stream" )
102+ def test_process_pdfs_uses_share_root_and_relative_path (
103+ self , mock_run_and_stream , mock_variable_get
104+ ):
97105 """Variable-based root + relative path should resolve to final directory."""
98106 with tempfile .TemporaryDirectory () as tmp_dir :
99107 share_root = os .path .realpath (tmp_dir )
@@ -123,8 +131,8 @@ def fake_variable_get(key, default_var=None):
123131 }
124132 ],
125133 )
126- command = mock_run .call_args .args [0 ]
127- self .assertEqual (Path (command [3 ]).resolve (), pdf_path .resolve ())
134+ command = mock_run_and_stream .call_args .args [0 ]
135+ self .assertEqual (Path (command [4 ]).resolve (), pdf_path .resolve ())
128136
129137if __name__ == "__main__" :
130138 unittest .main ()
0 commit comments