88from pyonb .analysis .metrics import cer , ned , wer
99
1010
11- def read_file (file_path : Path ) -> str | dict :
11+ def read_file (file_path : Path , file_encoding : str | None = None ) -> str | dict :
1212 """Read .txt or .json file."""
13- with Path .open (file_path , "r" ) as f :
13+ with Path .open (file_path , "r" , encoding = file_encoding ) as f :
1414 file_type = file_path .suffix .lower ()
1515
1616 if file_type == ".json" :
@@ -31,14 +31,10 @@ def evaluate_metrics(gt_text: str, ocr_text: str) -> dict:
3131 return {"cer" : cer_result , "wer" : wer_result , "ned" : ned_result }
3232
3333
34- if __name__ == "__main__" :
35- parser = argparse .ArgumentParser (description = "Run and evaluate OCR performance metrics." )
36- parser .add_argument ("-gt" , "--ground_truth_file" , type = str , required = True , help = "[.txt] Path to ground truth file." )
37- parser .add_argument ("-ocr" , "--ocr_file" , type = str , required = True , help = "[.json/.txt] Path to OCR processed file." )
38- args = parser .parse_args ()
39-
40- gt_file_output = read_file (Path (args .ground_truth_file ))
41- ocr_file_output = read_file (Path (args .ocr_file ))
34+ def run (gt_path : Path , ocr_path : Path ) -> dict :
35+ """Run OCR evaluation given ground truth and OCR file paths."""
36+ gt_file_output = read_file (gt_path )
37+ ocr_file_output = read_file (ocr_path )
4238
4339 if isinstance (ocr_file_output , str ):
4440 result = evaluate_metrics (str (gt_file_output ), str (ocr_file_output ))
@@ -49,4 +45,14 @@ def evaluate_metrics(gt_text: str, ocr_text: str) -> dict:
4945 msg = "OCR file is not .txt or .json."
5046 raise TypeError (msg )
5147
52- print (f"OCR Evaluation results:\n { result } " ) # noqa: T201
48+ return result
49+
50+
if __name__ == "__main__":
    # Script entry point: read the two file paths from the command line,
    # evaluate the OCR output against the ground truth, and print the metrics.
    cli = argparse.ArgumentParser(description="Run and evaluate OCR performance metrics.")
    cli.add_argument(
        "-gt",
        "--ground_truth_file",
        type=str,
        required=True,
        help="[.txt] Path to ground truth file.",
    )
    cli.add_argument(
        "-ocr",
        "--ocr_file",
        type=str,
        required=True,
        help="[.json/.txt] Path to OCR processed file.",
    )
    options = cli.parse_args()

    # Convert the raw string arguments to Path objects before handing them to run().
    gt_path = Path(options.ground_truth_file)
    ocr_path = Path(options.ocr_file)
    results = run(gt_path, ocr_path)
    print(f"OCR Evaluation results:\n { results } ")  # noqa: T201
0 commit comments