1+ import cv2
2+ import typing
3+ import numpy as np
4+
5+ from mltu .inferenceModel import OnnxInferenceModel
6+ from mltu .utils .text_utils import ctc_decoder , get_cer
7+
class ImageToWordModel(OnnxInferenceModel):
    """ONNX inference wrapper that reads a single word from an image.

    The heavy lifting (session creation, ``input_shape``, ``input_name`` and
    ``model``) is provided by ``OnnxInferenceModel``; this subclass only adds
    the vocabulary needed to decode the network output into text.
    """

    def __init__(self, char_list: typing.Union[str, list], *args, **kwargs):
        """Initialise the base model, then remember the decoding vocabulary.

        Args:
            char_list: Vocabulary handed to ``ctc_decoder`` when decoding.
            *args: Positional arguments forwarded to ``OnnxInferenceModel``.
            **kwargs: Keyword arguments forwarded to ``OnnxInferenceModel``.
        """
        super().__init__(*args, **kwargs)
        self.char_list = char_list

    def predict(self, image: np.ndarray):
        """Run the model on one image and return the decoded text.

        Args:
            image: Image array as accepted by ``cv2.resize``.

        Returns:
            The first entry produced by ``ctc_decoder`` for this image.
        """
        # cv2.resize expects (width, height); the first two entries of
        # input_shape are reversed to get that order
        # (presumably input_shape is (height, width, channels) - TODO confirm).
        target_size = self.input_shape[:2][::-1]
        resized = cv2.resize(image, target_size)

        # Add a leading batch axis and cast to the float32 the session is fed.
        batch = np.expand_dims(resized, axis=0).astype(np.float32)

        # Only the first output of the ONNX session is used.
        outputs = self.model.run(None, {self.input_name: batch})
        decoded = ctc_decoder(outputs[0], self.char_list)

        return decoded[0]
23+
24+
if __name__ == "__main__":
    # Evaluation entry point: load a trained model and its validation CSV,
    # run inference on the first 20 samples, and report the per-sample and
    # average character error rate (CER).
    import pandas as pd
    from tqdm import tqdm
    from mltu.configs import BaseModelConfigs

    configs = BaseModelConfigs.load("Models/1_image_to_word/202211270035/configs.yaml")

    model = ImageToWordModel(model_path=configs.model_path, char_list=configs.vocab)

    # Each row is unpacked below as (image_path, label); rows with missing
    # values are dropped before conversion to a plain list.
    df = pd.read_csv("Models/1_image_to_word/202211270035/val.csv").dropna().values.tolist()

    accum_cer = []
    for image_path, label in tqdm(df[:20]):
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None
            # instead of raising, so handle it explicitly.
            print(f"Skipping unreadable image: {image_path}")
            continue

        try:
            prediction_text = model.predict(image)
            cer = get_cer(prediction_text, label)
        except Exception as err:
            # Best-effort evaluation: skip the failing sample but say why.
            # Catching Exception (not a bare except) lets Ctrl-C and
            # SystemExit still stop the run.
            print(f"Skipping {image_path}: {type(err).__name__}: {err}")
            continue

        print(f"Image: { image_path } , Label: { label } , Prediction: { prediction_text } , CER: { cer } ")

        # Optional visualisation (enlarge the image 3x and show it):
        # image = cv2.resize(image, (image.shape[1] * 3, image.shape[0] * 3))
        # cv2.imshow(prediction_text, image)
        # cv2.waitKey(0)
        # cv2.destroyAllWindows()

        accum_cer.append(cer)

    if accum_cer:
        print(f"Average CER: { np .average (accum_cer )} ")
    else:
        # Avoid np.average([]) which warns and yields nan.
        print("Average CER: n/a (no samples were evaluated)")
0 commit comments