@@ -50,13 +50,15 @@ def __init__(
5050 self .format = format
5151
5252 @abstractmethod
53- def _analyze_image (self , img : "Image" ) -> str :
53+ def _analyze_image (self , img : "Image" , format : str ) -> str :
5454 """
5555 Abstract method to analyze an image and extract textual content.
5656
5757 Args:
5858 img (Image):
5959 The image to be analyzed.
60+ format (str):
61+ The output format to use, if the implementation supports it.
6062
6163 Returns:
6264 str:
@@ -84,7 +86,12 @@ def lazy_parse(self, blob: Blob) -> Iterator[Document]:
8486 img = Img .fromarray (numpy .load (buf ))
8587 else :
8688 img = Img .open (buf )
87- content = self ._analyze_image (img )
89+ format = (
90+ "text"
91+ if self .format in ("markdown-link" , "html-img" )
92+ else self .format
93+ )
94+ content = self ._analyze_image (img , format )
8895 if content :
8996 source = blob .source or "#"
9097 if self .format == "markdown-link" :
@@ -143,13 +150,15 @@ def __init__(
143150 super ().__init__ (format = format )
144151 self .ocr = None
145152
146- def _analyze_image (self , img : "Image" ) -> str :
153+ def _analyze_image (self , img : "Image" , format : str ) -> str :
147154 """
148155 Analyzes an image and extracts text using RapidOCR.
149156
150157 Args:
151158 img (Image):
152159 The image to be analyzed.
160+ format (str):
161+ The output format to use, if the implementation supports it.
153162
154163 Returns:
155164 str:
@@ -211,13 +220,15 @@ def __init__(
211220 super ().__init__ (format = format )
212221 self .langs = list (langs )
213222
214- def _analyze_image (self , img : "Image" ) -> str :
223+ def _analyze_image (self , img : "Image" , format : str ) -> str :
215224 """
216225 Analyzes an image and extracts text using Tesseract OCR.
217226
218227 Args:
219228 img (Image):
220229 The image to be analyzed.
230+ format (str):
231+ The output format to use, if the implementation supports it.
221232
222233 Returns:
223234 str: The extracted text content.
@@ -287,7 +298,7 @@ def __init__(
287298 self .model = model
288299 self .prompt = prompt
289300
290- def _analyze_image (self , img : "Image" ) -> str :
301+ def _analyze_image (self , img : "Image" , format : str ) -> str :
291302 """
292303 Analyzes an image using the provided language model.
293304
@@ -308,7 +319,7 @@ def _analyze_image(self, img: "Image") -> str:
308319 content = [
309320 {
310321 "type" : "text" ,
311- "text" : self .prompt .format (format = self . format ),
322+ "text" : self .prompt .format (format = format ),
312323 },
313324 {
314325 "type" : "image_url" ,
0 commit comments