1010from langchain_core .documents import Document
1111from langchain_core .language_models import BaseChatModel
1212from langchain_core .messages import HumanMessage
13+ from langchain_core .prompts import BasePromptTemplate , PromptTemplate
1314
1415from langchain_community .document_loaders .base import BaseBlobParser
1516from langchain_community .document_loaders .blob_loaders import Blob
@@ -231,12 +232,14 @@ def _analyze_image(self, img: "Image") -> str:
231232 return pytesseract .image_to_string (img , lang = "+" .join (self .langs )).strip ()
232233
233234
234- _PROMPT_IMAGES_TO_DESCRIPTION = (
235- "You are an assistant tasked with summarizing "
236- "images for retrieval. "
237- "These summaries will be embedded and used to retrieve the raw image. "
238- "Give a concise summary of the image that is well optimized for retrieval "
239- "and extract all the text from the image."
235+ _PROMPT_IMAGES_TO_DESCRIPTION : BasePromptTemplate = PromptTemplate .from_template (
236+ "You are an assistant tasked with summarizing images for retrieval. "
237+ "1. These summaries will be embedded and used to retrieve the raw image. "
238+ "Give a concise summary of the image that is well optimized for retrieval\n "
239+ "2. extract all the text from the image. "
240+ "Do not exclude any content from the page.\n "
241+ "Format response in {format} format without explanatory text "
242+ "and without markdown delimiter ``` at the beginning.\n "
240243)
241244
242245
@@ -252,6 +255,8 @@ class LLMImageBlobParser(BaseImageBlobParser):
252255 pointing to (`![body](#)`)
253256 - "html-img" = wrap the content as the `alt` text of an `<img>` tag and link to
254257 (`<img alt="{body}" src="#"/>`)
258+ - "markdown" = return markdown content
259+ - "html" = return html content
255260 model (BaseChatModel):
256261 The language model to use for analysis.
257262 prompt (BasePromptTemplate):
@@ -261,9 +266,11 @@ class LLMImageBlobParser(BaseImageBlobParser):
261266 def __init__ (
262267 self ,
263268 * ,
264- format : Literal ["text" , "markdown-link" , "html-img" ] = "text" ,
269+ format : Literal [
270+ "text" , "markdown-link" , "html-img" , "markdown" , "html"
271+ ] = "text" ,
265272 model : BaseChatModel ,
266- prompt : str = _PROMPT_IMAGES_TO_DESCRIPTION ,
273+ prompt : BasePromptTemplate = _PROMPT_IMAGES_TO_DESCRIPTION ,
267274 ):
268275 """
269276 Initializes the LLMImageBlobParser.
@@ -299,7 +306,10 @@ def _analyze_image(self, img: "Image") -> str:
299306 [
300307 HumanMessage (
301308 content = [
302- {"type" : "text" , "text" : self .prompt },
309+ {
310+ "type" : "text" ,
311+ "text" : self .prompt .format (format = self .format ),
312+ },
303313 {
304314 "type" : "image_url" ,
305315 "image_url" : {
0 commit comments