@@ -177,20 +177,11 @@ def clear_history_example(request: gr.Request):
177177 ) * 5
178178
179179
180- # TODO(Chris): At some point, we would like this to be a live-reporting feature.
# Placeholder hook: accepts `state` and `image` but performs no work. The body
# is a bare `pass`, so every call implicitly returns None.
# NOTE(review): the name suggests reporting of flagged images; nothing is
# reported from this function as shown here. Confirm intended behaviour.
181180def report_csam_image (state , image ):
182181 pass
183182
184183
185184def wrap_pdfchat_query (query , document ):
186- # TODO: Considering redesign the context format.
187- # document_context = f"""
188- # The following is the content of a document:
189- # {document}
190- # Based on this document, answer the following question:
191- # {query}
192- # """
193-
194185 reformatted_query_context = (
195186 f"Answer the user query given the context.\n "
196187 f"[QUERY CONTEXT]\n "
@@ -203,57 +194,6 @@ def wrap_pdfchat_query(query, document):
203194
204195 return reformatted_query_context
205196
206-
207- # LLAMA_PARSE_MAX_RETRY = 2
208- # LLAMAPARSE_SUPPORTED_LANGS = {
209- # "English": "en",
210- # "Chinese": "ch_sim",
211- # "Russian": "ru",
212- # "Spanish": "es",
213- # "Japanese": "ja",
214- # "Korean": "ko",
215- # "French": "fr",
216- # "German": "de",
217- # "Vietnamese": "vi",
218- # }
219-
220-
221- # def parse_pdf(file_path):
222- # from llama_parse import LlamaParse
223- # from llama_index.core.schema import ImageDocument, TextNode
224-
225- # from PIL import Image
226-
227- # parser = LlamaParse(
228- # api_key=os.getenv("LLAMA_CLOUD_API_KEY"),
229- # result_type="markdown",
230- # )
231-
232- # def get_image_nodes(json_objs: List[dict], download_path: str):
233- # image_dicts = parser.get_images(json_objs, download_path=download_path)
234- # return [ImageDocument(image_path=image_dict["path"]) for image_dict in image_dicts]
235-
236- # json_objs = parser.get_json_result(file_path)
237- # json_list = json_objs[0]["pages"]
238-
239- # text = ""
240- # for page in json_list:
241- # text += f"Page {page['page']}:\n{page['md']}\n"
242- # if (page['images']):
243- # for i, image in enumerate(page['images']):
244- # text += f"page{page['page']}_figure{i + 1}\n"
245-
246- # image_documents = get_image_nodes(json_objs, ".")
247- # images = []
248-
249- # for image_doc in image_documents:
250- # image_path = image_doc.image_path
251- # image = Image.open(image_path)
252- # images.append(image)
253-
254- # return text, images
255-
256-
257197PDFPARSE_MAX_RETRY = 2
258198PDFPARSE_SUPPORTED_LANGS = {
259199 "English" : "en" ,
@@ -336,16 +276,6 @@ def _prepare_text_with_image(state, text, images, csam_flag):
336276
337277 return text
338278
339-
340- # def _prepare_text_with_pdf(text, pdfs):
341- # if len(pdfs) > 0:
342- # document_content = parse_pdf(pdfs[0])
343- # print("Document processed")
344- # text = wrap_pdfchat_query(text, document_content)
345-
346- # return text
347-
348-
349279def _prepare_text_with_pdf (text , pdfs ):
350280 if len (pdfs ) > 0 :
351281 parsed_text , imgs = parse_pdf (pdfs [0 ])
@@ -360,7 +290,6 @@ def _prepare_text_with_pdf(text, pdfs):
360290 return text
361291
362292
363- # NOTE(chris): take multiple images later on
364293def convert_images_to_conversation_format (images ):
365294 import base64
366295
0 commit comments