Skip to content

Commit 32c6724

Browse files
committed
add pdf maximum page limit
1 parent 0daef32 commit 32c6724

File tree

4 files changed, with 36 additions and 42 deletions

fastchat/constants.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,9 @@
4040
)
4141
# Maximum conversation turns
4242
CONVERSATION_TURN_LIMIT = 50
43+
# Maximum PDF Page Limit
44+
PDF_PAGE_LIMIT = 50
45+
PDF_LIMIT_MSG = f"YOU HAVE REACHED THE MAXIMUM PDF PAGE LIMIT ({PDF_PAGE_LIMIT} PAGES). PLEASE UPLOAD A SMALLER DOCUMENT."
4346
# Session expiration time
4447
SESSION_EXPIRATION_TIME = 3600
4548
# The output dir of log files

fastchat/serve/gradio_block_arena_vision.py

Lines changed: 0 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -260,46 +260,6 @@ def wrap_pdfchat_query(query, document):
260260
}
261261

262262

263-
# TODO: P1: Integrate this.
264-
def pdf_moderator(images):
265-
import base64
266-
from openai import OpenAI
267-
from io import BytesIO
268-
269-
base64_urls = []
270-
for image in images:
271-
buffer = BytesIO()
272-
image.save(buffer, format="JPEG")
273-
274-
image_bytes = buffer.getvalue()
275-
image_b64 = base64.b64encode(image_bytes).decode("utf-8")
276-
277-
# convert to openai format
278-
base64_urls.append(
279-
{
280-
"type": "image_url",
281-
"image_url": {
282-
"url": f"data:image/jpeg;base64,{image_b64}",
283-
},
284-
}
285-
)
286-
287-
# OpenAI's maximum number of images is 1 at the moment.
288-
client = OpenAI()
289-
moderations = []
290-
for url in base64_urls:
291-
try:
292-
response = client.moderations.create(
293-
model="omni-moderation-latest",
294-
input=url,
295-
)
296-
moderations.append(response[0].results.flagged)
297-
except Exception as e:
298-
print(e)
299-
300-
return all(moderations)
301-
302-
303263
def detect_language_from_doc(pdf_file_path):
304264
from pdf2image import convert_from_path
305265
from polyglot.detect import Detector

fastchat/serve/gradio_block_arena_vision_anony.py

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414
TEXT_MODERATION_MSG,
1515
IMAGE_MODERATION_MSG,
1616
PDF_MODERATION_MSG,
17+
PDF_LIMIT_MSG,
18+
PDF_PAGE_LIMIT,
1719
MODERATION_MSG,
1820
CONVERSATION_LIMIT_MSG,
1921
SLOW_MODEL_MSG,
@@ -78,6 +80,7 @@
7880
build_logger,
7981
moderation_filter,
8082
image_moderation_filter,
83+
get_pdf_num_page,
8184
upload_pdf_file_to_gcs,
8285
hash_pdf,
8386
)
@@ -339,6 +342,25 @@ def add_text(
339342
+ [""]
340343
)
341344

345+
if len(pdfs) > 0 and get_pdf_num_page(pdfs[0]) > PDF_PAGE_LIMIT:
346+
logger.info(f"pdf page limit exceeded. ip: {ip}. text: {text}")
347+
for i in range(num_sides):
348+
states[i].skip_next = True
349+
return (
350+
states
351+
+ [x.to_gradio_chatbot() for x in states]
352+
+ [
353+
{
354+
"text": PDF_LIMIT_MSG
355+
+ " PLEASE CLICK 🎲 NEW ROUND TO START A NEW CONVERSATION."
356+
},
357+
"",
358+
no_change_btn,
359+
]
360+
+ [no_change_btn] * 7
361+
+ [""]
362+
)
363+
342364
model_list = [states[i].model_name for i in range(num_sides)]
343365

344366
images = convert_images_to_conversation_format(images)
@@ -356,11 +378,12 @@ def add_text(
356378
return (
357379
states
358380
+ [x.to_gradio_chatbot() for x in states]
359-
+ [{"text": CONVERSATION_LIMIT_MSG}, "", no_change_btn]
360381
+ [
382+
{"text": CONVERSATION_LIMIT_MSG},
383+
"",
361384
no_change_btn,
362385
]
363-
* 7
386+
+ [no_change_btn] * 7
364387
+ [""]
365388
)
366389

fastchat/utils.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -474,6 +474,14 @@ def hash_pdf(file_path):
474474
return hashlib.md5(file_content).hexdigest()
475475

476476

477+
def get_pdf_num_page(file_path):
478+
from pypdf import PdfReader
479+
480+
reader = PdfReader(file_path)
481+
482+
return len(reader.pages)
483+
484+
477485
def image_moderation_request(image_bytes, endpoint, api_key):
478486
headers = {"Content-Type": "image/jpeg", "Ocp-Apim-Subscription-Key": api_key}
479487

0 commit comments

Comments
 (0)