Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 24 additions & 25 deletions modules/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
import re
import shutil
import time
import mimetypes
from datetime import datetime
from functools import partial
from pathlib import Path

import filetype
import gradio as gr
import yaml
from jinja2.ext import loopcontrols
Expand All @@ -25,7 +27,7 @@
convert_to_markdown,
make_thumbnail
)
from modules.image_utils import open_image_safely
from modules.image_utils import is_mime_type_vision_supported, open_image_safely
from modules.logging_colors import logger
from modules.text_generation import (
generate_reply,
Expand Down Expand Up @@ -239,7 +241,7 @@ def generate_chat_prompt(user_input, state, **kwargs):
image_refs = ""

for attachment in metadata[user_key]["attachments"]:
if attachment.get("type") == "image":
if is_mime_type_vision_supported(attachment.get("type")):
# Add image reference for multimodal models
image_refs += "<__media__>"
elif state.get('include_past_attachments', True):
Expand Down Expand Up @@ -280,7 +282,7 @@ def generate_chat_prompt(user_input, state, **kwargs):
image_refs = ""

for attachment in metadata[user_key]["attachments"]:
if attachment.get("type") == "image":
if is_mime_type_vision_supported(attachment.get("type")):
image_refs += "<__media__>"
else:
filename = attachment.get("name", "file")
Expand Down Expand Up @@ -590,51 +592,48 @@ def add_message_attachment(history, row_idx, file_path, is_user=True):
# Get file info using pathlib
path = Path(file_path)
filename = path.name
file_extension = path.suffix.lower()

# Get MIME type from path
mime_type: str | None
mime_type, _ = mimetypes.guess_file_type(path)

# Get MIME type from file
if mime_type is None:
mime_type = filetype.guess_mime(path)

try:
# Handle image files
if file_extension in ['.jpg', '.jpeg', '.png', '.webp', '.bmp', '.gif']:
if is_mime_type_vision_supported(mime_type):
# Handle image files
# Convert image to base64
with open(path, 'rb') as f:
image_data = base64.b64encode(f.read()).decode('utf-8')

# Determine MIME type from extension
mime_type_map = {
'.jpg': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.png': 'image/png',
'.webp': 'image/webp',
'.bmp': 'image/bmp',
'.gif': 'image/gif'
}
mime_type = mime_type_map.get(file_extension, 'image/jpeg')

# Format as data URL
data_url = f"data:{mime_type};base64,{image_data}"

# Generate unique image ID
image_id = len([att for att in history['metadata'][key]["attachments"] if att.get("type") == "image"]) + 1
image_id = len([att for att in history['metadata'][key]["attachments"] if is_mime_type_vision_supported(att.get("type"))]) + 1

attachment = {
"name": filename,
"type": "image",
"type": mime_type,
"image_data": data_url,
"image_id": image_id,
}
elif file_extension == '.pdf':
elif mime_type == 'application/pdf':
# Process PDF file
content = extract_pdf_text(path)
attachment = {
"name": filename,
"type": "application/pdf",
"type": mime_type,
"content": content,
}
elif file_extension == '.docx':
elif mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
# Process .docx file
content = extract_docx_text(path)
attachment = {
"name": filename,
"type": "application/docx",
"type": mime_type,
"content": content,
}
else:
Expand All @@ -644,7 +643,7 @@ def add_message_attachment(history, row_idx, file_path, is_user=True):

attachment = {
"name": filename,
"type": "text/plain",
"type": mime_type or "text/plain",
"content": content,
}

Expand Down Expand Up @@ -858,7 +857,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
user_key = f"user_{i}"
if user_key in output['metadata'] and "attachments" in output['metadata'][user_key]:
for attachment in output['metadata'][user_key]["attachments"]:
if attachment.get("type") == "image":
if is_mime_type_vision_supported(attachment.get("type")):
all_image_attachments.append(attachment)

# Add all collected image attachments to state for the generation
Expand Down
6 changes: 4 additions & 2 deletions modules/html_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from PIL import Image, ImageOps

from modules import shared
from modules.image_utils import is_mime_type_vision_supported
from modules.sane_markdown_lists import SaneListExtension
from modules.utils import get_available_chat_styles

Expand Down Expand Up @@ -462,12 +463,13 @@ def format_message_attachments(history, role, index):
attachments_html = '<div class="message-attachments">'
for attachment in attachments:
name = html.escape(attachment["name"])
mime_type = attachment.get("type")

if attachment.get("type") == "image":
if is_mime_type_vision_supported(mime_type):
image_data = attachment.get("image_data", "")
attachments_html += (
f'<div class="attachment-box image-attachment">'
f'<img src="{image_data}" alt="{name}" class="image-preview" />'
f'<img src="{image_data}" alt="{name}" type="{mime_type}" class="image-preview" />'
f'<div class="attachment-name">{name}</div>'
f'</div>'
)
Expand Down
41 changes: 40 additions & 1 deletion modules/image_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def convert_image_attachments_to_pil(image_attachments: List[dict]) -> List[Imag
"""Convert webui image_attachments format to PIL Images."""
pil_images = []
for attachment in image_attachments:
if attachment.get('type') == 'image' and 'image_data' in attachment:
if is_mime_type_vision_supported(attachment.get('type')) and 'image_data' in attachment:
try:
image = decode_base64_image(attachment['image_data'])
if image.mode != 'RGB':
Expand All @@ -116,3 +116,42 @@ def convert_openai_messages_to_images(messages: List[dict]) -> List[Image.Image]
_, images = process_message_content(message['content'])
all_images.extend(images)
return all_images


def is_mime_type_vision_supported(mime_type: str) -> bool:
return mime_type in {
'image/jpeg',
'image/png',
'image/webp',
'image/bmp',
'image/gif',
'image/tiff',
'image/avif',
# Uncommon pillow readable mime types
'image/x-dds',
'image/dds',
'image/x-eps',
'image/x-icns',
'image/x-icon',
'vnd.microsoft.icon',
'image/jp2',
'image/x-jp2-codestream',
'image/jpx',
'image/vnd.zbrush.pcx',
'image/x-portable-pixmap',
'image/qoi',
'image/x-qoi',
'image/x-sgi',
'image/x-tga',
'image/x-xbitmap',
'image/x-win-bitmap',
'image/fits',
'image/vnd.fpx',
'image/x-fpx',
'image/x-photo-cd',
'image/vnd.adobe.photoshop',
'image/x-sun-raster',
'image/emf',
'image/wmf',
'image/x-xpixmap',
}
1 change: 1 addition & 0 deletions requirements/full/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
filetype==1.2.0
flash-linear-attention==0.4.*
html2text==2025.4.15
huggingface-hub==0.36.0
Expand Down
1 change: 1 addition & 0 deletions requirements/full/requirements_amd.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/full/requirements_amd_noavx2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/full/requirements_apple_intel.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/full/requirements_apple_silicon.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/full/requirements_cpu_only.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/full/requirements_cpu_only_noavx2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/full/requirements_noavx2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
filetype==1.2.0
flash-linear-attention==0.4.*
html2text==2025.4.15
huggingface-hub==0.36.0
Expand Down
1 change: 1 addition & 0 deletions requirements/full/requirements_nowheels.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ datasets
diffusers==0.36.*
einops
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/portable/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/portable/requirements_amd.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/portable/requirements_amd_noavx2.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/portable/requirements_apple_intel.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/portable/requirements_apple_silicon.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/portable/requirements_cpu_only.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/portable/requirements_cpu_only_noavx2.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/portable/requirements_noavx2.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/portable/requirements_nowheels.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/portable/requirements_vulkan.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down
1 change: 1 addition & 0 deletions requirements/portable/requirements_vulkan_noavx2.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
filetype==1.2.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
Expand Down