Merge pull request #62 from led-mirage/feature/v1.34.0

led-mirage · web-flow · commit b1fb176df152 · 2025-11-02T16:17:53.000+09:00
Feature/v1.34.0
diff --git a/Readme.en.md b/Readme.en.md
@@ -42,7 +42,7 @@ Here are the highlights of this app ✨
 * Compatible with Linux Mint (Cinnamon/x64 only, Japanese input via IBus only, printing not supported)
 * Dark mode supported
 * Custom CSS support
-* Image-based questions are now supported (experimental, OpenAI only) ✨
+* Image-based questions are now supported (experimental) ✨
 
 ## 💎 Language Support
 
@@ -69,7 +69,7 @@ The following values can be configured:
 - Raspberry Pi OS Bookworm 64bit
 - Linux Mint 22.1 Cinnamon Edition
 - Python 3.10–3.13 (development environment: 3.12.0)
-- VOICEVOX 0.22.3
+- VOICEVOX 0.25.0
 - A.I.VOICE Editor 1.4.10.0
 - COEIROINK v.2.3.4
 
@@ -363,6 +363,11 @@ License：MIT License
 Homepage：https://github.com/mhammond/pywin32  
 License：Python Software Foundation License (PSF)
 
+### 🔖 Pillow 12.0.0
+
+Homepage： https://github.com/python-pillow/Pillow  
+License：MIT-CMU license
+
 ### 🔖 MathJax 3.2.2
 
 Homepage： https://github.com/mathjax/MathJax  
diff --git a/Readme.md b/Readme.md
@@ -45,7 +45,7 @@ APIを使ってAIとチャットするアプリなのだ。
 - Linux Mint対応（Cinnamon/x64、日本語入力はIBus限定、印刷機能は非対応）
 - ダークモード対応
 - スタイルシート（CSS）をカスタマイズ可能
-- 画像を使った質問が可能（実験的機能、OpenAIのみ）✨
+- 画像を使った質問が可能（実験的機能）✨
 
 アプリの紹介と、もっとも手軽な導入方法を[Zennの記事](https://zenn.dev/ledmirage/articles/7650f36d3a784a)にしたので、そちらも参考にしてほしいのだ✨
 
@@ -60,7 +60,7 @@ Raspberry Pi、Linuxへの導入に関しても[Zennの記事](https://zenn.dev/
 - Raspberry Pi OS Bookworm 64bit
 - Linux Mint 22.1 Cinnamon Edition
 - Python 3.10-3.13（開発環境は 3.12.0）
-- VOICEVOX 0.23.0
+- VOICEVOX 0.25.0
 - A.I.VOICE Editor 1.4.10.0
 - COEIROINK v.2.3.4
 
@@ -206,7 +206,7 @@ Windowsの場合は、Windowsの検索窓で「環境変数を編集」で検索
 
 以下のリンクから ZundaGPT2.ZIP をダウンロードして、作成したフォルダに展開するのだ。
 
-https://github.com/led-mirage/ZundaGPT2/releases/tag/v1.33.0
+https://github.com/led-mirage/ZundaGPT2/releases/tag/v1.34.0
 
 #### 3. 実行
 
@@ -354,11 +354,11 @@ CSSを知らない人はなんのことかわからないかもしれないけ
 
 VirusTotalでのチェック結果はこちらなのだ。
 
-- Windows版: [71個中1個のアンチウィルスエンジンで検出 :2025/10/31 v1.33.0](https://www.virustotal.com/gui/file/e93d31b74243520d3d202a6307723e41d1fb1517d31b585409739423913ffd76/detection)
-- Raspberry Pi版: [61個中0個のアンチウィルスエンジンで検出 :2025/10/31 v1.33.0](https://www.virustotal.com/gui/file/409a69577a8c8e25ee7f0ae24a0fe0a8f1376ba3d81746dd1ada6bcde98b8ce5/detection)
-- Linux版: [63個中0個のアンチウィルスエンジンで検出 :2025/10/31 v1.33.0](https://www.virustotal.com/gui/file/cd622f4756cc4b111da71db6bfcfe156beeee662539e91f9b85e9862cb9194ef/detection)
+- Windows版: [72個中2個のアンチウィルスエンジンで検出 :2025/11/02 v1.34.0](https://www.virustotal.com/gui/file/ee47b5a5c9c70fca1c3d080bf4f63356bf86d38a5000a375e8a8db910f6e3754/detection)
+- Raspberry Pi版: [60個中0個のアンチウィルスエンジンで検出 :2025/11/02 v1.34.0](https://www.virustotal.com/gui/file/8d8cdb96624cdf83da439297b762e5ae2595486d799142ad8457cfd8071bddf3/detection)
+- Linux版: [62個中0個のアンチウィルスエンジンで検出 :2025/11/02 v1.34.0](https://www.virustotal.com/gui/file/23315c375d2badf83a31c84dfb986fc9addb250d69bb68b33d36780f0e39e2ff/detection)
 
-<img src="doc/images/virustotal_1.33.0.png" width="600">
+<img src="doc/images/virustotal_1.34.0.png" width="600">
 
 ### ⚡ ご利用について
 
@@ -440,6 +440,11 @@ VirusTotalでのチェック結果はこちらなのだ。
 ホームページ：https://github.com/mhammond/pywin32  
 ライセンス：Python Software Foundation License (PSF)
 
+### 🔖 Pillow 12.0.0
+
+ホームページ： https://github.com/python-pillow/Pillow  
+ライセンス： MIT-CMUライセンス
+
 ### 🔖 MathJax 3.2.2
 
 ホームページ： https://github.com/mathjax/MathJax  
@@ -489,6 +494,12 @@ VirusTotalでのチェック結果はこちらなのだ。
 
 ## 💎 バージョン履歴
 
+### 1.34.0 (2025/11/02)
+
+- 画像送信機能の拡張（実験的機能）
+  - Claude、Geminiにも対応
+  - 画像をクリックすることで拡大表示する機能を追加
+
 ### 1.33.0 (2025/10/31)
 
 - 画像送信機能の追加（実験的機能）
diff --git a/app/chat/chat_claude.py b/app/chat/chat_claude.py
@@ -6,13 +6,15 @@
 # このソースコードは MITライセンス の下でライセンスされています。
 # ライセンスの詳細については、このプロジェクトのLICENSEファイルを参照してください。
 
+import copy
 import os
 from datetime import datetime
 
 import anthropic
 
 from .chat import Chat
 from .listener import SendMessageListener
+from utility.utils import parse_data_url, resize_base64_image
 from utility.multi_lang import get_text_resource
 
 
@@ -66,7 +68,22 @@ def send_message(
             self.stop_send_event.clear()
 
             self.messages.append({"role": "user", "content": text})
-            messages = self.get_history()
+            messages = copy.deepcopy(self.get_history())
+
+            if images and len(images) > 0:
+                messages = messages[:-1]
+                content = []
+                if text:
+                    content.append({"type": "text", "text": text})
+
+                for image in images:
+                    media_type, image_format, b64 = parse_data_url(image)
+                    b64 = resize_base64_image(b64, max_size_mb=3.0, output_format=image_format)
+                    content.append(
+                        {"type": "image", "source": {"type": "base64", "media_type": media_type, "data": b64}}
+                    )
+
+                messages.append({"role": "user", "content": content})
 
             content = ""
             sentence = ""
diff --git a/app/chat/chat_gemini.py b/app/chat/chat_gemini.py
@@ -16,6 +16,7 @@
 
 from .chat import Chat
 from .listener import SendMessageListener
+from utility.utils import parse_data_url, resize_base64_image
 from utility.multi_lang import get_text_resource
 
 
@@ -62,9 +63,25 @@ def send_message(
         try:
             self.stop_send_event.clear()
 
-            self.messages.append({"role": "user", "content": text})
+            user_parts = [{"text": text}]
+            for img_dataurl in images or []:
+                media_type, image_format, b64 = parse_data_url(img_dataurl)
+                b64 = resize_base64_image(b64, max_size_mb=15.0, output_format=image_format)
+                user_parts.append({
+                    "inline_data": {
+                        "mime_type": media_type,
+                        "data": b64
+                    }
+                })
+
             messages = copy.deepcopy(self.get_history())
             messages = self.convert_messages(messages)
+            messages.append({"role": "user", "parts": user_parts})
+            self.messages.append({"role": "user", "content": text})
+
+            #self.messages.append({"role": "user", "content": text})
+            #messages = copy.deepcopy(self.get_history())
+            #messages = self.convert_messages(messages)
 
             stream = self.client.models.generate_content_stream(
                 model=self.model,
diff --git a/app/const.py b/app/const.py
@@ -7,5 +7,5 @@
 # ライセンスの詳細については、このプロジェクトのLICENSEファイルを参照してください。
 
 APP_NAME = "ZundaGPT2"
-APP_VERSION = "1.33.0"
+APP_VERSION = "1.34.0"
 COPYRIGHT = "© 2024-2025 led-mirage"
diff --git a/app/html/js/index.js b/app/html/js/index.js
@@ -1454,12 +1454,42 @@ function addImagePreview(src) {
     container.appendChild(img);
     container.appendChild(delBtn);
     chatMessages.appendChild(container);
+    enlargeImage(img);
 
     g_pastedImages.push({ src, sent: false, element: container });
 
     scrollToBottom();
 }
 
+// 貼り付けた画像の拡大処理
+function enlargeImage(img) {
+    img.addEventListener('click', () => {
+        const modal = document.createElement('div');
+        modal.style.position = 'fixed';
+        modal.style.top = 0;
+        modal.style.left = 0;
+        modal.style.width = '100vw';
+        modal.style.height = '100vh';
+        modal.style.background = 'rgba(0,0,0,0.7)';
+        modal.style.display = 'flex';
+        modal.style.alignItems = 'center';
+        modal.style.justifyContent = 'center';
+        modal.style.zIndex = 99999;
+
+        const bigImg = document.createElement('img');
+        bigImg.src = img.src;
+        bigImg.style.maxWidth = '90%';
+        bigImg.style.maxHeight = '90%';
+        bigImg.style.borderRadius = '8px';
+        bigImg.style.boxShadow = '0 0 10px rgba(0,0,0,0.5)';
+
+        modal.appendChild(bigImg);
+        document.body.appendChild(modal);
+
+        modal.addEventListener('click', () => modal.remove());
+    });
+}
+
 // Pythonから呼び出される関数（グローバルスコープに登録）
 window.applyCustomCSS = applyCustomCSS;
 window.setChatInfo = setChatInfo;
diff --git a/app/utility/utils.py b/app/utility/utils.py
@@ -8,13 +8,17 @@
 
 import base64
 import inspect
+import io
 import mimetypes
 import os
+import re
 import sys
 import tkinter as tk
 from pathlib import Path
 from urllib.parse import urlparse
 
+from PIL import Image
+
 
 # 文字をエスケープする
 def escape_js_string(s: str):
@@ -90,3 +94,60 @@ def get_screen_size(window_handle=None) -> tuple[int, int]:
         except Exception:
             # 取得できない場合はデフォルトサイズを返す
             return 800, 600
+
+# data URL 形式の文字列を解析して (media_type, subtype, base64_data) を返す
+# 想定外の形式の場合は ('image/png', 'png', data_url) を返す
+def parse_data_url(data_url: str) -> tuple[str, str, str]:
+    if not isinstance(data_url, str):
+        return "image/png", "png", ""
+
+    match = re.match(r"^data:(.*?);base64,(.*)$", data_url)
+    if match:
+        media_type = match.group(1).strip() or "image/png"
+        b64_data = match.group(2).strip()
+
+        # 画像タイプ部分だけ抽出する
+        subtype_match = re.match(r"^image/(\w+)$", media_type)
+        subtype = subtype_match.group(1) if subtype_match else "png"
+
+        return media_type, subtype, b64_data
+    else:
+        # 想定外の場合はPNG扱い
+        return "image/png", "png", data_url.strip()
+
+# Base64エンコードされた画像データを指定サイズ以下に圧縮する
+def resize_base64_image(b64_data: str, max_size_mb: float, output_format="JPEG", quality_step=5) -> str:
+    img_bytes = base64.b64decode(b64_data)
+    image = Image.open(io.BytesIO(img_bytes))
+    size_mb = len(img_bytes) / (1024 * 1024)
+    if size_mb <= max_size_mb:
+        return b64_data
+
+    buffer = io.BytesIO()
+    if output_format.upper() == "JPEG":
+        # JPEGは画質を下げながら圧縮
+        quality = 95
+        while True:
+            buffer = io.BytesIO()
+            image.save(buffer, format="JPEG", quality=quality)
+            new_data = buffer.getvalue()
+            new_size = len(new_data) / (1024 * 1024)
+            if new_size <= max_size_mb or quality <= 10:
+                break
+            quality -= quality_step
+
+    else:
+        # PNGなどはリサイズ主体で対応
+        width, height = image.size
+        while True:
+            buffer = io.BytesIO()
+            image.save(buffer, format=output_format, optimize=True, compress_level=9)
+            new_data = buffer.getvalue()
+            new_size = len(new_data) / (1024 * 1024)
+            if new_size <= max_size_mb or (width < 100 or height < 100):
+                break
+            # サイズがまだ大きいなら10%ずつ縮小
+            width, height = int(width * 0.9), int(height * 0.9)
+            image = image.resize((width, height), Image.LANCZOS)
+
+    return base64.b64encode(buffer.getvalue()).decode("utf-8")
diff --git a/doc/images/virustotal_1.34.0.png b/doc/images/virustotal_1.34.0.png
diff --git a/requirements.txt b/requirements.txt
@@ -10,4 +10,5 @@ langdetect==1.0.9
 pyperclip==1.9.0
 pydub==0.25.1
 pywin32==306
+pillow==12.0.0
 pyinstaller_versionfile==3.0.1
diff --git a/requirements_linux.txt b/requirements_linux.txt
@@ -7,3 +7,4 @@ gtts==2.5.1
 langdetect==1.0.9
 pyperclip==1.9.0
 pydub==0.25.1
+pillow==12.0.0
diff --git a/version.yaml b/version.yaml
@@ -1,4 +1,4 @@
-Version: 1.33.0.0
+Version: 1.34.0.0
 CompanyName: led-mirage
 FileDescription: AI chat client with voice synthesis for GPT, Claude, and Gemini
 InternalName: ZundaGPT2