1
1
from base64 import b64encode
2
+ import tempfile
2
3
3
4
import streamlit as st
5
+ from audio_recorder_streamlit import audio_recorder
6
+ from gtts import gTTS
4
7
from langchain_community .callbacks .streamlit import (
5
8
StreamlitCallbackHandler ,
6
9
)
@@ -19,8 +22,23 @@ def image_to_base64(image_bytes: bytes) -> str:
19
22
if "chat_history" not in st .session_state :
20
23
st .session_state ["chat_history" ] = []
21
24
22
- # Sidebar: ツール選択とエージェントの構築
25
+ # Sidebar: 入出力モード選択、 ツール選択とエージェントの構築
23
26
with st .sidebar :
# Input/output mode selector.
# NOTE(review): presumably nested under the enclosing `with st.sidebar:` --
# indentation is not recoverable from this view, so re-indent when splicing.
st.subheader("入出力モード")

# Seed the persisted mode on the first run of a session ("テキスト" = text).
st.session_state.setdefault("input_output_mode", "テキスト")

# Pre-select whichever mode was stored on the previous run.
if st.session_state["input_output_mode"] == "テキスト":
    selected_index = 0
else:
    selected_index = 1

input_output_mode = st.radio(
    "モードを選択してください",
    options=["テキスト", "音声"],
    index=selected_index,
    help="テキスト: 従来のテキスト入力/出力, 音声: マイク入力/音声出力",
)

# Write the current choice back so the next rerun starts from it.
st.session_state["input_output_mode"] = input_output_mode

st.divider()
24
42
st .subheader ("使用するツール" )
25
43
26
44
# 利用可能なツール一覧を取得
@@ -63,16 +81,54 @@ def image_to_base64(image_bytes: bytes) -> str:
63
81
else :
64
82
st .chat_message ("assistant" ).write (msg .content )
65
83
66
- if prompt := st .chat_input (
67
- accept_file = "multiple" ,
68
- file_type = [
69
- "png" ,
70
- "jpg" ,
71
- "jpeg" ,
72
- "gif" ,
73
- "webp" ,
74
- ],
75
- ):
# Input section: obtain `prompt` from the microphone or from the chat box,
# depending on the mode chosen in the sidebar. `prompt` stays None when the
# user has not submitted anything new on this run.
prompt = None
# NOTE(review): these two names are not read anywhere in the visible code;
# kept defined in case later, non-visible code relies on them.
prompt_text = ""
prompt_files = []

if input_output_mode == "音声":
    st.subheader("🎤 音声入力")
    audio_bytes = audio_recorder(
        text="クリックして録音",
        recording_color="red",
        neutral_color="black",
        icon_name="microphone",
        icon_size="2x",
        key="audio_input",
    )

    if audio_bytes:
        st.audio(audio_bytes, format="audio/wav")

        # A keyed component keeps returning its last value on later reruns,
        # so the same recording would be submitted again every time the
        # script reruns. Remember a digest of the last recording we handled
        # and only emit a prompt for a new one.
        import hashlib  # local import keeps this block self-contained

        audio_digest = hashlib.sha256(audio_bytes).hexdigest()
        if st.session_state.get("_last_voice_input_digest") != audio_digest:
            st.session_state["_last_voice_input_digest"] = audio_digest

            # TODO: implement speech-to-text. Until then a placeholder
            # sentence stands in for the transcription. (The previous
            # version wrote the audio to a temp file and deleted it again
            # without reading it; that round-trip is dropped until a
            # recognizer actually needs a file.)
            prompt_text = "音声入力を受信しました(音声認識は後で実装予定)"
            # NOTE(review): in voice mode `prompt` is a plain str, while
            # `st.chat_input(accept_file=...)` returns an object exposing
            # `.text` / `.files` -- confirm the shared handler below copes
            # with both shapes.
            prompt = prompt_text
else:
    # Text mode: the pre-existing chat box, which also accepts image uploads.
    prompt = st.chat_input(
        accept_file="multiple",
        file_type=[
            "png",
            "jpg",
            "jpeg",
            "gif",
            "webp",
        ],
    )
130
+ # 共通の入力処理ロジック
131
+ if prompt :
76
132
user_display_items = []
77
133
message_parts = []
78
134
@@ -141,4 +197,27 @@ def image_to_base64(image_bytes: bytes) -> str:
141
197
)
142
198
last_message = response ["messages" ][- 1 ]
143
199
st .session_state ["chat_history" ].append (last_message )
144
- st .write (last_message .content )
# Show the assistant's reply and, in voice mode, also speak it.
# NOTE(review): presumably nested inside the `if prompt:` handler --
# indentation is not recoverable from this view, so re-indent when splicing.
response_content = last_message.content
st.write(response_content)

if input_output_mode == "音声":
    import io  # local import keeps this block self-contained

    # Best-effort: a text-to-speech failure must not break the chat turn,
    # so any error is reported as a warning rather than raised.
    try:
        # Synthesize straight into memory. The previous version saved to the
        # path of a still-open NamedTemporaryFile (which fails on Windows)
        # and removed the file only on the success path, leaking it on error.
        # NOTE(review): assumes `response_content` is a str -- confirm for
        # messages whose content is a list of parts.
        speech_buffer = io.BytesIO()
        gTTS(text=response_content, lang="ja").write_to_fp(speech_buffer)
        st.audio(speech_buffer.getvalue(), format="audio/mp3", autoplay=True)
    except Exception as e:
        st.warning(f"音声出力でエラーが発生しました: {e} ")
0 commit comments