Skip to content

Commit f8bcb01

Browse files
committed
move codes to tts module
1 parent 90f1045 commit f8bcb01

File tree

3 files changed

+64
-54
lines changed

3 files changed

+64
-54
lines changed

template_langgraph/services/streamlits/pages/chat_with_tools_agent.py

Lines changed: 1 addition & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import io
21
import os
32
import tempfile
43
from base64 import b64encode
@@ -7,17 +6,15 @@
76
import streamlit as st
87
import whisper
98
from audio_recorder_streamlit import audio_recorder
10-
from gtts import gTTS
119
from langchain_community.callbacks.streamlit import (
1210
StreamlitCallbackHandler,
1311
)
14-
from pydub import AudioSegment
15-
from pydub.effects import speedup
1612

1713
from template_langgraph.agents.chat_with_tools_agent.agent import (
1814
AgentState,
1915
ChatWithToolsAgent,
2016
)
17+
from template_langgraph.speeches.tts import synthesize_audio
2118
from template_langgraph.tools.common import get_default_tools
2219

2320

@@ -32,56 +29,6 @@ def load_whisper_model(model_size: str = "base"):
3229
return whisper.load_model(model_size)
3330

3431

35-
def synthesize_audio(
36-
text: str,
37-
language: str = "ja",
38-
speed: float = 1.0,
39-
pitch_shift: int = 0,
40-
volume_db: float = 0.0,
41-
) -> bytes | None:
42-
"""Convert text to speech audio using gTTS and pydub adjustments."""
43-
44-
if not text.strip():
45-
return None
46-
47-
try:
48-
tts = gTTS(text=text, lang=language)
49-
mp3_buffer = io.BytesIO()
50-
tts.write_to_fp(mp3_buffer)
51-
mp3_buffer.seek(0)
52-
53-
audio_segment = AudioSegment.from_file(mp3_buffer, format="mp3")
54-
original_rate = audio_segment.frame_rate
55-
56-
if pitch_shift != 0:
57-
semitone_ratio = 2.0 ** (pitch_shift / 12.0)
58-
shifted = audio_segment._spawn(
59-
audio_segment.raw_data,
60-
overrides={"frame_rate": int(original_rate * semitone_ratio)},
61-
)
62-
audio_segment = shifted.set_frame_rate(original_rate)
63-
64-
if speed != 1.0:
65-
if speed > 1.0:
66-
audio_segment = speedup(audio_segment, playback_speed=float(speed))
67-
else:
68-
slowed_rate = max(int(original_rate * float(speed)), 1)
69-
audio_segment = audio_segment._spawn(
70-
audio_segment.raw_data,
71-
overrides={"frame_rate": slowed_rate},
72-
).set_frame_rate(original_rate)
73-
74-
if volume_db != 0:
75-
audio_segment += float(volume_db)
76-
77-
output_buffer = io.BytesIO()
78-
audio_segment.export(output_buffer, format="mp3")
79-
return output_buffer.getvalue()
80-
except Exception as exc: # pragma: no cover
81-
st.error(f"音声合成に失敗しました: {exc}")
82-
return None
83-
84-
8532
if "chat_history" not in st.session_state:
8633
st.session_state["chat_history"] = []
8734

template_langgraph/speeches/__init__.py

Whitespace-only changes.

template_langgraph/speeches/tts.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import io
2+
import logging
3+
4+
from gtts import gTTS
5+
from pydub import AudioSegment
6+
from pydub.effects import speedup
7+
8+
from template_langgraph.loggers import get_logger
9+
10+
logger = get_logger(
11+
name=__name__,
12+
verbosity=logging.DEBUG,
13+
)
14+
15+
16+
def synthesize_audio(
17+
text: str,
18+
language: str = "ja",
19+
speed: float = 1.0,
20+
pitch_shift: int = 0,
21+
volume_db: float = 0.0,
22+
) -> bytes | None:
23+
"""Convert text to speech audio using gTTS and pydub adjustments."""
24+
25+
if not text.strip():
26+
return None
27+
28+
try:
29+
tts = gTTS(text=text, lang=language)
30+
mp3_buffer = io.BytesIO()
31+
tts.write_to_fp(mp3_buffer)
32+
mp3_buffer.seek(0)
33+
34+
audio_segment = AudioSegment.from_file(mp3_buffer, format="mp3")
35+
original_rate = audio_segment.frame_rate
36+
37+
if pitch_shift != 0:
38+
semitone_ratio = 2.0 ** (pitch_shift / 12.0)
39+
shifted = audio_segment._spawn(
40+
audio_segment.raw_data,
41+
overrides={"frame_rate": int(original_rate * semitone_ratio)},
42+
)
43+
audio_segment = shifted.set_frame_rate(original_rate)
44+
45+
if speed != 1.0:
46+
if speed > 1.0:
47+
audio_segment = speedup(audio_segment, playback_speed=float(speed))
48+
else:
49+
slowed_rate = max(int(original_rate * float(speed)), 1)
50+
audio_segment = audio_segment._spawn(
51+
audio_segment.raw_data,
52+
overrides={"frame_rate": slowed_rate},
53+
).set_frame_rate(original_rate)
54+
55+
if volume_db != 0:
56+
audio_segment += float(volume_db)
57+
58+
output_buffer = io.BytesIO()
59+
audio_segment.export(output_buffer, format="mp3")
60+
return output_buffer.getvalue()
61+
except Exception as e: # pragma: no cover
62+
logger.error(f"Error in synthesize_audio: {e}")
63+
return None

0 commit comments

Comments
 (0)