Skip to content

Commit e53b24e

Browse files
authored
Merge pull request #1901 from xinnan-tech/py_test_tts
Py test tts
2 parents b91f4e4 + 1a978ab commit e53b24e

File tree

9 files changed: +487 additions, -51 deletions

main/xiaozhi-server/config.yaml

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -710,6 +710,26 @@ TTS:
710710
# voice_id: female-shaonv
711711
# weight: 1
712712
# language_boost: auto
713+
714+
# MinimaxTTSHTTPStream和MinimaxTTSWebSocketStream还在测试,测试完再开放
715+
#
716+
# MinimaxTTSHTTPStream:
717+
# # Minimax流式语音合成服务
718+
# type: minimax_httpstream
719+
# output_dir: tmp/
720+
# group_id: 你的minimax平台groupID
721+
# api_key: 你的minimax平台接口密钥
722+
# model: "speech-01-turbo"
723+
# voice_id: "female-shaonv"
724+
#
725+
# MinimaxTTSWebSocketStream:
726+
# type: minimax_webSocket
727+
# output_dir: tmp/
728+
# group_id: 你的minimax平台groupID
729+
# api_key: 你的minimax平台接口密钥
730+
# model: "speech-01-turbo"
731+
# voice_id: "female-shaonv"
732+
713733
AliyunTTS:
714734
# 阿里云智能语音交互服务,需要先在阿里云平台开通服务,然后获取验证信息
715735
# 平台地址:https://nls-portal.console.aliyun.com/

main/xiaozhi-server/core/connection.py

Lines changed: 19 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,8 @@ def __init__(
132132

133133
# tts相关变量
134134
self.sentence_id = None
135+
# 处理TTS响应没有文本返回
136+
self.tts_MessageText = ""
135137

136138
# iot相关变量
137139
self.iot_descriptors = {}
@@ -182,8 +184,13 @@ async def handle_connection(self, ws):
182184
await ws.send("端口正常,如需测试连接,请使用test_page.html")
183185
await self.close(ws)
184186
return
185-
# 获取客户端ip地址
186-
self.client_ip = ws.remote_address[0]
187+
real_ip = self.headers.get("x-real-ip") or self.headers.get(
188+
"x-forwarded-for"
189+
)
190+
if real_ip:
191+
self.client_ip = real_ip.split(",")[0].strip()
192+
else:
193+
self.client_ip = ws.remote_address[0]
187194
self.logger.bind(tag=TAG).info(
188195
f"{self.client_ip} conn - Headers: {self.headers}"
189196
)
@@ -335,7 +342,7 @@ def _initialize_components(self):
335342
self.config.get("selected_module", {})
336343
)
337344
self.logger = create_connection_logger(self.selected_module_str)
338-
345+
339346
"""初始化组件"""
340347
if self.config.get("prompt") is not None:
341348
user_prompt = self.config["prompt"]
@@ -351,10 +358,10 @@ def _initialize_components(self):
351358
self.vad = self._vad
352359
if self.asr is None:
353360
self.asr = self._initialize_asr()
354-
361+
355362
# 初始化声纹识别
356363
self._initialize_voiceprint()
357-
364+
358365
# 打开语音识别通道
359366
asyncio.run_coroutine_threadsafe(
360367
self.asr.open_audio_channels(self), self.loop
@@ -790,9 +797,9 @@ def chat(self, query, tool_call=False, depth=0):
790797
if not bHasError:
791798
# 如需要大模型先处理一轮,添加相关处理后的日志情况
792799
if len(response_message) > 0:
793-
self.dialogue.put(
794-
Message(role="assistant", content="".join(response_message))
795-
)
800+
text_buff = "".join(response_message)
801+
self.tts_MessageText = text_buff
802+
self.dialogue.put(Message(role="assistant", content=text_buff))
796803
response_message.clear()
797804
self.logger.bind(tag=TAG).debug(
798805
f"function_name={function_name}, function_id={function_id}, function_arguments={function_arguments}"
@@ -814,9 +821,9 @@ def chat(self, query, tool_call=False, depth=0):
814821

815822
# 存储对话内容
816823
if len(response_message) > 0:
817-
self.dialogue.put(
818-
Message(role="assistant", content="".join(response_message))
819-
)
824+
text_buff = "".join(response_message)
825+
self.tts_MessageText = text_buff
826+
self.dialogue.put(Message(role="assistant", content=text_buff))
820827
if depth == 0:
821828
self.tts.tts_text_queue.put(
822829
TTSMessageDTO(
@@ -893,9 +900,7 @@ def _report_worker(self):
893900
if self.executor is None:
894901
continue
895902
# 提交任务到线程池
896-
self.executor.submit(
897-
self._process_report, *item
898-
)
903+
self.executor.submit(self._process_report, *item)
899904
except Exception as e:
900905
self.logger.bind(tag=TAG).error(f"聊天记录上报线程异常: {e}")
901906
except queue.Empty:

main/xiaozhi-server/core/handle/sendAudioHandle.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ async def sendAudioMessage(conn, sentenceType, audios, text):
1212
conn.logger.bind(tag=TAG).info(f"发送音频消息: {sentenceType}, {text}")
1313

1414
pre_buffer = False
15-
if conn.tts.tts_audio_first_sentence and text is not None:
15+
if conn.tts.tts_audio_first_sentence:
1616
conn.logger.bind(tag=TAG).info(f"发送第一段语音: {text}")
1717
conn.tts.tts_audio_first_sentence = False
1818
pre_buffer = True
@@ -73,7 +73,7 @@ async def send_tts_message(conn, state, text=None):
7373
"""发送 TTS 状态消息"""
7474
message = {"type": "tts", "state": state, "session_id": conn.session_id}
7575
if text is not None:
76-
message["text"] = text
76+
message["text"] = textUtils.check_emoji(text)
7777

7878
# TTS播放结束
7979
if state == "stop":

main/xiaozhi-server/core/providers/tts/aliyun_stream.py

Lines changed: 15 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -127,8 +127,6 @@ def __init__(self, config, delete_audio_file):
127127

128128
# 专属tts设置
129129
self.message_id = ""
130-
self.tts_text = ""
131-
self.text_buffer = []
132130

133131
# 创建Opus编码器
134132
self.opus_encoder = opus_encoder_utils.OpusEncoderUtils(
@@ -229,7 +227,6 @@ def tts_text_priority_thread(self):
229227

230228
# aliyunStream独有的参数生成
231229
self.message_id = str(uuid.uuid4().hex)
232-
self.text_buffer = []
233230

234231
logger.bind(tag=TAG).info("开始启动TTS会话...")
235232
future = asyncio.run_coroutine_threadsafe(
@@ -250,7 +247,6 @@ def tts_text_priority_thread(self):
250247
logger.bind(tag=TAG).debug(
251248
f"开始发送TTS文本: {message.content_detail}"
252249
)
253-
self.text_buffer.append(message.content_detail)
254250
future = asyncio.run_coroutine_threadsafe(
255251
self.text_to_speak(message.content_detail, None),
256252
loop=self.conn.loop,
@@ -275,9 +271,6 @@ def tts_text_priority_thread(self):
275271
if message.sentence_type == SentenceType.LAST:
276272
try:
277273
logger.bind(tag=TAG).info("开始结束TTS会话...")
278-
self.tts_text = textUtils.get_string_no_punctuation_or_emoji(
279-
"".join(self.text_buffer).replace("\n", "")
280-
)
281274
future = asyncio.run_coroutine_threadsafe(
282275
self.finish_session(self.conn.sentence_id),
283276
loop=self.conn.loop,
@@ -444,34 +437,35 @@ async def _start_monitor_tts_response(self):
444437
event_name = header.get("name")
445438
if event_name == "SynthesisStarted":
446439
logger.bind(tag=TAG).debug("TTS合成已启动")
447-
elif event_name == "SentenceBegin":
448-
logger.bind(tag=TAG).debug(
449-
f"句子语音生成开始: {self.tts_text}"
450-
)
451-
opus_datas_cache = []
452440
self.tts_audio_queue.put(
453-
(SentenceType.FIRST, [], self.tts_text)
441+
(SentenceType.FIRST, [], None)
454442
)
443+
elif event_name == "SentenceBegin":
444+
opus_datas_cache = []
455445
elif event_name == "SentenceEnd":
456-
logger.bind(tag=TAG).info(
457-
f"句子语音生成成功: {self.tts_text}"
458-
)
459446
if (
460447
not is_first_sentence
461448
or first_sentence_segment_count > 10
462449
):
463450
# 发送缓存的数据
464-
self.tts_audio_queue.put(
465-
(SentenceType.MIDDLE, opus_datas_cache, None)
466-
)
451+
if self.conn.tts_MessageText:
452+
logger.bind(tag=TAG).info(
453+
f"句子语音生成成功: {self.conn.tts_MessageText}"
454+
)
455+
self.tts_audio_queue.put(
456+
(SentenceType.MIDDLE, opus_datas_cache, self.conn.tts_MessageText)
457+
)
458+
self.conn.tts_MessageText = None
459+
else:
460+
self.tts_audio_queue.put(
461+
(SentenceType.MIDDLE, opus_datas_cache, None)
462+
)
467463
# 第一句话结束后,将标志设置为False
468464
is_first_sentence = False
469465
elif event_name == "SynthesisCompleted":
470466
logger.bind(tag=TAG).debug(f"会话结束~~")
471467
self._process_before_stop_play_files()
472468
session_finished = True
473-
self.reuse_judgment = time.time()
474-
self.tts_text = ""
475469
break
476470
except json.JSONDecodeError:
477471
logger.bind(tag=TAG).warning("收到无效的JSON消息")

main/xiaozhi-server/core/providers/tts/huoshan_double_stream.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,6 @@ def tts_text_priority_thread(self):
232232
loop=self.conn.loop,
233233
)
234234
future.result()
235-
self.tts_audio_first_sentence = True
236235
self.before_stop_play_files.clear()
237236
logger.bind(tag=TAG).info("TTS会话启动成功")
238237
except Exception as e:

main/xiaozhi-server/core/providers/tts/linkerai.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,6 @@ def tts_text_priority_thread(self):
5252
self.processed_chars = 0
5353
self.tts_text_buff = []
5454
self.segment_count = 0
55-
self.tts_audio_first_sentence = True
5655
self.before_stop_play_files.clear()
5756
elif ContentType.TEXT == message.content_type:
5857
self.tts_text_buff.append(message.content_detail)

0 commit comments

Comments (0)