Skip to content

Commit c035dc5

Browse files
committed
fix: remove empty lines from text before text-to-speech conversion
1 parent bd87004 commit c035dc5

File tree

2 files changed

+4
-3
lines changed

2 files changed

+4
-3
lines changed

apps/application/serializers/application.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@
3434
from common.db.search import native_search, native_page_search
3535
from common.exception.app_exception import AppApiException
3636
from common.field.common import UploadedFileField
37-
from common.utils.common import get_file_content, restricted_loads, generate_uuid
37+
from common.utils.common import get_file_content, restricted_loads, generate_uuid, _remove_empty_lines
3838
from knowledge.models import Knowledge, KnowledgeScope
3939
from knowledge.serializers.knowledge import KnowledgeSerializer, KnowledgeModelSerializer
4040
from maxkb.conf import PROJECT_DIR
@@ -931,8 +931,9 @@ def text_to_speech(self, instance, with_valid=True):
931931
if application.tts_model_enable:
932932
model = get_model_instance_by_model_workspace_id(application.tts_model_id, application.workspace_id,
933933
**application.tts_model_params_setting)
934+
content = _remove_empty_lines(instance.get('text', ''))
934935

935-
return model.text_to_speech(instance.get('text'))
936+
return model.text_to_speech(content)
936937

937938
def play_demo_text(self, instance, with_valid=True):
938939
text = '你好,这里是语音播放测试'

apps/common/utils/common.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,7 @@ def markdown_to_plain_text(md: str) -> str:
118118
# 使用正则表达式去除所有 HTML 标签
119119
text = re.sub(r'<[^>]+>', '', text)
120120
# 先移除特定媒体标签(优先级高于通用HTML标签移除)
121-
text = re.sub(r'<(audio|video)[^>]*>.*?</\1>', '', text, flags=re.DOTALL) # 匹配音频/视频标签
121+
text = re.sub(r'<(?:audio|video)(?:\s+[^>]*)?>[\s\S]*?(?:</(?:audio|video)>)?', '', text, flags=re.IGNORECASE)
122122
text = re.sub(r'<img[^>]*>', '', text) # 匹配图片标签
123123
# 去除多余的空白字符(包括换行符、制表符等)
124124
text = re.sub(r'\s+', ' ', text)

0 commit comments

Comments
 (0)