Skip to content

Commit 4530d3a

Browse files
committed
refactor: Support uploading multiple audio files
1 parent 2a88651 commit 4530d3a

File tree

7 files changed

+41
-19
lines changed

7 files changed

+41
-19
lines changed

apps/application/flow/step_node/speech_to_text_step_node/impl/base_speech_to_text_node.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from concurrent.futures import ThreadPoolExecutor
1111
from application.flow.i_step_node import NodeResult, INode
1212
from application.flow.step_node.speech_to_text_step_node.i_speech_to_text_node import ISpeechToTextNode
13-
from common.util.common import split_and_transcribe
13+
from common.util.common import split_and_transcribe, any_to_mp3
1414
from dataset.models import File
1515
from setting.models_provider.tools import get_model_instance_by_model_user_id
1616

@@ -26,16 +26,21 @@ def execute(self, stt_model_id, chat_id, audio, **kwargs) -> NodeResult:
2626
audio_list = audio
2727
self.context['audio_list'] = audio
2828

29-
3029
def process_audio_item(audio_item, model):
    """Transcribe a single uploaded audio file with the given speech-to-text model.

    The stored file is written to a temporary file that keeps the extension
    of its original file name, converted to MP3 via ``any_to_mp3``, and the
    resulting MP3 is handed to ``split_and_transcribe``.

    :param audio_item: mapping that carries the stored file's ``file_id``
    :param model: speech-to-text model passed through to ``split_and_transcribe``
    :return: whatever ``split_and_transcribe`` returns for the converted file
    """
    file = QuerySet(File).filter(id=audio_item['file_id']).first()
    # The extension of the stored file name becomes the suffix of the source temp file
    file_format = file.file_name.split('.')[-1]
    temp_file_path = None
    temp_mp3_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=f'.{file_format}') as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(file.get_byte().tobytes())
        # Only reserve a path here; any_to_mp3 writes the converted audio to it
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_mp3_file:
            temp_mp3_path = temp_mp3_file.name
        any_to_mp3(temp_file_path, temp_mp3_path)
        # Transcribe the converted MP3, not the original upload:
        # split_and_transcribe decodes its input as "mp3" by default.
        return split_and_transcribe(temp_mp3_path, model)
    finally:
        # Runs even when writing or converting fails, so neither temp file is leaked
        for path in (temp_file_path, temp_mp3_path):
            if path is not None and os.path.exists(path):
                os.remove(path)
3944

4045
def process_audio_items(audio_list, model):
4146
with ThreadPoolExecutor(max_workers=5) as executor:

apps/common/util/common.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -181,18 +181,18 @@ def sil_to_wav(silk_path, wav_path, rate: int = 24000):
181181
f.write(wav_data)
182182

183183

184-
def split_and_transcribe(file_path, model, max_segment_length_ms=59000, audio_format="mp3"):
    """Transcribe an audio file, splitting it into segments when it is too long.

    The file is loaded with ``AudioSegment.from_file`` using ``audio_format``.
    If its length does not exceed ``max_segment_length_ms`` it is transcribed
    in one call and the model's result is returned as-is. Otherwise it is cut
    into consecutive slices of at most ``max_segment_length_ms``; each slice
    is transcribed separately and the string results are joined with spaces.

    :param file_path: path of the audio file to load
    :param model: object exposing ``speech_to_text(stream)``
    :param max_segment_length_ms: maximum length of one slice sent to the model
    :param audio_format: format used both to decode the file and to re-encode slices
    :return: the model result for a short file, or the space-joined segment texts
    """

    def transcribe(clip):
        # Re-encode the clip and pass its bytes to the model as an in-memory stream
        return model.speech_to_text(io.BytesIO(clip.export(format=audio_format).read()))

    audio_data = AudioSegment.from_file(file_path, format=audio_format)
    total_ms = len(audio_data)

    if total_ms <= max_segment_length_ms:
        return transcribe(audio_data)

    pieces = []
    for offset in range(0, total_ms, max_segment_length_ms):
        clip = audio_data[offset:min(total_ms, offset + max_segment_length_ms)]
        result = transcribe(clip)
        # Non-string results are dropped from the combined transcript
        if isinstance(result, str):
            pieces.append(result)
    return ' '.join(pieces)

apps/dataset/serializers/file_serializers.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,11 @@ def get(self, with_valid=True):
7777
file = QuerySet(File).filter(id=file_id).first()
7878
if file is None:
7979
raise NotFound404(404, "不存在的文件")
80-
# 如果是mp3文件,直接返回文件流
81-
if file.file_name.split(".")[-1] == 'mp3':
82-
return HttpResponse(file.get_byte(), status=200, headers={'Content-Type': 'audio/mp3',
83-
'Content-Disposition': 'attachment; filename="abc.mp3"'})
80+
# 如果是音频文件,直接返回文件流
81+
file_type = file.file_name.split(".")[-1]
82+
if file_type in ['mp3', 'wav', 'ogg', 'aac']:
83+
return HttpResponse(file.get_byte(), status=200, headers={'Content-Type': f'audio/{file_type}',
84+
'Content-Disposition': 'attachment; filename="{}"'.format(
85+
file.file_name)})
8486
return HttpResponse(file.get_byte(), status=200,
8587
headers={'Content-Type': mime_types.get(file.file_name.split(".")[-1], 'text/plain')})

apps/setting/models_provider/impl/vllm_model_provider/credential/llm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def is_valid(self, model_type: str, model_name, model_credential: Dict[str, obje
3434
if not any(list(filter(lambda mt: mt.get('value') == model_type, model_type_list))):
3535
raise AppApiException(ValidCode.valid_error.value, f'{model_type} 模型类型不支持')
3636
try:
37-
model_list = provider.get_base_model_list(model_credential.get('api_base'))
37+
model_list = provider.get_base_model_list(model_credential.get('api_base'), model_credential.get('api_key'))
3838
except Exception as e:
3939
raise AppApiException(ValidCode.valid_error.value, "API 域名无效")
4040
exist = provider.get_model_info_by_name(model_list, model_name)

apps/setting/models_provider/impl/vllm_model_provider/vllm_model_provider.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,13 @@ def get_model_provide_info(self):
4545
'vllm_icon_svg')))
4646

4747
@staticmethod
def get_base_model_list(api_base, api_key=None):
    """Fetch the model list from the ``/v1/models`` endpoint of ``api_base``.

    :param api_base: service address; ``/v1`` is appended to the base URL
        when it does not already end with it
    :param api_key: optional key sent as a ``Bearer`` token. It defaults to
        ``None`` so callers that pass only ``api_base`` keep working; a falsy
        value sends no ``Authorization`` header.
    :return: the ``data`` entry of the JSON response
    :raises requests.HTTPError: when the response has an error status
    """
    base_url = get_base_url(api_base)
    if not base_url.endswith('/v1'):
        base_url = base_url + '/v1'
    # Only authenticate when a key was supplied
    headers = {'Authorization': f"Bearer {api_key}"} if api_key else {}
    r = requests.request(method="GET", url=f"{base_url}/models", headers=headers, timeout=5)
    r.raise_for_status()
    return r.json().get('data')
5457

ui/src/components/ai-chat/component/chat-input-operate/index.vue

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@
2727
class="delete-icon color-secondary"
2828
v-if="showDelete === item.url"
2929
>
30-
<el-icon><CircleCloseFilled /></el-icon>
30+
<el-icon>
31+
<CircleCloseFilled />
32+
</el-icon>
3133
</div>
3234
<img :src="getImgUrl(item && item?.name)" alt="" width="24" />
3335
<div class="ml-4 ellipsis" style="max-width: 160px" :title="item && item?.name">
@@ -48,7 +50,9 @@
4850
class="delete-icon color-secondary"
4951
v-if="showDelete === item.url"
5052
>
51-
<el-icon><CircleCloseFilled /></el-icon>
53+
<el-icon>
54+
<CircleCloseFilled />
55+
</el-icon>
5256
</div>
5357
<el-image
5458
:src="item.url"
@@ -71,7 +75,9 @@
7175
class="delete-icon color-secondary"
7276
v-if="showDelete === item.url"
7377
>
74-
<el-icon><CircleCloseFilled /></el-icon>
78+
<el-icon>
79+
<CircleCloseFilled />
80+
</el-icon>
7581
</div>
7682
<img :src="getImgUrl(item && item?.name)" alt="" width="24" />
7783
<div class="ml-4 ellipsis" style="max-width: 160px" :title="item && item?.name">
@@ -180,6 +186,7 @@ import 'recorder-core/src/engine/mp3'
180186
181187
import 'recorder-core/src/engine/mp3-engine'
182188
import { MsgWarning } from '@/utils/message'
189+
183190
const route = useRoute()
184191
const {
185192
query: { mode }
@@ -227,7 +234,7 @@ const localLoading = computed({
227234
const imageExtensions = ['jpg', 'jpeg', 'png', 'gif', 'bmp']
228235
const documentExtensions = ['pdf', 'docx', 'txt', 'xls', 'xlsx', 'md', 'html', 'csv']
229236
const videoExtensions = ['mp4', 'avi', 'mov', 'mkv', 'flv']
230-
const audioExtensions = ['mp3']
237+
const audioExtensions = ['mp3', 'wav', 'ogg', 'aac']
231238
232239
const getAcceptList = () => {
233240
const { image, document, audio, video } = props.applicationDetails.file_upload_setting
@@ -513,9 +520,11 @@ function deleteFile(index: number, val: string) {
513520
uploadAudioList.value.splice(index, 1)
514521
}
515522
}
523+
516524
function mouseenter(row: any) {
517525
showDelete.value = row.url
518526
}
527+
519528
function mouseleave() {
520529
showDelete.value = ''
521530
}
@@ -530,16 +539,19 @@ onMounted(() => {
530539
</script>
531540
<style lang="scss" scope>
532541
@import '../../index.scss';
542+
533543
.file {
534544
position: relative;
535545
overflow: inherit;
546+
536547
.delete-icon {
537548
position: absolute;
538549
right: -5px;
539550
top: -5px;
540551
z-index: 1;
541552
}
542553
}
554+
543555
.upload-tooltip-width {
544556
width: 300px;
545557
}

ui/src/workflow/nodes/base-node/component/FileUploadSettingDialog.vue

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@
8585
<div class="flex align-center">
8686
<img class="mr-12" src="@/assets/icon_file-audio.svg" alt="" />
8787
<div>
88-
<p class="line-height-22 mt-4">音频(MP3)</p>
88+
<p class="line-height-22 mt-4">音频(MP3、WAV、OGG、AAC)</p>
8989
<el-text class="color-secondary">需要使用“语音转文本”节点解析音频内容</el-text>
9090
</div>
9191
</div>

0 commit comments

Comments
 (0)