-
Question: Recently, I have been trying to use NiceGUI to create a speech recognition project based on Whisper, using a modular approach to building pages. However, I have encountered some errors while executing speech extraction and recognition functions, which is confusing. @ui.page('/')
def index():
with ui.header(bordered=True, elevated=True):
......
with ui.tab_panels(tabs, value='视频识别').classes('w-full'):
with ui.tab_panel(tab_video):
video_page.video_page() video_page.py @ui.page('/')
def video_page():
......
with ui.card().classes('w-full'):
ui.button('开始识别', on_click=lambda: start_button_click()).classes('w-full')
......
async def start_button_click():
    """Button handler: extract the audio track, then transcribe it.

    NOTE(review): ``run.io_bound`` expects a *synchronous* callable that it
    runs in a worker thread. ``extract_audio`` and ``start_transcribe`` are
    declared ``async def`` below, so ``io_bound`` hands back an un-awaited
    coroutine instead of their results — this is the bug discussed in this
    thread; the worker functions should be plain ``def``.
    """
    # Run extraction off the event loop so the UI stays responsive.
    audio_path = await run.io_bound(extract_audio)
    if audio_path:
        # Only transcribe when extraction actually produced a file path.
        task_transcribe = await run.io_bound(start_transcribe, audio_path)
        print(f'识别结果:{task_transcribe}')
def extract_audio():
    """Extract the audio track of ``upload_video_path`` into an MP3 file.

    Deliberately a plain (non-async) function: ``run.io_bound`` expects a
    synchronous callable that it executes in a worker thread. The original
    ``async def`` version made ``io_bound`` return an un-awaited coroutine,
    so the extraction never ran — this is the fix.

    Returns:
        str | None: path of the written MP3 on success, ``None`` if ffmpeg
        fails.

    Side effects:
        Writes ``audio/<video_name>.mp3`` and records the path in
        ``config/upload_config.toml`` under ``[upload] audio_save_path``.
    """
    current_path = os.getcwd()
    ffmpeg_path = os.path.join(current_path, 'ffmpeg\\bin\\ffmpeg.exe')
    output_audio = os.path.join(current_path, 'audio')
    # ffmpeg does not create missing directories; make sure the target exists.
    os.makedirs(output_audio, exist_ok=True)
    video_name = os.path.splitext(os.path.basename(upload_video_path))[0]
    output_audio_path = os.path.join(output_audio, f"{video_name}.mp3")
    command = [
        ffmpeg_path,
        '-i', upload_video_path,
        '-q:a', '0',   # best VBR audio quality
        '-map', 'a',   # audio streams only
        output_audio_path,
    ]
    try:
        process = subprocess.run(command, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT, text=True,
                                 encoding='utf-8', check=True)
        for line in process.stdout.splitlines():
            print(line)
        # Persist the output path so other pages/tabs can pick it up.
        toml_file = os.path.join(current_path, 'config/upload_config.toml')
        upload_config = toml.load(toml_file)
        upload_config['upload']['audio_save_path'] = output_audio_path
        with open(toml_file, 'w', encoding='utf-8') as f:
            toml.dump(upload_config, f)
        return output_audio_path
    except subprocess.CalledProcessError as e:
        # Surface ffmpeg's output instead of failing silently, then keep the
        # original best-effort contract of returning None.
        print(e.output)
        return None
async def start_transcribe(audio_path):
    """Transcribe ``audio_path`` with a local faster-whisper model and print SRT-style lines.

    NOTE(review): ``run.io_bound`` expects a synchronous callable; because this
    function is ``async def``, the ``io_bound`` call in ``start_button_click``
    receives an un-awaited coroutine and the body never executes — drop the
    ``async`` keyword.
    """
    # All whisper_local_model_* values are module-level settings not shown in this excerpt.
    model = faster_whisper.WhisperModel(model_size_or_path=whisper_local_model_path,
                                        device=whisper_local_model_gpu,
                                        local_files_only=True)
    segments, info = model.transcribe(audio=audio_path,
                                      language=whisper_local_model_language,
                                      task=whisper_local_model_task,
                                      vad_filter=whisper_local_model_vad,
                                      vad_parameters=dict(min_silence_duration_ms=whisper_local_model_min_vad),
                                      initial_prompt=whisper_local_model_prompt,
                                      chunk_length=whisper_local_model_chunk_length,
                                      temperature=whisper_local_model_temp,
                                      without_timestamps=whisper_local_model_without_timestamps,
                                      word_timestamps=whisper_local_model_word_timestamps,
                                      beam_size=whisper_local_model_beam_size)
    for i, segment in enumerate(segments):
        # NOTE(review): faster-whisper yields Segment namedtuples, normally read
        # as segment.start / segment.end / segment.text — the subscripting below
        # may be the source of the reported error. TODO confirm against the
        # faster-whisper API docs.
        start_time, end_time, text = segment['start'], segment['end'], segment['text']
        # Format hh:mm:ss,mmm timestamps in SRT style.
        start_srt = f"{int(start_time // 3600):02}:{int((start_time % 3600) // 60):02}:{int(start_time % 60):02},{int((start_time * 1000) % 1000):03}"
        end_srt = f"{int(end_time // 3600):02}:{int((end_time % 3600) // 60):02}:{int(end_time % 60):02},{int((end_time * 1000) % 1000):03}"
        print(f"{i + 1}\n{start_srt} --> {end_srt}\n{text}\n\n") The error message encountered is as follows:
After reading the documentation of NiceGUI, I still don't understand how to use io_bound. |
Beta Was this translation helpful? Give feedback.
Replies: 3 comments 10 replies
-
Hi @mxiaonian, Instead of await run.io_bound(extract_audio) you should write await run.io_bound(extract_audio()) |
Beta Was this translation helpful? Give feedback.
-
If you are interested in reproducing this problem, you can use the following minimal code # -*- coding: utf-8 -*-
import os
import faster_whisper
from nicegui import run, ui
# Work around the "duplicate OpenMP runtime" abort seen with some ML stacks.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
def start_transcribe():
    """Minimal reproduction: transcribe a hard-coded audio file with faster-whisper.

    NOTE(review): ``whisper_local_model_path`` is undefined in this snippet —
    substitute a real local model path before running. The two lines marked
    "problem code" are the parameters the reporter identified as triggering
    the error.
    """
    audio_path = r'your_audio_path'
    model = faster_whisper.WhisperModel(model_size_or_path=whisper_local_model_path,
                                        device="cuda",
                                        local_files_only=True)
    segments, transcription_info = model.transcribe(audio=audio_path,
                                                    language='en',
                                                    task="transcribe",
                                                    vad_filter=True,
                                                    vad_parameters=dict(
                                                        min_silence_duration_ms=500),
                                                    initial_prompt="Prompt",
                                                    temperature=0.8,  # problem code
                                                    chunk_length=5,  # problem code
                                                    without_timestamps=True,
                                                    word_timestamps=False,
                                                    beam_size=5)
    for segment in segments:
        print(f"Return information:{segment}")
async def start_button_click():
    """Button handler: run the (synchronous) transcription in a worker thread."""
    await run.io_bound(start_transcribe)
ui.button("start", on_click=start_button_click)
ui.run() |
Beta Was this translation helpful? Give feedback.
-
Summarize the knowledge learned. from nicegui import run, ui
async def start():
await run.io_bound(long_term_calculation_1)
await run.io_bound(long_term_calculation_2)
await run.io_bound(long_term_calculation_3)
......
def long_term_calculation_1():
'''your_code'''
......
def long_term_calculation_2():
'''your_code'''
......
def long_term_calculation_3():
'''your_code'''
...... When executing the start function, the program waits for each step to be completed before proceeding to the next step |
Beta Was this translation helpful? Give feedback.
Summarize the knowledge learned.
Multiple pieces of logic that each require a long computation time can be started as follows: the entry function needs to be asynchronous, while the computation functions themselves do not.
for example:
When executing the start function, the program waits for each step to be completed before proceeding to the next step.