@@ -435,39 +435,36 @@ deploymentSpec:
435
435
\ continue\n\n # Check if file is already WAV\n \
436
436
\ if audio_file.suffix.lower() == \" .wav\" :\n processed_audio_files.append(audio_file)\n \
437
437
\ print(f\" Using WAV file directly: {audio_file.name}\" )\n \
438
- \ else:\n # Convert non-WAV files to WAV format\
439
- \ using ffmpeg\n print(f\" Converting {audio_file.name} to\
440
- \ WAV format...\" )\n import tempfile\n\n with\
441
- \ tempfile.NamedTemporaryFile(\n suffix=f\" _{audio_file.stem}.wav\" \
442
- , delete=False\n ) as tmp:\n temp_wav\
443
- \ = pathlib.Path(tmp.name)\n\n try:\n \
444
- \ # Use ffmpeg to convert to WAV format\n subprocess.run(\n \
445
- \ [\n \" ffmpeg\" ,\n \
446
- \ \" -i\" ,\n str(audio_file),\n \
447
- \ \" -ar\" ,\n \" 16000\" \
448
- , # 16kHz sample rate (good for whisper)\n \" \
449
- -ac\" ,\n \" 1\" , # mono channel\n \
450
- \ \" -c:a\" ,\n \" pcm_s16le\" ,\
451
- \ # 16-bit PCM\n \" -y\" , # overwrite output\
452
- \ file\n str(temp_wav),\n \
453
- \ ],\n check=True,\n capture_output=True,\n \
454
- \ )\n\n processed_audio_files.append(temp_wav)\n \
455
- \ temp_files_to_cleanup.append(temp_wav)\n \
456
- \ print(f\" Successfully converted {audio_file.name} to WAV format\" \
457
- )\n\n except subprocess.CalledProcessError as e:\n \
458
- \ print(f\" ffmpeg conversion failed for {audio_file.name}:\
459
- \ {e}\" )\n if e.stderr:\n print(f\" \
460
- stderr: {e.stderr.decode()}\" )\n continue\n return\
461
- \ (processed_audio_files, temp_files_to_cleanup)\n\n # Clean up temporary\
462
- \ files\n def cleanup_temp_files(temp_files_to_cleanup: List[pathlib.Path])\
463
- \ -> None:\n for temp_file in temp_files_to_cleanup:\n \
464
- \ if temp_file.exists():\n temp_file.unlink()\n \
465
- \ print(f\" Cleaned up temporary file: {temp_file.name}\" )\n\n \
466
- \ # Return a Docling DocumentConverter configured for ASR with whisper_turbo\
467
- \ model.\n def get_asr_converter() -> DocumentConverter:\n \"\" \
468
- \" Create a DocumentConverter configured for ASR with whisper_turbo model.\" \
469
- \"\"\n whisper_turbo_llm = InlineAsrNativeWhisperOptions(\n \
470
- \ repo_id=\" turbo\" ,\n inference_framework=InferenceAsrFramework.WHISPER,\n \
438
+ \ continue\n\n # Convert non-WAV files to WAV\
439
+ \ format using ffmpeg\n print(f\" Converting {audio_file.name}\
440
+ \ to WAV format...\" )\n import tempfile\n\n with tempfile.NamedTemporaryFile(\n \
441
+ \ suffix=f\" _{audio_file.stem}.wav\" , delete=False\n \
442
+ \ ) as tmp:\n temp_wav = pathlib.Path(tmp.name)\n\n \
443
+ \ try:\n # Use ffmpeg to convert to WAV format\n \
444
+ \ subprocess.run(\n [\n \
445
+ \ \" ffmpeg\" ,\n \" -i\" ,\n \
446
+ \ str(audio_file),\n \" -ar\" ,\n \
447
+ \ \" 16000\" , # 16kHz sample rate (good for whisper)\n \
448
+ \ \" -ac\" ,\n \" 1\" , # mono\
449
+ \ channel\n \" -c:a\" ,\n \" \
450
+ pcm_s16le\" , # 16-bit PCM\n \" -y\" , # overwrite\
451
+ \ output file\n str(temp_wav),\n \
452
+ \ ],\n check=True,\n capture_output=True,\n \
453
+ \ )\n\n processed_audio_files.append(temp_wav)\n \
454
+ \ temp_files_to_cleanup.append(temp_wav)\n \
455
+ \ print(f\" Successfully converted {audio_file.name} to WAV format\" )\n\n \
456
+ \ except subprocess.CalledProcessError as e:\n \
457
+ \ print(f\" ffmpeg conversion failed for {audio_file.name}: {e}\" )\n \
458
+ \ if e.stderr:\n print(f\" stderr: {e.stderr.decode()}\" \
459
+ )\n continue\n return (processed_audio_files, temp_files_to_cleanup)\n \
460
+ \n # Clean up temporary files\n def cleanup_temp_files(temp_files_to_cleanup:\
461
+ \ List[pathlib.Path]) -> None:\n for temp_file in temp_files_to_cleanup:\n \
462
+ \ temp_file.unlink(missing_ok=True)\n print(f\" Cleaned\
463
+ \ up temporary file: {temp_file.name}\" )\n\n # Return a Docling DocumentConverter\
464
+ \ configured for ASR with whisper_turbo model.\n def get_asr_converter()\
465
+ \ -> DocumentConverter:\n \"\"\" Create a DocumentConverter configured\
466
+ \ for ASR with whisper_turbo model.\"\"\"\n whisper_turbo_llm = InlineAsrNativeWhisperOptions(\n \
467
+ \ repo_id=\" turbo\" ,\n inference_framework=InferenceAsrFramework.WHISPER,\n \
471
468
\ verbose=True,\n timestamps=False,\n word_timestamps=False,\n \
472
469
\ temperature=0.0,\n max_new_tokens=256,\n \
473
470
\ max_time_chunk=30.0,\n )\n\n pipeline_options = AsrPipelineOptions()\n \
@@ -643,39 +640,36 @@ deploymentSpec:
643
640
\ continue\n\n # Check if file is already WAV\n \
644
641
\ if audio_file.suffix.lower() == \" .wav\" :\n processed_audio_files.append(audio_file)\n \
645
642
\ print(f\" Using WAV file directly: {audio_file.name}\" )\n \
646
- \ else:\n # Convert non-WAV files to WAV format\
647
- \ using ffmpeg\n print(f\" Converting {audio_file.name} to\
648
- \ WAV format...\" )\n import tempfile\n\n with\
649
- \ tempfile.NamedTemporaryFile(\n suffix=f\" _{audio_file.stem}.wav\" \
650
- , delete=False\n ) as tmp:\n temp_wav\
651
- \ = pathlib.Path(tmp.name)\n\n try:\n \
652
- \ # Use ffmpeg to convert to WAV format\n subprocess.run(\n \
653
- \ [\n \" ffmpeg\" ,\n \
654
- \ \" -i\" ,\n str(audio_file),\n \
655
- \ \" -ar\" ,\n \" 16000\" \
656
- , # 16kHz sample rate (good for whisper)\n \" \
657
- -ac\" ,\n \" 1\" , # mono channel\n \
658
- \ \" -c:a\" ,\n \" pcm_s16le\" ,\
659
- \ # 16-bit PCM\n \" -y\" , # overwrite output\
660
- \ file\n str(temp_wav),\n \
661
- \ ],\n check=True,\n capture_output=True,\n \
662
- \ )\n\n processed_audio_files.append(temp_wav)\n \
663
- \ temp_files_to_cleanup.append(temp_wav)\n \
664
- \ print(f\" Successfully converted {audio_file.name} to WAV format\" \
665
- )\n\n except subprocess.CalledProcessError as e:\n \
666
- \ print(f\" ffmpeg conversion failed for {audio_file.name}:\
667
- \ {e}\" )\n if e.stderr:\n print(f\" \
668
- stderr: {e.stderr.decode()}\" )\n continue\n return\
669
- \ (processed_audio_files, temp_files_to_cleanup)\n\n # Clean up temporary\
670
- \ files\n def cleanup_temp_files(temp_files_to_cleanup: List[pathlib.Path])\
671
- \ -> None:\n for temp_file in temp_files_to_cleanup:\n \
672
- \ if temp_file.exists():\n temp_file.unlink()\n \
673
- \ print(f\" Cleaned up temporary file: {temp_file.name}\" )\n\n \
674
- \ # Return a Docling DocumentConverter configured for ASR with whisper_turbo\
675
- \ model.\n def get_asr_converter() -> DocumentConverter:\n \"\" \
676
- \" Create a DocumentConverter configured for ASR with whisper_turbo model.\" \
677
- \"\"\n whisper_turbo_llm = InlineAsrNativeWhisperOptions(\n \
678
- \ repo_id=\" turbo\" ,\n inference_framework=InferenceAsrFramework.WHISPER,\n \
643
+ \ continue\n\n # Convert non-WAV files to WAV\
644
+ \ format using ffmpeg\n print(f\" Converting {audio_file.name}\
645
+ \ to WAV format...\" )\n import tempfile\n\n with tempfile.NamedTemporaryFile(\n \
646
+ \ suffix=f\" _{audio_file.stem}.wav\" , delete=False\n \
647
+ \ ) as tmp:\n temp_wav = pathlib.Path(tmp.name)\n\n \
648
+ \ try:\n # Use ffmpeg to convert to WAV format\n \
649
+ \ subprocess.run(\n [\n \
650
+ \ \" ffmpeg\" ,\n \" -i\" ,\n \
651
+ \ str(audio_file),\n \" -ar\" ,\n \
652
+ \ \" 16000\" , # 16kHz sample rate (good for whisper)\n \
653
+ \ \" -ac\" ,\n \" 1\" , # mono\
654
+ \ channel\n \" -c:a\" ,\n \" \
655
+ pcm_s16le\" , # 16-bit PCM\n \" -y\" , # overwrite\
656
+ \ output file\n str(temp_wav),\n \
657
+ \ ],\n check=True,\n capture_output=True,\n \
658
+ \ )\n\n processed_audio_files.append(temp_wav)\n \
659
+ \ temp_files_to_cleanup.append(temp_wav)\n \
660
+ \ print(f\" Successfully converted {audio_file.name} to WAV format\" )\n\n \
661
+ \ except subprocess.CalledProcessError as e:\n \
662
+ \ print(f\" ffmpeg conversion failed for {audio_file.name}: {e}\" )\n \
663
+ \ if e.stderr:\n print(f\" stderr: {e.stderr.decode()}\" \
664
+ )\n continue\n return (processed_audio_files, temp_files_to_cleanup)\n \
665
+ \n # Clean up temporary files\n def cleanup_temp_files(temp_files_to_cleanup:\
666
+ \ List[pathlib.Path]) -> None:\n for temp_file in temp_files_to_cleanup:\n \
667
+ \ temp_file.unlink(missing_ok=True)\n print(f\" Cleaned\
668
+ \ up temporary file: {temp_file.name}\" )\n\n # Return a Docling DocumentConverter\
669
+ \ configured for ASR with whisper_turbo model.\n def get_asr_converter()\
670
+ \ -> DocumentConverter:\n \"\"\" Create a DocumentConverter configured\
671
+ \ for ASR with whisper_turbo model.\"\"\"\n whisper_turbo_llm = InlineAsrNativeWhisperOptions(\n \
672
+ \ repo_id=\" turbo\" ,\n inference_framework=InferenceAsrFramework.WHISPER,\n \
679
673
\ verbose=True,\n timestamps=False,\n word_timestamps=False,\n \
680
674
\ temperature=0.0,\n max_new_tokens=256,\n \
681
675
\ max_time_chunk=30.0,\n )\n\n pipeline_options = AsrPipelineOptions()\n \
0 commit comments