|
| 1 | +#!/usr/bin/env python3 |
1 | 2 | """ |
2 | 3 | This script shows how to use Python APIs for speaker identification with |
3 | 4 | a microphone. |
|
41 | 42 | --model ./wespeaker_zh_cnceleb_resnet34.onnx |
42 | 43 | """ |
43 | 44 |
|
| 45 | +import argparse |
44 | 46 | import functools |
| 47 | +import queue |
| 48 | +import threading |
45 | 49 | from collections import defaultdict |
46 | 50 | from pathlib import Path |
47 | | -from typing import Dict, List, Literal, Tuple, Union |
| 51 | +from typing import Dict, List, Literal, Optional, Tuple, Union |
48 | 52 |
|
49 | 53 | import numpy as np |
50 | 54 | import sherpa_onnx |
51 | 55 | import soundfile as sf |
52 | 56 | from numpy.typing import NDArray |
53 | 57 |
|
| 58 | +try: |
| 59 | + import sounddevice as sd |
| 60 | +except ImportError: |
| 61 | + print("Please install sounddevice first. You can use\n\t") |
| 62 | + print("pip install sounddevice") |
| 63 | + print("\nto install it") |
| 64 | + import sys |
| 65 | + |
| 66 | + sys.exit(1) |
| 67 | + |
54 | 68 |
|
55 | 69 | def load_speaker_embedding_model( |
56 | 70 | model: Union[str, Path], |
@@ -166,25 +180,6 @@ def compute_avg_speaker_embedding( |
166 | 180 | return embeddings_sum / len(filenames) |
167 | 181 |
|
168 | 182 |
|
169 | | -# %% |
170 | | -# The following code is required for command line interface. |
171 | | -# If you only need the packaged functions, you can use only the code above |
172 | | -import argparse |
173 | | -import queue |
174 | | -import threading |
175 | | -from typing import Optional |
176 | | - |
177 | | -try: |
178 | | - import sounddevice as sd |
179 | | -except ImportError: |
180 | | - print("Please install sounddevice first. You can use\n\t") |
181 | | - print("pip install sounddevice") |
182 | | - print("\nto install it") |
183 | | - import sys |
184 | | - |
185 | | - sys.exit(1) |
186 | | - |
187 | | - |
188 | 183 | class Args(argparse.Namespace): |
189 | 184 | speaker_file: Path |
190 | 185 | model: Path |
@@ -274,8 +269,8 @@ def print_microphone_device_info(self) -> None: |
274 | 269 | print("Microphone device information:\n") |
275 | 270 | print(f"Device ID: {device_info['index']}") |
276 | 271 | print(f"Name: {device_info['name']}") |
277 | | - print(f"Default Channels: {device_info['max_input_channels']}") |
278 | | - print(f"Default SampleRate: {device_info['default_samplerate']}") |
| 272 | + print(f"Default Microphone Channels: {device_info['max_input_channels']}") |
| 273 | + print(f"Default Microphone SampleRate: {device_info['default_samplerate']}") |
279 | 274 | print("=" * 50) |
280 | 275 |
|
281 | 276 | def read_mic(self) -> None: |
@@ -315,7 +310,7 @@ def infer_speaker( |
315 | 310 | stream.input_finished() |
316 | 311 |
|
317 | 312 | embedding = np.array(extractor.compute(stream), dtype=np.float32) |
318 | | - name = manager.search(embedding, threshold=threshold) |
| 313 | + name: str = manager.search(embedding, threshold=threshold) |
319 | 314 | return name or "unknown" |
320 | 315 |
|
321 | 316 |
|
@@ -343,16 +338,12 @@ def main() -> None: |
343 | 338 | input("Press Enter to stop recording") |
344 | 339 | recorder.stop_recording() |
345 | 340 |
|
346 | | - print("Compute embedding") |
347 | 341 | name = recorder.infer_speaker(extractor, manager, args.threshold) |
348 | | - print(f"Predicted name: {name}") |
| 342 | + print(f"Predicted name: {name}\n") |
349 | 343 |
|
350 | 344 |
|
if __name__ == "__main__":
    # Script entry point: run main() and treat Ctrl + C as a normal way to
    # quit, reporting it with a short message instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")
    # Any other exception is deliberately not caught here: it propagates
    # unchanged and the interpreter prints the full traceback (message
    # included), so a catch-print-reraise handler would only duplicate it.
0 commit comments