@@ -1,23 +1,3 @@
1 | | - |
2 | | -def assign_speakers_to_segments_from_global(chunk_path, segments, diarized_segments): |
3 | | -    from difflib import get_close_matches |
4 | | -    assigned_speakers = [] |
5 | | -    for seg in segments: |
6 | | -        midpoint = (seg['start'] + seg['end']) / 2 |
7 | | -        match = None |
8 | | -        for track, _, label in diarized_segments: |
9 | | -            normalized_label = str(label).strip().upper()  # Normalize case and spacing |
10 | | -            if track.start <= midpoint <= track.end: |
11 | | -                match = normalized_label |
12 | | -                break |
13 | | - |
14 | | -        if not match: |
15 | | -            match = "Speaker_0" |
16 | | -        assigned_speakers.append(match) |
17 | | -    logging.info(f"Speaker assignment breakdown: {Counter(assigned_speakers)}") |
18 | | -    return assigned_speakers |
19 | | - |
20 | | - |
21 | 1 | from datetime import datetime |
22 | 2 | import re |
23 | 3 | import os |
@@ -100,6 +80,23 @@ def convert_to_wav(input_path): |
100 | 80 |         raise |
101 | 81 |     return output_path |
102 | 82 |  |
| 83 | +def assign_speakers_to_segments_from_global(chunk_path, segments, diarized_segments): |
| 84 | +    # Assign each segment the speaker whose diarization turn covers the segment's midpoint |
| 85 | +    assigned_speakers = [] |
| 86 | +    for seg in segments: |
| 87 | +        midpoint = (seg['start'] + seg['end']) / 2 |
| 88 | +        match = None |
| 89 | +        for track, _, label in diarized_segments: |
| 90 | +            normalized_label = str(label).strip().upper()  # Normalize case and spacing |
| 91 | +            if track.start <= midpoint <= track.end: |
| 92 | +                match = normalized_label |
| 93 | +                break |
| 94 | + |
| 95 | +        if not match: |
| 96 | +            match = "SPEAKER_0"  # Fallback for uncovered segments, cased like the normalized labels |
| 97 | +        assigned_speakers.append(match) |
| 98 | +    logging.info(f"Speaker assignment breakdown: {Counter(assigned_speakers)}") |
| 99 | +    return assigned_speakers |
103 | 100 |  |
104 | 101 | # Applies noise reduction using noisereduce (conservative settings) |
105 | 102 |  |
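The relocated helper assigns each transcription segment the speaker whose diarization turn contains the segment's midpoint, falling back to a default label when no turn covers it. A minimal usage sketch, assuming Whisper-style segment dicts and pyannote-style `(segment, track, label)` triples; the `Span` stand-in and the sample values are hypothetical, not from this repo:

```python
from collections import namedtuple

# Hypothetical stand-in for a pyannote Segment; only .start/.end are used.
Span = namedtuple("Span", ["start", "end"])

# Whisper-style transcription segments, times in seconds.
segments = [
    {"start": 0.0, "end": 2.0, "text": "hello"},
    {"start": 2.5, "end": 4.0, "text": "hi there"},
]

# pyannote-style (segment, track, label) triples, as yielded by
# itertracks(yield_label=True) on a diarization Annotation.
diarized_segments = [
    (Span(0.0, 2.2), "A", "speaker_00"),
    (Span(2.2, 4.5), "B", "speaker_01"),
]

# Assumes the helper (and its module-level logging/Counter imports) is importable;
# chunk_path is unused by the helper, so None is fine here.
speakers = assign_speakers_to_segments_from_global(None, segments, diarized_segments)
# Midpoints 1.0 and 3.25 fall inside the two turns above,
# so this yields ["SPEAKER_00", "SPEAKER_01"] after label normalization.
```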
@@ -266,7 +263,7 @@ def assign_speakers_to_segments(full_audio_path, segments, hf_token, max_speaker |
266 | 263 |     """ |
267 | 264 |     logging.info(f"Running speaker diarization on: {full_audio_path}") |
268 | 265 |     try: |
269 | | -        pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=hf_token) |
| 266 | +        pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=hf_token) |
270 | 267 |         if torch.cuda.is_available(): |
271 | 268 |             pipeline.to(torch.device("cuda")) |
272 | 269 |         # Run diarization once per full audio (global instead of per chunk) |
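The hunk above swaps the diarization checkpoint to the gated `pyannote/speaker-diarization-3.1` model, which requires pyannote.audio ≥ 3.1 and accepting the model terms on Hugging Face. For reference, a sketch of how the `diarized_segments` triples consumed by the midpoint lookup are typically produced with the standard pyannote.audio API (not code from this commit; `hf_token` and `full_audio_path` are the enclosing function's parameters):

```python
import torch
from pyannote.audio import Pipeline

pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1", use_auth_token=hf_token
)
if torch.cuda.is_available():
    pipeline.to(torch.device("cuda"))  # GPU strongly recommended

# One global pass over the full recording; returns a pyannote Annotation.
# Optional num_speakers/min_speakers/max_speakers kwargs can constrain the result.
diarization = pipeline(full_audio_path)

# Each triple is (segment, track, label); segment exposes .start/.end in seconds,
# which the assignment helper unpacks as (track, _, label).
diarized_segments = list(diarization.itertracks(yield_label=True))
```

Diarizing the full recording once, as the in-hunk comment notes, keeps speaker labels consistent across chunks; diarizing each chunk independently would renumber speakers per chunk.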