Skip to content

Commit 2c48310

Browse files
rmittal-github, sarane22, mohnishparmar, pskrunner14
authored
Release/2.17.0 (#99)
* Support custom_configuration param in ASR clients (#94) * Passing AST param through custom_configuration * Added exception handling for TTS talk.py * Exposing custom-configuration to cli * Updating function name to add_custom_configuration_to_config * Updating help message --------- Co-authored-by: mohnishparmar <[email protected]> * Support setting max speakers for offline diarization (#97) * fix: accept input for max_speaker_count in asr/transcribe_file_offline * fix: rename input field to diarization_max_speakers * remove: redundant default value for max_speakers * update SHA of common repo submodule --------- Co-authored-by: sarane22 <[email protected]> Co-authored-by: mohnishparmar <[email protected]> Co-authored-by: Prabhsimran Singh <[email protected]>
1 parent c789e98 commit 2c48310

File tree

9 files changed

+57
-5
lines changed

9 files changed

+57
-5
lines changed

common

riva/client/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
print_streaming,
1313
sleep_audio_length,
1414
add_endpoint_parameters_to_config,
15+
add_custom_configuration_to_config,
1516
)
1617
from riva.client.auth import Auth
1718
from riva.client.nlp import (

riva/client/argparse_utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ def add_asr_config_argparse_parameters(
4949
action='store_true',
5050
help="Flag that controls if speaker diarization should be performed",
5151
)
52+
parser.add_argument(
53+
"--diarization-max-speakers",
54+
default=3,
55+
type=int,
56+
help="Max number of speakers to detect when performing speaker diarization",
57+
)
5258
parser.add_argument(
5359
"--start-history",
5460
default=-1,
@@ -85,6 +91,12 @@ def add_asr_config_argparse_parameters(
8591
type=float,
8692
help="Threshold value for likelihood of blanks before detecting end of utterance",
8793
)
94+
parser.add_argument(
95+
"--custom-configuration",
96+
default="",
97+
type=str,
98+
help="Custom configurations to be sent to the server as key value pairs <key:value,key:value,...>",
99+
)
88100
return parser
89101

90102

riva/client/asr.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,19 @@ def add_audio_file_specs_to_config(
117117
def add_speaker_diarization_to_config(
118118
config: Union[rasr.RecognitionConfig],
119119
diarization_enable: bool,
120+
diarization_max_speakers: int,
120121
) -> None:
121122
inner_config: rasr.RecognitionConfig = config if isinstance(config, rasr.RecognitionConfig) else config.config
122123
if diarization_enable:
123-
diarization_config = rasr.SpeakerDiarizationConfig(enable_speaker_diarization=True)
124+
diarization_config = rasr.SpeakerDiarizationConfig(
125+
enable_speaker_diarization=True,
126+
max_speaker_count=diarization_max_speakers,
127+
)
124128
inner_config.diarization_config.CopyFrom(diarization_config)
125129

130+
126131
def add_endpoint_parameters_to_config(
127-
config: Union[rasr.RecognitionConfig, rasr.EndpointingConfig],
132+
config: Union[rasr.StreamingRecognitionConfig, rasr.RecognitionConfig],
128133
start_history: int,
129134
start_threshold: float,
130135
stop_history: int,
@@ -152,6 +157,22 @@ def add_endpoint_parameters_to_config(
152157
inner_config.endpointing_config.CopyFrom(endpointing_config)
153158

154159

160+
def add_custom_configuration_to_config(
161+
config: Union[rasr.StreamingRecognitionConfig, rasr.RecognitionConfig],
162+
custom_configuration: str,
163+
) -> None:
164+
custom_configuration = custom_configuration.strip().replace(" ", "")
165+
if not custom_configuration:
166+
return
167+
inner_config: rasr.RecognitionConfig = config if isinstance(config, rasr.RecognitionConfig) else config.config
168+
for pair in custom_configuration.split(","):
169+
key_value = pair.split(":")
170+
if len(key_value) == 2:
171+
inner_config.custom_configuration[key_value[0]] = key_value[1]
172+
else:
173+
raise ValueError(f"Invalid key:value pair {key_value}")
174+
175+
155176
PRINT_STREAMING_ADDITIONAL_INFO_MODES = ['no', 'time', 'confidence']
156177

157178

scripts/asr/riva_streaming_asr_client.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ def streaming_transcription_worker(
7373
args.stop_threshold,
7474
args.stop_threshold_eou
7575
)
76+
riva.client.add_custom_configuration_to_config(
77+
config,
78+
args.custom_configuration
79+
)
7680
riva.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score)
7781
for _ in range(args.num_iterations):
7882
with riva.client.AudioChunkFileIterator(

scripts/asr/transcribe_file.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,10 @@ def main() -> None:
109109
args.stop_threshold,
110110
args.stop_threshold_eou
111111
)
112+
riva.client.add_custom_configuration_to_config(
113+
config,
114+
args.custom_configuration
115+
)
112116
sound_callback = None
113117
try:
114118
if args.play_audio or args.output_device is not None:

scripts/asr/transcribe_file_offline.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def main() -> None:
3737
enable_word_time_offsets=args.word_time_offsets or args.speaker_diarization,
3838
)
3939
riva.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score)
40-
riva.client.add_speaker_diarization_to_config(config, args.speaker_diarization)
40+
riva.client.add_speaker_diarization_to_config(config, args.speaker_diarization, args.diarization_max_speakers)
4141
riva.client.add_endpoint_parameters_to_config(
4242
config,
4343
args.start_history,
@@ -46,7 +46,11 @@ def main() -> None:
4646
args.stop_history_eou,
4747
args.stop_threshold,
4848
args.stop_threshold_eou
49-
)
49+
)
50+
riva.client.add_custom_configuration_to_config(
51+
config,
52+
args.custom_configuration
53+
)
5054
with args.input_file.open('rb') as fh:
5155
data = fh.read()
5256
try:

scripts/asr/transcribe_mic.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ def main() -> None:
6767
args.stop_threshold,
6868
args.stop_threshold_eou
6969
)
70+
riva.client.add_custom_configuration_to_config(
71+
config,
72+
args.custom_configuration
73+
)
7074
with riva.client.audio_io.MicrophoneStream(
7175
args.sample_rate_hz,
7276
args.file_streaming_chunk,

scripts/tts/talk.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ def main() -> None:
157157
sound_stream(resp.audio)
158158
if out_f is not None:
159159
out_f.writeframesraw(resp.audio)
160+
except Exception as e:
161+
print(e.details())
160162
finally:
161163
if out_f is not None:
162164
out_f.close()

0 commit comments

Comments
 (0)