Skip to content

Commit 0a75015

Browse files
authored
Add ASR endpointing stop_threshold_eou parameter (#83)
* Exposing the 'stop_history_eou_th' parameter
* Updating submodule
* Updating param name
* Updating help for VAD param
* Adding check for stop_threshold_eou
* Updating proto branch
* Updating the submodule
1 parent 80e5f04 commit 0a75015

File tree

8 files changed

+45
-32
lines changed

8 files changed

+45
-32
lines changed

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
[submodule "common"]
22
path = common
33
url = https://github.com/nvidia-riva/common.git
4-
branch = main
4+
branch = release/2.16.0

common

riva/client/argparse_utils.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def add_asr_config_argparse_parameters(
5252
"--start-history",
5353
default=-1,
5454
type=int,
55-
help="Value to detect and initiate start of speech utterance",
55+
help="Value (in milliseconds) to detect and initiate start of speech utterance",
5656
)
5757
parser.add_argument(
5858
"--start-threshold",
@@ -64,19 +64,25 @@ def add_asr_config_argparse_parameters(
6464
"--stop-history",
6565
default=-1,
6666
type=int,
67-
help="Value to reset the endpoint detection history",
67+
help="Value (in milliseconds) to detect end of utterance and reset decoder",
68+
)
69+
parser.add_argument(
70+
"--stop-threshold",
71+
default=-1.0,
72+
type=float,
73+
help="Threshold value for detecting the end of speech utterance",
6874
)
6975
parser.add_argument(
7076
"--stop-history-eou",
7177
default=-1,
7278
type=int,
73-
help="Value to determine the response history for endpoint detection",
79+
help="Value (in milliseconds) to detect end of utterance for the 1st pass and generate an intermediate final transcript",
7480
)
7581
parser.add_argument(
76-
"--stop-threshold",
82+
"--stop-threshold-eou",
7783
default=-1.0,
7884
type=float,
79-
help="Threshold value for detecting the end of speech utterance",
85+
help="Threshold value for likelihood of blanks before detecting end of utterance",
8086
)
8187
return parser
8288

riva/client/asr.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,9 @@ def add_endpoint_parameters_to_config(
130130
stop_history: int,
131131
stop_history_eou: int,
132132
stop_threshold: float,
133+
stop_threshold_eou: float,
133134
) -> None:
134-
if not (start_history > 0 or start_threshold > 0 or stop_history > 0 or stop_history_eou > 0 or stop_threshold > 0):
135+
if not (start_history > 0 or start_threshold > 0 or stop_history > 0 or stop_history_eou > 0 or stop_threshold > 0 or stop_threshold_eou > 0):
135136
return
136137

137138
inner_config: rasr.RecognitionConfig = config if isinstance(config, rasr.RecognitionConfig) else config.config
@@ -146,6 +147,8 @@ def add_endpoint_parameters_to_config(
146147
endpointing_config.stop_history_eou = stop_history_eou
147148
if stop_threshold > 0:
148149
endpointing_config.stop_threshold = stop_threshold
150+
if stop_threshold_eou > 0:
151+
endpointing_config.stop_threshold_eou = stop_threshold_eou
149152
inner_config.endpointing_config.CopyFrom(endpointing_config)
150153

151154

scripts/asr/riva_streaming_asr_client.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,13 @@ def streaming_transcription_worker(
6464
interim_results=True,
6565
)
6666
riva.client.add_endpoint_parameters_to_config(
67-
config,
68-
args.start_history,
69-
args.start_threshold,
70-
args.stop_history,
71-
args.stop_history_eou,
72-
args.stop_threshold
67+
config,
68+
args.start_history,
69+
args.start_threshold,
70+
args.stop_history,
71+
args.stop_history_eou,
72+
args.stop_threshold,
73+
args.stop_threshold_eou
7374
)
7475
riva.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score)
7576
for _ in range(args.num_iterations):

scripts/asr/transcribe_file.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,13 @@ def main() -> None:
8080
)
8181
riva.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score)
8282
riva.client.add_endpoint_parameters_to_config(
83-
config,
84-
args.start_history,
85-
args.start_threshold,
86-
args.stop_history,
87-
args.stop_history_eou,
88-
args.stop_threshold
83+
config,
84+
args.start_history,
85+
args.start_threshold,
86+
args.stop_history,
87+
args.stop_history_eou,
88+
args.stop_threshold,
89+
args.stop_threshold_eou
8990
)
9091
sound_callback = None
9192
try:

scripts/asr/transcribe_file_offline.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,13 @@ def main() -> None:
3939
riva.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score)
4040
riva.client.add_speaker_diarization_to_config(config, args.speaker_diarization)
4141
riva.client.add_endpoint_parameters_to_config(
42-
config,
43-
args.start_history,
44-
args.start_threshold,
45-
args.stop_history,
46-
args.stop_history_eou,
47-
args.stop_threshold
42+
config,
43+
args.start_history,
44+
args.start_threshold,
45+
args.stop_history,
46+
args.stop_history_eou,
47+
args.stop_threshold,
48+
args.stop_threshold_eou
4849
)
4950
with args.input_file.open('rb') as fh:
5051
data = fh.read()

scripts/asr/transcribe_mic.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,13 @@ def main() -> None:
5858
)
5959
riva.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score)
6060
riva.client.add_endpoint_parameters_to_config(
61-
config,
62-
args.start_history,
63-
args.start_threshold,
64-
args.stop_history,
65-
args.stop_history_eou,
66-
args.stop_threshold
61+
config,
62+
args.start_history,
63+
args.start_threshold,
64+
args.stop_history,
65+
args.stop_history_eou,
66+
args.stop_threshold,
67+
args.stop_threshold_eou
6768
)
6869
with riva.client.audio_io.MicrophoneStream(
6970
args.sample_rate_hz,

0 commit comments

Comments
 (0)