 from nemo.collections.asr.inference.streaming.decoders.greedy.greedy_ctc_decoder import CTCGreedyDecoder
 from nemo.collections.asr.inference.streaming.endpointing.greedy.greedy_ctc_endpointing import CTCGreedyEndpointing
 from nemo.collections.asr.inference.streaming.framing.multi_stream import ContinuousBatchedRequestStreamer
-from nemo.collections.asr.inference.streaming.framing.request import FeatureBuffer, Frame
+from nemo.collections.asr.inference.streaming.framing.request import FeatureBuffer, Frame, Request
 from nemo.collections.asr.inference.streaming.framing.request_options import ASRRequestOptions
 from nemo.collections.asr.inference.streaming.state.cache_aware_ctc_state import CacheAwareCTCStreamingState
 from nemo.collections.asr.inference.utils.endpointing_utils import millisecond_to_frames
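For context on the new `Request` import: the rest of the diff only ever reads `request.stream_id` and `request.is_last`, so `Request` can be understood as the common interface shared by `Frame` and `FeatureBuffer`. A minimal sketch of that assumed contract (not NeMo's actual definition; the attribute types are guesses):

```python
from typing import Protocol


class Request(Protocol):
    """Assumed shape of a streaming request; Frame and FeatureBuffer both satisfy it."""

    stream_id: int  # identifies which stream this chunk belongs to (type assumed)
    is_last: bool   # True on the final chunk of a stream
```

Read this way, `Request` simply replaces the `Frame | FeatureBuffer` unions that appear throughout the hunks below.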
@@ -214,17 +214,19 @@ def preprocess(self, buffers: list[Tensor], right_paddings: list[int] | None = None):
         feature_buffers = torch.cat(feature_buffers).to(self.device)
         return feature_buffers, feature_buffer_lens

-    def run_greedy_decoder(self, state: CacheAwareCTCStreamingState, frame: Frame | FeatureBuffer, log_probs: Tensor):
+    def run_greedy_decoder(
+        self, state: CacheAwareCTCStreamingState, request: Request, log_probs: Tensor
+    ):
         """
         Run the greedy CTC decoder on the log_probs and update the state
         Args:
             state: (CacheAwareCTCStreamingState) The state of the stream
-            frame: (Frame | FeatureBuffer) The current frame or feature buffer
-            log_probs: (Tensor) The log probabilities of the current frame
+            request: (Request) The current request (frame or feature buffer)
+            log_probs: (Tensor) The log probabilities of the current request
         Returns:
             (bool) Whether EOU is detected.
         """
-        eou_detected = frame.is_last
+        eou_detected = request.is_last
         last_token = state.label_buffer[-1] if len(state.label_buffer) > 0 else self.blank_id
         cur_output = self.greedy_ctc_decoder(log_probs, compute_confidence=True, previous=last_token)
         state.update_label_buffer(cur_output["labels"])
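A note on `previous=last_token` in the call above: in streaming greedy CTC, the last label of the previous chunk must seed the current chunk so a token that straddles a chunk boundary is merged instead of emitted twice. A rough, self-contained sketch of such a step (illustrative only, not NeMo's `CTCGreedyDecoder`; the blank id and shapes are assumptions):

```python
import torch
from torch import Tensor

BLANK_ID = 0  # assumed blank index, for illustration only


def greedy_ctc_step(log_probs: Tensor, previous: int) -> list[int]:
    """Greedy CTC over one chunk of shape (T, vocab): argmax per frame,
    collapse consecutive repeats, drop blanks. `previous` carries the last
    frame-level label of the prior chunk across the boundary."""
    labels = []
    prev = previous
    for tok in log_probs.argmax(dim=-1).tolist():
        if tok != BLANK_ID and tok != prev:
            labels.append(tok)
        prev = tok  # repeats merge only while uninterrupted by another label
    return labels


# Toy usage: 6 frames over a 5-token vocabulary.
chunk = torch.log_softmax(torch.randn(6, 5), dim=-1)
print(greedy_ctc_step(chunk, previous=BLANK_ID))
```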
@@ -242,28 +244,28 @@ def run_greedy_decoder(self, state: CacheAwareCTCStreamingState, frame: Frame | FeatureBuffer, log_probs: Tensor):

     def decode_log_probs(
         self,
-        frames: list[Frame | FeatureBuffer],
+        requests: list[Request],
         log_probs: Tensor,
         tail_log_probs: Tensor | None,
         ready_state_ids: set,
     ) -> None:
         """
         Decode the log probabilities and update the state
         Args:
-            frames: (list[Frame | FeatureBuffer]) List of frames or feature buffers to transcribe.
+            requests: (list[Request]) List of requests (frames or feature buffers) to transcribe.
             log_probs: (Tensor) Log probabilities.
             tail_log_probs: (Tensor | None) Tail log probabilities.
             ready_state_ids: (set) Set of ready state IDs.
         """

-        for idx, frame in enumerate(frames):
-            state = self.get_state(frame.stream_id)
-            eou_detected = self.run_greedy_decoder(state, frame, log_probs[idx])
+        for idx, request in enumerate(requests):
+            state = self.get_state(request.stream_id)
+            eou_detected = self.run_greedy_decoder(state, request, log_probs[idx])

             if eou_detected:
                 self.bpe_decoder.decode_bpe_tokens(state)
                 state.cleanup_after_eou()
-                ready_state_ids.add(frame.stream_id)
+                ready_state_ids.add(request.stream_id)

                 if tail_log_probs is not None:
                     last_token = state.label_buffer[-1] if len(state.label_buffer) > 0 else self.blank_id
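Worth noting: `decode_log_probs` returns `None` and reports finished streams by mutating `ready_state_ids` in place, so the caller keeps ownership of the set across batches. A toy example of that out-parameter pattern (names made up):

```python
def decode_batch(stream_ids: list[int], finished: list[bool], ready: set[int]) -> None:
    # The callee adds finished stream ids to a set the caller owns.
    for sid, done in zip(stream_ids, finished):
        if done:
            ready.add(sid)


ready: set[int] = set()
decode_batch([3, 7, 9], [False, True, False], ready)
assert ready == {7}  # stream 7 hit end-of-utterance this step
```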
@@ -274,15 +276,15 @@ def decode_log_probs(

     def cache_aware_transcribe_step(
         self,
-        frames: list[Frame | FeatureBuffer],
+        requests: list[Request],
         buffered_features: list[Tensor],
         right_paddings: list[int] | None,
         ready_state_ids: set,
         keep_all_outputs: bool = False,
     ) -> None:
         """
         Cache Aware Transcribe Step
-        It receives a list of frames (Frame or FeatureBuffer) and features and do the following:
+        It receives a list of requests (Frame or FeatureBuffer) and features and does the following:

         1. Preprocess the features by stacking them and computing the lengths
         2. Get the context and mapping from the context manager for cache aware streaming
@@ -291,16 +293,16 @@ def cache_aware_transcribe_step(
         5. Decode the log probabilities and update the state

         Args:
-            frames: (list[Frame | FeatureBuffer]) List of frames or feature buffers to transcribe.
+            requests: (list[Request]) List of requests (frames or feature buffers) to transcribe.
             buffered_features: (list[Tensor]) List of buffered features.
             right_paddings: (list[int] | None) List of right paddings.
             ready_state_ids: (set) Set of ready state IDs.
             keep_all_outputs: (bool) Whether to keep all outputs or not.
         """
         feature_buffers, feature_buffer_lens = self.preprocess(buffered_features, right_paddings)

-        stream_ids = [frame.stream_id for frame in frames]
-        eos_flags = [frame.is_last for frame in frames]
+        stream_ids = [request.stream_id for request in requests]
+        eos_flags = [request.is_last for request in requests]
         context, mapping = self.context_manager.get_context(stream_ids)

         drop_extra_pre_encoded = 0 if not self.use_cache else self.asr_model.drop_extra_pre_encoded
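The `get_context` call above and the `update_cache` / `reset_slots` calls in the next hunk follow a fetch, run, write-back, free lifecycle for the per-stream encoder cache. A toy illustration of that lifecycle (names and structure assumed; the real manager operates on batched tensors and slot indices):

```python
class ToyContextManager:
    """Keeps one cached encoder context per active stream id (illustrative only)."""

    def __init__(self) -> None:
        self._cache: dict[int, list[float]] = {}

    def get_context(self, stream_ids: list[int]):
        # `mapping` records which batch row each stream's cache slot feeds.
        mapping = {sid: row for row, sid in enumerate(stream_ids)}
        context = [self._cache.get(sid, []) for sid in stream_ids]
        return context, mapping

    def update_cache(self, stream_ids, new_context, mapping) -> None:
        # Write each stream's fresh context back after the forward pass.
        for sid in stream_ids:
            self._cache[sid] = new_context[mapping[sid]]

    def reset_slots(self, stream_ids, eos_flags) -> None:
        # Free the slot of every stream that just ended.
        for sid, eos in zip(stream_ids, eos_flags):
            if eos:
                self._cache.pop(sid, None)
```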
@@ -319,7 +321,7 @@ def cache_aware_transcribe_step(
         log_probs = normalize_log_probs(log_probs)
         self.context_manager.update_cache(stream_ids, new_context, mapping)
         self.context_manager.reset_slots(stream_ids, eos_flags)
-        self.decode_log_probs(frames, log_probs, tail_log_probs, ready_state_ids)
+        self.decode_log_probs(requests, log_probs, tail_log_probs, ready_state_ids)

     def transcribe_step_for_frames(self, frames: list[Frame]) -> None:
         """