3131
3232#include " online-audio-source.h"
3333#include " feat/feature-functions.h"
34+ #include " feat/feature-window.h"
3435
3536namespace kaldi {
3637
@@ -275,7 +276,8 @@ class OnlineFeInput : public OnlineFeatInputItf {
275276 // "frame_size" - frame extraction window size in audio samples
276277 // "frame_shift" - shift between consecutive feature frames, in audio samples
277278 OnlineFeInput (OnlineAudioSourceItf *au_src, E *fe,
278- const int32 frame_size, const int32 frame_shift);
279+ const int32 frame_size, const int32 frame_shift,
280+ const bool snip_edges = true );
279281
280282 virtual int32 Dim () const { return extractor_->Dim (); }
281283
@@ -287,15 +289,26 @@ class OnlineFeInput : public OnlineFeatInputItf {
287289 const int32 frame_size_;
288290 const int32 frame_shift_;
289291 Vector<BaseFloat> wave_; // the samples to be passed for extraction
292+ Vector<BaseFloat> wave_remainder_; // the samples left over from the previous
293+ // feature batch
294+ FrameExtractionOptions frame_opts_;
290295
291296 KALDI_DISALLOW_COPY_AND_ASSIGN (OnlineFeInput);
292297};
293298
294299template <class E >
295300OnlineFeInput<E>::OnlineFeInput(OnlineAudioSourceItf *au_src, E *fe,
296- int32 frame_size, int32 frame_shift)
301+ int32 frame_size, int32 frame_shift,
302+ bool snip_edges)
297303 : source_(au_src), extractor_(fe),
298- frame_size_(frame_size), frame_shift_(frame_shift) {}
304+ frame_size_(frame_size), frame_shift_(frame_shift) {
305+ // We need a FrameExtractionOptions object in order to call NumFrames().
306+ // 1000 is a fake sample rate chosen so that 1 ms equals 1 sample.
307+ frame_opts_.samp_freq = 1000 ;
308+ frame_opts_.frame_shift_ms = frame_shift;
309+ frame_opts_.frame_length_ms = frame_size;
310+ frame_opts_.snip_edges = snip_edges;
311+ }
299312
300313template <class E > bool
301314OnlineFeInput<E>::Compute(Matrix<BaseFloat> *output) {
@@ -311,11 +324,26 @@ OnlineFeInput<E>::Compute(Matrix<BaseFloat> *output) {
311324
312325 bool ans = source_->Read (&read_samples);
313326
327+ Vector<BaseFloat> all_samples (wave_remainder_.Dim () + read_samples.Dim ());
328+ all_samples.Range (0 , wave_remainder_.Dim ()).CopyFromVec (wave_remainder_);
329+ all_samples.Range (wave_remainder_.Dim (), read_samples.Dim ()).
330+ CopyFromVec (read_samples);
331+
314332 // Extract the features
315- if (read_samples.Dim () >= frame_size_) {
316- extractor_->Compute (read_samples, 1.0 , output);
333+ if (all_samples.Dim () >= frame_size_) {
334+ // extract waveform remainder before calling Compute()
335+ int32 num_frames = NumFrames (all_samples.Dim (), frame_opts_);
336+ // offset is the amount at the start that has been extracted.
337+ int32 offset = num_frames * frame_shift_;
338+ int32 remaining_len = all_samples.Dim () - offset;
339+ wave_remainder_.Resize (remaining_len);
340+ KALDI_ASSERT (remaining_len >= 0 );
341+ if (remaining_len > 0 )
342+ wave_remainder_.CopyFromVec (SubVector<BaseFloat>(all_samples, offset, remaining_len));
343+ extractor_->Compute (all_samples, 1.0 , output);
317344 } else {
318345 output->Resize (0 , 0 );
346+ wave_remainder_ = all_samples;
319347 }
320348
321349 return ans;
0 commit comments