Skip to content

Commit afc5e78

Browse files
jdieguez authored and danpovey committed
[src] Fix to older online decoding code in online/ (OnlineFeInput; was broken by commit cc2469e). (kaldi-asr#3025)
1 parent fb514dc commit afc5e78

File tree

1 file changed

+33
-5
lines changed

1 file changed

+33
-5
lines changed

src/online/online-feat-input.h

Lines changed: 33 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131

3232
#include "online-audio-source.h"
3333
#include "feat/feature-functions.h"
34+
#include "feat/feature-window.h"
3435

3536
namespace kaldi {
3637

@@ -275,7 +276,8 @@ class OnlineFeInput : public OnlineFeatInputItf {
275276
// "frame_size" - frame extraction window size in audio samples
276277
// "frame_shift" - feature frame width in audio samples
277278
OnlineFeInput(OnlineAudioSourceItf *au_src, E *fe,
278-
const int32 frame_size, const int32 frame_shift);
279+
const int32 frame_size, const int32 frame_shift,
280+
const bool snip_edges = true);
279281

280282
virtual int32 Dim() const { return extractor_->Dim(); }
281283

@@ -287,15 +289,26 @@ class OnlineFeInput : public OnlineFeatInputItf {
287289
const int32 frame_size_;
288290
const int32 frame_shift_;
289291
Vector<BaseFloat> wave_; // the samples to be passed for extraction
292+
Vector<BaseFloat> wave_remainder_; // the samples remained from the previous
293+
// feature batch
294+
FrameExtractionOptions frame_opts_;
290295

291296
KALDI_DISALLOW_COPY_AND_ASSIGN(OnlineFeInput);
292297
};
293298

294299
template<class E>
295300
OnlineFeInput<E>::OnlineFeInput(OnlineAudioSourceItf *au_src, E *fe,
296-
int32 frame_size, int32 frame_shift)
301+
int32 frame_size, int32 frame_shift,
302+
bool snip_edges)
297303
: source_(au_src), extractor_(fe),
298-
frame_size_(frame_size), frame_shift_(frame_shift) {}
304+
frame_size_(frame_size), frame_shift_(frame_shift) {
305+
// we need a FrameExtractionOptions to call NumFrames()
306+
// 1000 is just a fake sample rate which equates ms and samples
307+
frame_opts_.samp_freq = 1000;
308+
frame_opts_.frame_shift_ms = frame_shift;
309+
frame_opts_.frame_length_ms = frame_size;
310+
frame_opts_.snip_edges = snip_edges;
311+
}
299312

300313
template<class E> bool
301314
OnlineFeInput<E>::Compute(Matrix<BaseFloat> *output) {
@@ -311,11 +324,26 @@ OnlineFeInput<E>::Compute(Matrix<BaseFloat> *output) {
311324

312325
bool ans = source_->Read(&read_samples);
313326

327+
Vector<BaseFloat> all_samples(wave_remainder_.Dim() + read_samples.Dim());
328+
all_samples.Range(0, wave_remainder_.Dim()).CopyFromVec(wave_remainder_);
329+
all_samples.Range(wave_remainder_.Dim(), read_samples.Dim()).
330+
CopyFromVec(read_samples);
331+
314332
// Extract the features
315-
if (read_samples.Dim() >= frame_size_) {
316-
extractor_->Compute(read_samples, 1.0, output);
333+
if (all_samples.Dim() >= frame_size_) {
334+
// extract waveform remainder before calling Compute()
335+
int32 num_frames = NumFrames(all_samples.Dim(), frame_opts_);
336+
// offset is the amount at the start that has been extracted.
337+
int32 offset = num_frames * frame_shift_;
338+
int32 remaining_len = all_samples.Dim() - offset;
339+
wave_remainder_.Resize(remaining_len);
340+
KALDI_ASSERT(remaining_len >= 0);
341+
if (remaining_len > 0)
342+
wave_remainder_.CopyFromVec(SubVector<BaseFloat>(all_samples, offset, remaining_len));
343+
extractor_->Compute(all_samples, 1.0, output);
317344
} else {
318345
output->Resize(0, 0);
346+
wave_remainder_ = all_samples;
319347
}
320348

321349
return ans;

0 commit comments

Comments
 (0)