Skip to content

Commit 641ca80

Browse files
committed
fbank: update to better match kaldifeat
1 parent 3308e68 commit 641ca80

File tree

3 files changed

+30
-2
lines changed

3 files changed

+30
-2
lines changed

src/april_model.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ AprilASRModel aam_create_model(const char *model_path) {
9393
//aam->fbank_opts.snip_edges = aam->params.snip_edges;
9494
aam->fbank_opts.snip_edges = true;
9595

96+
aam->fbank_opts.remove_dc_offset = true;
97+
aam->fbank_opts.preemph_coeff = 0.97f;
98+
9699
ASSERT_OR_FREE_AAM_AND_RETURN_NULL(aam, aam->x_dim[0] == aam->params.batch_size);
97100
ASSERT_OR_FREE_AAM_AND_RETURN_NULL(aam, aam->x_dim[1] == aam->fbank_opts.pull_segment_count);
98101
ASSERT_OR_FREE_AAM_AND_RETURN_NULL(aam, aam->x_dim[2] == aam->fbank_opts.num_bins);

src/fbank.c

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ void fbank_accept_waveform(OnlineFBank fbank, float *wave, size_t wave_count) {
185185
sonicReadFloatFromStream(fbank->sonic_stream, wave, wave_count);
186186
}
187187

188+
float preemph_coeff = fbank->opts.preemph_coeff;
188189
for(ssize_t i=0;; i++) {
189190
if((fbank->temp_segment_avail + 1) > fbank->temp_segments_y){
190191
LOG_WARNING("fbank ran out of space. Please call fbank_pull_segments. Can't eat wave");
@@ -228,12 +229,33 @@ void fbank_accept_waveform(OnlineFBank fbank, float *wave, size_t wave_count) {
228229
ssize_t wave_idx = start_idx + j;
229230
if(wave_idx < 0){
230231
ssize_t ll_idx = fbank->prev_leftover_count + wave_idx;
231-
fbank->data[j] = fbank->prev_leftover[ll_idx] * fbank->window[j];
232+
fbank->data[j] = fbank->prev_leftover[ll_idx];
232233
} else {
233-
fbank->data[j] = wave[start_idx + j] * fbank->window[j];
234+
fbank->data[j] = wave[start_idx + j];
234235
}
235236
}
236237

238+
// Dithering is not implemented in this pipeline
239+
240+
// Remove the DC offset: subtract the mean of the frame from every sample
241+
if(fbank->opts.remove_dc_offset) {
242+
float sum = 0;
243+
for(int j=0; j<fbank->padded_window_size; j++) sum += fbank->data[j];
244+
float mean = sum / fbank->padded_window_size;
245+
for(int j=0; j<fbank->padded_window_size; j++) fbank->data[j] -= mean;
246+
}
247+
248+
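// Apply pre-emphasis: data[j] -= preemph_coeff * data[j - 1], walking backwards so each step reads the unmodified previous sample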
249+
if(preemph_coeff > 0.0f) {
250+
for(int j=fbank->padded_window_size-1; j>0; --j)
251+
fbank->data[j] -= preemph_coeff * fbank->data[j - 1];
252+
fbank->data[0] -= preemph_coeff * fbank->data[0];
253+
}
254+
255+
// Apply window function
256+
for(int j=0; j<fbank->padded_window_size; j++)
257+
fbank->data[j] *= fbank->window[j];
258+
237259
double *dptr = fbank->data;
238260
double *rptr = fbank->ret;
239261
memcpy((char *)(rptr+1), dptr, fbank->padded_window_size * sizeof(double));

src/fbank.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ typedef struct FBankOptions {
6060

6161
// If false, speed feature will be unavailable
6262
bool use_sonic;
63+
64+
bool remove_dc_offset; // true
65+
float preemph_coeff; // 0.97
6366
} FBankOptions;
6467

6568
OnlineFBank make_fbank(FBankOptions opts);

0 commit comments

Comments
 (0)