1- /* Copyright (c) 2017 Mozilla */
1+ /* Copyright (c) 2018 Gregor Richards
2+ * Copyright (c) 2017 Mozilla */
23/*
34 Redistribution and use in source and binary forms, with or without
45 modification, are permitted provided that the following conditions
5556
/* Squares its argument. The argument is expanded twice, so do not pass an
 * expression with side effects (e.g. SQUARE(i++)). */
#define SQUARE(x) ((x)*(x))

/* Number of frequency bands. This is fixed at 22 now that the SMOOTH_BANDS
 * switch (and its 21-band alternative) has been removed; it matches the 22
 * band edges listed in eband5ms. */
#define NB_BANDS 22

#define CEPS_MEM 8
#define NB_DELTA_CEPS 6
7368#define TRAINING 0
7469#endif
7570
71+
72+ /* The built-in model, used if no file is given as input */
73+ extern const struct RNNModel rnnoise_model_orig ;
74+
75+
7676static const opus_int16 eband5ms [] = {
7777/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 20k*/
7878 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 10 , 12 , 14 , 16 , 20 , 24 , 28 , 34 , 40 , 48 , 60 , 78 , 100
@@ -100,7 +100,6 @@ struct DenoiseState {
100100 RNNState rnn ;
101101};
102102
103- #if SMOOTH_BANDS
104103void compute_band_energy (float * bandE , const kiss_fft_cpx * X ) {
105104 int i ;
106105 float sum [NB_BANDS ] = {0 };
@@ -165,32 +164,6 @@ void interp_band_gain(float *g, const float *bandE) {
165164 }
166165 }
167166}
168- #else
169- void compute_band_energy (float * bandE , const kiss_fft_cpx * X ) {
170- int i ;
171- for (i = 0 ;i < NB_BANDS ;i ++ )
172- {
173- int j ;
174- opus_val32 sum = 0 ;
175- for (j = 0 ;j < (eband5ms [i + 1 ]- eband5ms [i ])<<FRAME_SIZE_SHIFT ;j ++ ) {
176- sum += SQUARE (X [(eband5ms [i ]<<FRAME_SIZE_SHIFT ) + j ].r );
177- sum += SQUARE (X [(eband5ms [i ]<<FRAME_SIZE_SHIFT ) + j ].i );
178- }
179- bandE [i ] = sum ;
180- }
181- }
182-
183- void interp_band_gain (float * g , const float * bandE ) {
184- int i ;
185- memset (g , 0 , FREQ_SIZE );
186- for (i = 0 ;i < NB_BANDS ;i ++ )
187- {
188- int j ;
189- for (j = 0 ;j < (eband5ms [i + 1 ]- eband5ms [i ])<<FRAME_SIZE_SHIFT ;j ++ )
190- g [(eband5ms [i ]<<FRAME_SIZE_SHIFT ) + j ] = bandE [i ];
191- }
192- }
193- #endif
194167
195168
196169CommonState common ;
@@ -287,19 +260,41 @@ int rnnoise_get_size() {
287260 return sizeof (DenoiseState );
288261}
289262
290- int rnnoise_init (DenoiseState * st ) {
263+ int rnnoise_get_frame_size () {
264+ return FRAME_SIZE ;
265+ }
266+
267+ int rnnoise_init (DenoiseState * st , RNNModel * model ) {
291268 memset (st , 0 , sizeof (* st ));
269+ if (model )
270+ st -> rnn .model = model ;
271+ else
272+ st -> rnn .model = & rnnoise_model_orig ;
273+ st -> rnn .vad_gru_state = calloc (sizeof (float ), st -> rnn .model -> vad_gru_size );
274+ st -> rnn .noise_gru_state = calloc (sizeof (float ), st -> rnn .model -> noise_gru_size );
275+ st -> rnn .denoise_gru_state = calloc (sizeof (float ), st -> rnn .model -> denoise_gru_size );
276+ st -> rnn .compute_gru_fct = & compute_gru ;
277+
278+ #if defined(__AVX2__ )
279+ if (is_avx2_supported () == 1 ) {
280+ st -> rnn .compute_gru_fct = & compute_gru_avx2 ;
281+ }
282+ #endif
283+
292284 return 0 ;
293285}
294286
295- DenoiseState * rnnoise_create () {
287+ DenoiseState * rnnoise_create (RNNModel * model ) {
296288 DenoiseState * st ;
297289 st = malloc (rnnoise_get_size ());
298- rnnoise_init (st );
290+ rnnoise_init (st , model );
299291 return st ;
300292}
301293
302294void rnnoise_destroy (DenoiseState * st ) {
295+ free (st -> rnn .vad_gru_state );
296+ free (st -> rnn .noise_gru_state );
297+ free (st -> rnn .denoise_gru_state );
303298 free (st );
304299}
305300
@@ -424,13 +419,11 @@ static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y)
424419}
425420
/* Runs a second-order recursive filter over N samples.
 *
 * y   - output buffer, N samples.
 * mem - two-element filter state, read and updated in place so that
 *       consecutive calls continue the same filter.
 * x   - input buffer, N samples. Each x[i] is read before y[i] is written,
 *       so y may be the same buffer as x.
 * b   - two feed-forward coefficients applied to the input sample.
 * a   - two feedback coefficients applied to the output sample.
 * N   - number of samples; nothing is touched when N <= 0.
 *
 * For every sample:
 *   y[i]   = x[i] + mem[0]
 *   mem[0] = mem[1] + (b[0]*x[i] - a[0]*y[i])
 *   mem[1] =           b[1]*x[i] - a[1]*y[i]
 * All arithmetic is done in single precision.
 */
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
  for (int i = 0; i < N; i++) {
    float xi = x[i];
    float yi = xi + mem[0];
    mem[0] = mem[1] + (b[0] * xi - a[0] * yi);
    mem[1] = (b[1] * xi - a[1] * yi);
    y[i] = yi;
  }
}
@@ -541,20 +534,21 @@ int main(int argc, char **argv) {
541534 int vad_cnt = 0 ;
542535 int gain_change_count = 0 ;
543536 float speech_gain = 1 , noise_gain = 1 ;
544- FILE * f1 , * f2 , * fout ;
537+ FILE * f1 , * f2 ;
538+ int maxCount ;
545539 DenoiseState * st ;
546540 DenoiseState * noise_state ;
547541 DenoiseState * noisy ;
548- st = rnnoise_create ();
549- noise_state = rnnoise_create ();
550- noisy = rnnoise_create ();
542+ st = rnnoise_create (NULL );
543+ noise_state = rnnoise_create (NULL );
544+ noisy = rnnoise_create (NULL );
551545 if (argc != 4 ) {
552- fprintf (stderr , "usage: %s <speech> <noise> <output denoised >\n" , argv [0 ]);
546+ fprintf (stderr , "usage: %s <speech> <noise> <count >\n" , argv [0 ]);
553547 return 1 ;
554548 }
555549 f1 = fopen (argv [1 ], "r" );
556550 f2 = fopen (argv [2 ], "r" );
557- fout = fopen (argv [3 ], "w" );
551+ maxCount = atoi (argv [3 ]);
558552 for (i = 0 ;i < 150 ;i ++ ) {
559553 short tmp [FRAME_SIZE ];
560554 fread (tmp , sizeof (short ), FRAME_SIZE , f2 );
@@ -566,12 +560,11 @@ int main(int argc, char **argv) {
566560 float Ln [NB_BANDS ];
567561 float features [NB_FEATURES ];
568562 float g [NB_BANDS ];
569- float gf [FREQ_SIZE ]= {1 };
570563 short tmp [FRAME_SIZE ];
571564 float vad = 0 ;
572- float vad_prob ;
573565 float E = 0 ;
574- if (count == 50000000 ) break ;
566+ if (count == maxCount ) break ;
567+ if ((count %1000 )== 0 ) fprintf (stderr , "%d\r" , count );
575568 if (++ gain_change_count > 2821 ) {
576569 speech_gain = pow (10. , (-40 + (rand ()%60 ))/20. );
577570 noise_gain = pow (10. , (-30 + (rand ()%50 ))/20. );
@@ -646,37 +639,16 @@ int main(int argc, char **argv) {
646639 if (vad == 0 && noise_gain == 0 ) g [i ] = -1 ;
647640 }
648641 count ++ ;
649- #if 0
650- for (i = 0 ;i < NB_FEATURES ;i ++ ) printf ("%f " , features [i ]);
651- for (i = 0 ;i < NB_BANDS ;i ++ ) printf ("%f " , g [i ]);
652- for (i = 0 ;i < NB_BANDS ;i ++ ) printf ("%f " , Ln [i ]);
653- printf ("%f\n" , vad );
654- #endif
655642#if 1
656643 fwrite (features , sizeof (float ), NB_FEATURES , stdout );
657644 fwrite (g , sizeof (float ), NB_BANDS , stdout );
658645 fwrite (Ln , sizeof (float ), NB_BANDS , stdout );
659646 fwrite (& vad , sizeof (float ), 1 , stdout );
660- #endif
661- #if 0
662- compute_rnn (& noisy -> rnn , g , & vad_prob , features );
663- interp_band_gain (gf , g );
664- #if 1
665- for (i = 0 ;i < FREQ_SIZE ;i ++ ) {
666- X [i ].r *= gf [i ];
667- X [i ].i *= gf [i ];
668- }
669- #endif
670- frame_synthesis (noisy , xn , X );
671-
672- for (i = 0 ;i < FRAME_SIZE ;i ++ ) tmp [i ] = xn [i ];
673- fwrite (tmp , sizeof (short ), FRAME_SIZE , fout );
674647#endif
675648 }
676649 fprintf (stderr , "matrix size: %d x %d\n" , count , NB_FEATURES + 2 * NB_BANDS + 1 );
677650 fclose (f1 );
678651 fclose (f2 );
679- fclose (fout );
680652 return 0 ;
681653}
682654
0 commit comments