@@ -133,7 +133,7 @@ static void vec_sigmoid(float *y, const float *x, int N)
133133 }
134134}
135135
136- static void gemm_accum16 (float * out , const float * weights , int rows , int cols , int col_stride , const float * x )
136+ static void sgemv_accum16 (float * out , const float * weights , int rows , int cols , int col_stride , const float * x )
137137{
138138 int i , j ;
139139 for (i = 0 ;i < rows ;i += 16 )
@@ -159,7 +159,7 @@ static void gemm_accum16(float *out, const float *weights, int rows, int cols, i
159159 _mm256_storeu_ps (& y [8 ], vy8 );
160160 }
161161}
162- static void sparse_gemm_accum16 (float * out , const float * weights , int rows , const int * idx , const float * x )
162+ static void sparse_sgemv_accum16 (float * out , const float * weights , int rows , const int * idx , const float * x )
163163{
164164 int i , j ;
165165 for (i = 0 ;i < rows ;i += 16 )
@@ -277,7 +277,7 @@ static void vec_sigmoid(float *y, const float *x, int N)
277277 }
278278}
279279
280- static void gemm_accum16 (float * out , const float * weights , int rows , int cols , int col_stride , const float * x )
280+ static void sgemv_accum16 (float * out , const float * weights , int rows , int cols , int col_stride , const float * x )
281281{
282282 int i , j ;
283283 for (i = 0 ;i < rows ;i += 16 )
@@ -310,7 +310,7 @@ static void gemm_accum16(float *out, const float *weights, int rows, int cols, i
310310 }
311311}
312312
313- static void sparse_gemm_accum16 (float * out , const float * w , int rows , const int * idx , const float * x )
313+ static void sparse_sgemv_accum16 (float * out , const float * w , int rows , const int * idx , const float * x )
314314{
315315 int i , j ;
316316 for (i = 0 ;i < rows ;i += 16 )
@@ -353,12 +353,12 @@ static OPUS_INLINE float relu(float x)
353353}
354354
355355
356- static void gemm_accum (float * out , const float * weights , int rows , int cols , int col_stride , const float * x )
356+ static void sgemv_accum (float * out , const float * weights , int rows , int cols , int col_stride , const float * x )
357357{
358358 int i , j ;
359359 if (rows % 16 == 0 )
360360 {
361- gemm_accum16 (out , weights , rows , cols , col_stride , x );
361+ sgemv_accum16 (out , weights , rows , cols , col_stride , x );
362362 } else {
363363 for (i = 0 ;i < rows ;i ++ )
364364 {
@@ -410,7 +410,7 @@ void compute_dense(const DenseLayer *layer, float *output, const float *input)
410410 celt_assert (input != output );
411411 for (i = 0 ;i < N ;i ++ )
412412 output [i ] = layer -> bias [i ];
413- gemm_accum (output , layer -> input_weights , N , M , stride , input );
413+ sgemv_accum (output , layer -> input_weights , N , M , stride , input );
414414 compute_activation (output , output , N , layer -> activation );
415415}
416416
@@ -428,7 +428,7 @@ void compute_mdense(const MDenseLayer *layer, float *output, const float *input)
428428 stride = N * C ;
429429 for (i = 0 ;i < N * C ;i ++ )
430430 tmp [i ] = layer -> bias [i ];
431- gemm_accum (tmp , layer -> input_weights , N * C , M , stride , input );
431+ sgemv_accum (tmp , layer -> input_weights , N * C , M , stride , input );
432432 compute_activation (tmp , tmp , N * C , ACTIVATION_TANH );
433433 for (i = 0 ;i < N ;i ++ )
434434 output [i ] = 0 ;
@@ -462,8 +462,8 @@ void compute_gru(const GRULayer *gru, float *state, const float *input)
462462 for (i = 0 ;i < N ;i ++ )
463463 z [i ] += gru -> bias [3 * N + i ];
464464 }
465- gemm_accum (z , gru -> input_weights , N , M , stride , input );
466- gemm_accum (z , gru -> recurrent_weights , N , N , stride , state );
465+ sgemv_accum (z , gru -> input_weights , N , M , stride , input );
466+ sgemv_accum (z , gru -> recurrent_weights , N , N , stride , state );
467467 compute_activation (z , z , N , ACTIVATION_SIGMOID );
468468
469469 /* Compute reset gate. */
@@ -474,8 +474,8 @@ void compute_gru(const GRULayer *gru, float *state, const float *input)
474474 for (i = 0 ;i < N ;i ++ )
475475 r [i ] += gru -> bias [4 * N + i ];
476476 }
477- gemm_accum (r , & gru -> input_weights [N ], N , M , stride , input );
478- gemm_accum (r , & gru -> recurrent_weights [N ], N , N , stride , state );
477+ sgemv_accum (r , & gru -> input_weights [N ], N , M , stride , input );
478+ sgemv_accum (r , & gru -> recurrent_weights [N ], N , N , stride , state );
479479 compute_activation (r , r , N , ACTIVATION_SIGMOID );
480480
481481 /* Compute output. */
@@ -485,15 +485,15 @@ void compute_gru(const GRULayer *gru, float *state, const float *input)
485485 {
486486 for (i = 0 ;i < N ;i ++ )
487487 tmp [i ] = gru -> bias [5 * N + i ];
488- gemm_accum (tmp , & gru -> recurrent_weights [2 * N ], N , N , stride , state );
488+ sgemv_accum (tmp , & gru -> recurrent_weights [2 * N ], N , N , stride , state );
489489 for (i = 0 ;i < N ;i ++ )
490490 h [i ] += tmp [i ] * r [i ];
491- gemm_accum (h , & gru -> input_weights [2 * N ], N , M , stride , input );
491+ sgemv_accum (h , & gru -> input_weights [2 * N ], N , M , stride , input );
492492 } else {
493493 for (i = 0 ;i < N ;i ++ )
494494 tmp [i ] = state [i ] * r [i ];
495- gemm_accum (h , & gru -> input_weights [2 * N ], N , M , stride , input );
496- gemm_accum (h , & gru -> recurrent_weights [2 * N ], N , N , stride , tmp );
495+ sgemv_accum (h , & gru -> input_weights [2 * N ], N , M , stride , input );
496+ sgemv_accum (h , & gru -> recurrent_weights [2 * N ], N , N , stride , tmp );
497497 }
498498 compute_activation (h , h , N , gru -> activation );
499499 for (i = 0 ;i < N ;i ++ )
@@ -524,10 +524,10 @@ void compute_gru2(const GRULayer *gru, float *state, const float *input)
524524 /* Compute update gate. */
525525 for (i = 0 ;i < 3 * N ;i ++ )
526526 zrh [i ] = gru -> bias [i ];
527- gemm_accum (zrh , gru -> input_weights , 3 * N , M , stride , input );
527+ sgemv_accum (zrh , gru -> input_weights , 3 * N , M , stride , input );
528528 for (i = 0 ;i < 3 * N ;i ++ )
529529 recur [i ] = gru -> bias [3 * N + i ];
530- gemm_accum (recur , gru -> recurrent_weights , 3 * N , N , stride , state );
530+ sgemv_accum (recur , gru -> recurrent_weights , 3 * N , N , stride , state );
531531 for (i = 0 ;i < 2 * N ;i ++ )
532532 zrh [i ] += recur [i ];
533533 compute_activation (zrh , zrh , 2 * N , ACTIVATION_SIGMOID );
@@ -561,7 +561,7 @@ void compute_gru3(const GRULayer *gru, float *state, const float *input)
561561 RNN_COPY (zrh , input , 3 * N );
562562 for (i = 0 ;i < 3 * N ;i ++ )
563563 recur [i ] = gru -> bias [3 * N + i ];
564- gemm_accum (recur , gru -> recurrent_weights , 3 * N , N , stride , state );
564+ sgemv_accum (recur , gru -> recurrent_weights , 3 * N , N , stride , state );
565565 for (i = 0 ;i < 2 * N ;i ++ )
566566 zrh [i ] += recur [i ];
567567 compute_activation (zrh , zrh , 2 * N , ACTIVATION_SIGMOID );
@@ -598,7 +598,7 @@ void compute_sparse_gru(const SparseGRULayer *gru, float *state, const float *in
598598 for (i = 0 ;i < N ;i ++ )
599599 recur [k * N + i ] += gru -> diag_weights [k * N + i ]* state [i ];
600600 }
601- sparse_gemm_accum16 (recur , gru -> recurrent_weights , 3 * N , gru -> idx , state );
601+ sparse_sgemv_accum16 (recur , gru -> recurrent_weights , 3 * N , gru -> idx , state );
602602 for (i = 0 ;i < 2 * N ;i ++ )
603603 zrh [i ] += recur [i ];
604604 compute_activation (zrh , zrh , 2 * N , ACTIVATION_SIGMOID );
@@ -626,7 +626,7 @@ void compute_conv1d(const Conv1DLayer *layer, float *output, float *mem, const f
626626 stride = N ;
627627 for (i = 0 ;i < N ;i ++ )
628628 output [i ] = layer -> bias [i ];
629- gemm_accum (output , layer -> input_weights , N , M , stride , tmp );
629+ sgemv_accum (output , layer -> input_weights , N , M , stride , tmp );
630630 compute_activation (output , output , N , layer -> activation );
631631 RNN_COPY (mem , & tmp [layer -> nb_inputs ], layer -> nb_inputs * (layer -> kernel_size - 1 ));
632632}
0 commit comments