@@ -1099,9 +1099,9 @@ static int sgemvBatch(cb_order order, cb_transpose transA,
10991099 args [8 ] = & N ;
11001100
11011101 if (transA == cb_no_trans ) {
1102- err = GpuKernel_call (& ((blas_handle * )ctx -> blas_handle )-> sgemvBH_N_a1_b1_small , 2 , ls , gs , 0 , args );
1102+ err = GpuKernel_call (& ((blas_handle * )ctx -> blas_handle )-> sgemvBH_N_a1_b1_small , 2 , gs , ls , 0 , args );
11031103 } else {
1104- err = GpuKernel_call (& ((blas_handle * )ctx -> blas_handle )-> sgemvBH_T_a1_b1_small , 2 , ls , gs , 0 , args );
1104+ err = GpuKernel_call (& ((blas_handle * )ctx -> blas_handle )-> sgemvBH_T_a1_b1_small , 2 , gs , ls , 0 , args );
11051105 }
11061106
11071107 cuda_ops .buffer_release (Aa );
@@ -1223,9 +1223,9 @@ static int dgemvBatch(cb_order order, cb_transpose transA,
12231223 args [8 ] = & N ;
12241224
12251225 if (transA == cb_no_trans ) {
1226- err = GpuKernel_call (& ((blas_handle * )ctx -> blas_handle )-> dgemvBH_N_a1_b1_small , 2 , ls , gs , 0 , args );
1226+ err = GpuKernel_call (& ((blas_handle * )ctx -> blas_handle )-> dgemvBH_N_a1_b1_small , 2 , gs , ls , 0 , args );
12271227 } else {
1228- err = GpuKernel_call (& ((blas_handle * )ctx -> blas_handle )-> dgemvBH_T_a1_b1_small , 2 , ls , gs , 0 , args );
1228+ err = GpuKernel_call (& ((blas_handle * )ctx -> blas_handle )-> dgemvBH_T_a1_b1_small , 2 , gs , ls , 0 , args );
12291229 }
12301230
12311231 cuda_ops .buffer_release (Aa );
@@ -1486,7 +1486,7 @@ static int sgerBatch(cb_order order, size_t M, size_t N, float alpha,
14861486 args [8 ] = & M ;
14871487 args [9 ] = & N ;
14881488
1489- err = GpuKernel_call (& ((blas_handle * )ctx -> blas_handle )-> sgerBH_gen_small , 3 , ls , gs , 0 , args );
1489+ err = GpuKernel_call (& ((blas_handle * )ctx -> blas_handle )-> sgerBH_gen_small , 3 , gs , ls , 0 , args );
14901490
14911491 cuda_ops .buffer_release (Aa );
14921492 cuda_ops .buffer_release (xa );
@@ -1618,7 +1618,7 @@ static int dgerBatch(cb_order order, size_t M, size_t N, double alpha,
16181618 args [8 ] = & M ;
16191619 args [9 ] = & N ;
16201620
1621- err = GpuKernel_call (& ((blas_handle * )ctx -> blas_handle )-> sgerBH_gen_small , 3 , ls , gs , 0 , args );
1621+ err = GpuKernel_call (& ((blas_handle * )ctx -> blas_handle )-> sgerBH_gen_small , 3 , gs , ls , 0 , args );
16221622
16231623 cuda_ops .buffer_release (Aa );
16241624 cuda_ops .buffer_release (xa );
0 commit comments