@@ -727,7 +727,7 @@ __device__ __forceinline__ float vec_dot_iq3_ks_q8_1(
727727
728728}
729729
730- /* __device__ __forceinline__ float vec_dot_iq1_bn_q8_1(
730+ __device__ __forceinline__ float vec_dot_iq1_bn_q8_1 (
731731 const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & kbx, const int & iqs) {
732732
733733 half d16; memcpy (&d16, vbq, sizeof (d16));
@@ -784,9 +784,9 @@ __device__ __forceinline__ float vec_dot_iq3_ks_q8_1(
784784 }
785785 return scale * __low2float (bq8_1[iqs].ds ) * sumi;
786786#endif
787- } */
787+ }
788788
789- /* __device__ __forceinline__ float vec_dot_iq2_bn_q8_1(
789+ __device__ __forceinline__ float vec_dot_iq2_bn_q8_1 (
790790 const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & kbx, const int & iqs) {
791791
792792 float scale = *(const float *)vbq;
@@ -825,7 +825,7 @@ __device__ __forceinline__ float vec_dot_iq3_ks_q8_1(
825825 auto d8h = __half22float2 (bq8_1[1 ].ds );
826826 return scale * (d8l.x * (sumi1 + 0 .25f *sumi2) + 0 .0625f * d8h.x *(sumi3 + 0 .25f *sumi4) - 0 .5f *d8l.y - 0 .5f *d8h.y );
827827#endif
828- } */
828+ }
829829
830830} // namespace
831831
@@ -906,16 +906,16 @@ void mul_mat_vec_iq6_k_q8_1_cuda(
906906 iqk_mul_mat_vec_q_cuda<GGML_TYPE_IQ6_K, VDR_IQ6_K_Q8_1_MMVQ, vec_dot_iq6_k_q8_1>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst, stream);
907907}
908908
909- // void mul_mat_vec_iq1_bn_q8_1_cuda(
910- // const void * vx, const void * vy, float * dst,
911- // const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst, cudaStream_t stream) {
909+ void mul_mat_vec_iq1_bn_q8_1_cuda (
910+ const void * vx, const void * vy, float * dst,
911+ const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst, cudaStream_t stream) {
912912
913- // iqk_mul_mat_vec_q_cuda<GGML_TYPE_IQ1_BN, 1, vec_dot_iq1_bn_q8_1>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst, stream);
914- // }
913+ iqk_mul_mat_vec_q_cuda<GGML_TYPE_IQ1_BN, 1 , vec_dot_iq1_bn_q8_1>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst, stream);
914+ }
915915
916- // void mul_mat_vec_iq2_bn_q8_1_cuda(
917- // const void * vx, const void * vy, float * dst,
918- // const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst, cudaStream_t stream) {
916+ void mul_mat_vec_iq2_bn_q8_1_cuda (
917+ const void * vx, const void * vy, float * dst,
918+ const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst, cudaStream_t stream) {
919919
920- // iqk_mul_mat_vec_q_cuda<GGML_TYPE_IQ2_BN, 1, vec_dot_iq2_bn_q8_1>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst, stream);
921- // }
920+ iqk_mul_mat_vec_q_cuda<GGML_TYPE_IQ2_BN, 1 , vec_dot_iq2_bn_q8_1>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst, stream);
921+ }
0 commit comments