We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a4443d3 commit a320ce7 Copy full SHA for a320ce7
ggml/src/ggml-cuda/cpy.cu
@@ -98,6 +98,7 @@ template<dequantize_kernel_t dequant, int qk>
98
static __device__ void cpy_blck_q_f32(const char * cxi, char * cdsti) {
99
float * cdstf = (float *)(cdsti);
100
101
+#pragma unroll
102
for (int j = 0; j < qk/2; j++) {
103
float2 dq;
104
dequant(cxi, 0, j, dq);
0 commit comments