We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 241350a, commit c9ecde7. Copy full SHA for c9ecde7
ggml/src/ggml-cuda/cpy.cu
@@ -85,7 +85,8 @@ static __device__ void cpy_blck_f32_q8_0(const char * cxi, char * cdsti) {
85
static __device__ void cpy_blck_q8_0_f32(const char * cxi, char * cdsti) {
86
float * cdstf = (float *)(cdsti);
87
88
- for (int j = 0; j < QK8_0; j+=2) {
+#pragma unroll
89
+ for (int j = 0; j < QK8_0; j += 2) {
90
float2 dq;
91
dequantize_q8_0(cxi, 0, j, dq);
92
*(cdstf + j) = dq.x;
0 commit comments