We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 3b8100c · commit 51a2590 (Copy full SHA for 51a2590)
ggml/src/ggml-cuda/cpy.cu
@@ -81,7 +81,7 @@ static __global__ void cpy_flt_transpose(const char * cx, char * cdst, const int
81
for (int j = 0; j < CUDA_CPY_TILE_DIM_2D; j += CUDA_CPY_BLOCK_ROWS) {
82
if (ty + j < ne01 && tx < ne00) {
83
const int col = ((threadIdx.y+j)*sizeof(float)/sizeof(T)) ^ threadIdx.x; //swizzling to avoid bank conflicts
84
- T *tile2 = reinterpret_cast<T*>(tile[threadIdx.x]);
+ const T *tile2 = reinterpret_cast<const T*>(tile[threadIdx.x]);
85
dst[imat*n + (ty+j)*ne00 + tx] = tile2[col];
86
}
87
0 commit comments