We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents 81a5f62 + 1529608 commit 831021b · Copy full SHA for 831021b
src/infiniop/ops/rms_norm/cuda/kernel.cuh
@@ -22,7 +22,7 @@ __device__ void rmsnormBlock(
22
// Thread 0 computes the reciprocal RMS, 1/sqrt(ss/dim + epsilon), and stores it in shared memory
23
__shared__ Tcompute rms;
24
if (threadIdx.x == 0) {
25
- rms = Tdata(rsqrtf(ss / Tcompute(dim) + epsilon));
+ rms = Tcompute(rsqrtf(ss / Tcompute(dim) + epsilon));
26
}
27
__syncthreads();
28
0 commit comments