We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ed1db7a, commit b350fde · Copy full SHA for b350fde
fms_mo/custom_ext_kernels/triton_kernels.py
@@ -235,7 +235,7 @@ def imatmul_kernel(
235
accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.int32)
236
## ------ prepare LSB rounding/truncation masks -------
237
round_bit = 1 << (chunk_trun_bits - 1) if chunk_trun_bits > 0 else 0
238
- msb_mask = 0x00FFFFFF # only needed when simulating truncation on MSB
+ # msb_mask = 0x00FFFFFF # only needed when simulating truncation on MSB
239
## ---------------------------------------------------------
240
241
for k in range(0, tl.cdiv(K, BLOCK_SIZE_K)):
0 commit comments