We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 984b694, commit 13b3a75 (Copy full SHA for 13b3a75)
python/triton_kernels/triton_kernels/tensor_details/layout_details/hopper_scale.py
@@ -34,7 +34,7 @@ def swizzle_data(self, data):
34
data = data.flatten(-3, -2)
35
assert data.shape[-2] == M // 32
36
assert data.shape[-1] == K * 32
37
- data = data.transpose(-1, -2).contiguous()
+ data = data.contiguous().transpose(-1, -2)
38
return data
39
40
def unswizzle_data(self, data):
@@ -46,7 +46,7 @@ def unswizzle_data(self, data):
46
perm = list(range(b)) + [b + p for p in perm]
47
data = data.permute(*perm)
48
data = data.reshape(*batch, M * 32, K // 32)
49
50
51
52
def swizzle_block_shape(self, block_shape):
0 commit comments