Skip to content

Commit e9d3aa0

Browse files
authored
Revert "[Kernel] Marlin_24: Ensure the mma.sp instruction is using the ::ordered_metadata modifier (introduced with PTX 8.5)" (#5149)
1 parent a22dea5 commit e9d3aa0

File tree

1 file changed

+4 -8 lines changed
  • csrc/quantization/marlin/sparse/common

1 file changed

+4 -8 lines changed

csrc/quantization/marlin/sparse/common/mma.h

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -32,17 +32,15 @@ __device__ inline void mma_sp(const FragB& a_frag0, const FragB& a_frag1,
3232
float* c = reinterpret_cast<float*>(&frag_c);
3333
if (psel == 0) {
3434
asm volatile(
35-
"mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
36-
"f32 "
35+
"mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
3736
"{%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
3837
"{%12,%13,%14,%15}, %16, 0x0;\n"
3938
: "=f"(c[0]), "=f"(c[1]), "=f"(c[2]), "=f"(c[3])
4039
: "r"(a0[0]), "r"(a1[0]), "r"(a0[1]), "r"(a1[1]), "r"(b[0]), "r"(b[2]),
4140
"r"(b[4]), "r"(b[6]), "f"(c[0]), "f"(c[1]), "f"(c[2]), "f"(c[3]),
4241
"r"(e[0]));
4342
asm volatile(
44-
"mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
45-
"f32 "
43+
"mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
4644
"{%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
4745
"{%12,%13,%14,%15}, %16, 0x0;\n"
4846
: "=f"(c[4]), "=f"(c[5]), "=f"(c[6]), "=f"(c[7])
@@ -51,17 +49,15 @@ __device__ inline void mma_sp(const FragB& a_frag0, const FragB& a_frag1,
5149
"r"(e[0]));
5250
} else {
5351
asm volatile(
54-
"mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
55-
"f32 "
52+
"mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
5653
"{%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
5754
"{%12,%13,%14,%15}, %16, 0x1;\n"
5855
: "=f"(c[0]), "=f"(c[1]), "=f"(c[2]), "=f"(c[3])
5956
: "r"(a0[0]), "r"(a1[0]), "r"(a0[1]), "r"(a1[1]), "r"(b[0]), "r"(b[2]),
6057
"r"(b[4]), "r"(b[6]), "f"(c[0]), "f"(c[1]), "f"(c[2]), "f"(c[3]),
6158
"r"(e[0]));
6259
asm volatile(
63-
"mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
64-
"f32 "
60+
"mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
6561
"{%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
6662
"{%12,%13,%14,%15}, %16, 0x1;\n"
6763
: "=f"(c[4]), "=f"(c[5]), "=f"(c[6]), "=f"(c[7])

0 commit comments

Comments (0)