@@ -32,17 +32,15 @@ __device__ inline void mma_sp(const FragB& a_frag0, const FragB& a_frag1,
3232 float * c = reinterpret_cast <float *>(&frag_c);
3333 if (psel == 0 ) {
3434 asm volatile (
35- " mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
36- " f32 "
35+ " mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
3736 " {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
3837 " {%12,%13,%14,%15}, %16, 0x0;\n "
3938 : " =f" (c[0 ]), " =f" (c[1 ]), " =f" (c[2 ]), " =f" (c[3 ])
4039 : " r" (a0[0 ]), " r" (a1[0 ]), " r" (a0[1 ]), " r" (a1[1 ]), " r" (b[0 ]), " r" (b[2 ]),
4140 " r" (b[4 ]), " r" (b[6 ]), " f" (c[0 ]), " f" (c[1 ]), " f" (c[2 ]), " f" (c[3 ]),
4241 " r" (e[0 ]));
4342 asm volatile (
44- " mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
45- " f32 "
43+ " mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
4644 " {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
4745 " {%12,%13,%14,%15}, %16, 0x0;\n "
4846 : " =f" (c[4 ]), " =f" (c[5 ]), " =f" (c[6 ]), " =f" (c[7 ])
@@ -51,17 +49,15 @@ __device__ inline void mma_sp(const FragB& a_frag0, const FragB& a_frag1,
5149 " r" (e[0 ]));
5250 } else {
5351 asm volatile (
54- " mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
55- " f32 "
52+ " mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
5653 " {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
5754 " {%12,%13,%14,%15}, %16, 0x1;\n "
5855 : " =f" (c[0 ]), " =f" (c[1 ]), " =f" (c[2 ]), " =f" (c[3 ])
5956 : " r" (a0[0 ]), " r" (a1[0 ]), " r" (a0[1 ]), " r" (a1[1 ]), " r" (b[0 ]), " r" (b[2 ]),
6057 " r" (b[4 ]), " r" (b[6 ]), " f" (c[0 ]), " f" (c[1 ]), " f" (c[2 ]), " f" (c[3 ]),
6158 " r" (e[0 ]));
6259 asm volatile (
63- " mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
64- " f32 "
60+ " mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
6561 " {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
6662 " {%12,%13,%14,%15}, %16, 0x1;\n "
6763 : " =f" (c[4 ]), " =f" (c[5 ]), " =f" (c[6 ]), " =f" (c[7 ])
0 commit comments