@@ -32,17 +32,15 @@ __device__ inline void mma_sp(const FragB& a_frag0, const FragB& a_frag1,
32
32
float * c = reinterpret_cast <float *>(&frag_c);
33
33
if (psel == 0 ) {
34
34
asm volatile (
35
- " mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
36
- " f32 "
35
+ " mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
37
36
" {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
38
37
" {%12,%13,%14,%15}, %16, 0x0;\n "
39
38
: " =f" (c[0 ]), " =f" (c[1 ]), " =f" (c[2 ]), " =f" (c[3 ])
40
39
: " r" (a0[0 ]), " r" (a1[0 ]), " r" (a0[1 ]), " r" (a1[1 ]), " r" (b[0 ]), " r" (b[2 ]),
41
40
" r" (b[4 ]), " r" (b[6 ]), " f" (c[0 ]), " f" (c[1 ]), " f" (c[2 ]), " f" (c[3 ]),
42
41
" r" (e[0 ]));
43
42
asm volatile (
44
- " mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
45
- " f32 "
43
+ " mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
46
44
" {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
47
45
" {%12,%13,%14,%15}, %16, 0x0;\n "
48
46
: " =f" (c[4 ]), " =f" (c[5 ]), " =f" (c[6 ]), " =f" (c[7 ])
@@ -51,17 +49,15 @@ __device__ inline void mma_sp(const FragB& a_frag0, const FragB& a_frag1,
51
49
" r" (e[0 ]));
52
50
} else {
53
51
asm volatile (
54
- " mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
55
- " f32 "
52
+ " mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
56
53
" {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
57
54
" {%12,%13,%14,%15}, %16, 0x1;\n "
58
55
: " =f" (c[0 ]), " =f" (c[1 ]), " =f" (c[2 ]), " =f" (c[3 ])
59
56
: " r" (a0[0 ]), " r" (a1[0 ]), " r" (a0[1 ]), " r" (a1[1 ]), " r" (b[0 ]), " r" (b[2 ]),
60
57
" r" (b[4 ]), " r" (b[6 ]), " f" (c[0 ]), " f" (c[1 ]), " f" (c[2 ]), " f" (c[3 ]),
61
58
" r" (e[0 ]));
62
59
asm volatile (
63
- " mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
64
- " f32 "
60
+ " mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
65
61
" {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
66
62
" {%12,%13,%14,%15}, %16, 0x1;\n "
67
63
: " =f" (c[4 ]), " =f" (c[5 ]), " =f" (c[6 ]), " =f" (c[7 ])
0 commit comments