@@ -32,15 +32,17 @@ __device__ inline void mma_sp(const FragB& a_frag0, const FragB& a_frag1,
32
32
float * c = reinterpret_cast <float *>(&frag_c);
33
33
if (psel == 0 ) {
34
34
asm volatile (
35
- " mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
35
+ " mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
36
+ " f32 "
36
37
" {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
37
38
" {%12,%13,%14,%15}, %16, 0x0;\n "
38
39
: " =f" (c[0 ]), " =f" (c[1 ]), " =f" (c[2 ]), " =f" (c[3 ])
39
40
: " r" (a0[0 ]), " r" (a1[0 ]), " r" (a0[1 ]), " r" (a1[1 ]), " r" (b[0 ]), " r" (b[2 ]),
40
41
" r" (b[4 ]), " r" (b[6 ]), " f" (c[0 ]), " f" (c[1 ]), " f" (c[2 ]), " f" (c[3 ]),
41
42
" r" (e[0 ]));
42
43
asm volatile (
43
- " mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
44
+ " mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
45
+ " f32 "
44
46
" {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
45
47
" {%12,%13,%14,%15}, %16, 0x0;\n "
46
48
: " =f" (c[4 ]), " =f" (c[5 ]), " =f" (c[6 ]), " =f" (c[7 ])
@@ -49,15 +51,17 @@ __device__ inline void mma_sp(const FragB& a_frag0, const FragB& a_frag1,
49
51
" r" (e[0 ]));
50
52
} else {
51
53
asm volatile (
52
- " mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
54
+ " mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
55
+ " f32 "
53
56
" {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
54
57
" {%12,%13,%14,%15}, %16, 0x1;\n "
55
58
: " =f" (c[0 ]), " =f" (c[1 ]), " =f" (c[2 ]), " =f" (c[3 ])
56
59
: " r" (a0[0 ]), " r" (a1[0 ]), " r" (a0[1 ]), " r" (a1[1 ]), " r" (b[0 ]), " r" (b[2 ]),
57
60
" r" (b[4 ]), " r" (b[6 ]), " f" (c[0 ]), " f" (c[1 ]), " f" (c[2 ]), " f" (c[3 ]),
58
61
" r" (e[0 ]));
59
62
asm volatile (
60
- " mma.sp.sync.aligned.m16n8k32.row.col.f32.f16.f16.f32 "
63
+ " mma.sp::ordered_metadata.sync.aligned.m16n8k32.row.col.f32.f16.f16."
64
+ " f32 "
61
65
" {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9, %10,%11}, "
62
66
" {%12,%13,%14,%15}, %16, 0x1;\n "
63
67
: " =f" (c[4 ]), " =f" (c[5 ]), " =f" (c[6 ]), " =f" (c[7 ])
0 commit comments