Skip to content

Commit e9acb6c

Browse files
committed
GFX9 has an SDWA variant of v_pk_fmac_f16 while GFX10 does not. Add tests to
verify both: GFX9 must encode it, and GFX10 must reject it.
1 parent bccf7eb commit e9acb6c

File tree

3 files changed

+82
-2
lines changed

3 files changed

+82
-2
lines changed

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2561,8 +2561,7 @@ defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_s
25612561
defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>;
25622562
defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>;
25632563
defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>;
2564-
2565-
defm V_PK_FMAC_F16 : VOP2_Real_e32e64_gfx9<0x03c>;
2564+
defm V_PK_FMAC_F16 : VOP2_Real_e32e64_gfx9<0x03c>;
25662565
} // End AssemblerPredicate = isGFX9Only
25672566

25682567
defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>;

llvm/test/MC/AMDGPU/gfx10_unsupported_sdwa.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ v_min_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD s
3232
v_mul_lo_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
3333
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
3434

35+
v_pk_fmac_f16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
36+
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
37+
3538
v_sub_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
3639
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
3740

llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,81 @@ v_pk_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3]
1111

1212
v_pk_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
1313
// CHECK-MI: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00]
14+
15+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
16+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06]
17+
18+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
19+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06]
20+
21+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
22+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x00,0x06,0x06]
23+
24+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
25+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x01,0x06,0x06]
26+
27+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
28+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x02,0x06,0x06]
29+
30+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
31+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x03,0x06,0x06]
32+
33+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
34+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x04,0x06,0x06]
35+
36+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
37+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x05,0x06,0x06]
38+
39+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD src1_sel:DWORD
40+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x0e,0x06,0x06]
41+
42+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
43+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x16,0x06,0x06]
44+
45+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
46+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x16,0x06,0x06]
47+
48+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
49+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06]
50+
51+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
52+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x00,0x06]
53+
54+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
55+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x01,0x06]
56+
57+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD
58+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x02,0x06]
59+
60+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
61+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x03,0x06]
62+
63+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
64+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x04,0x06]
65+
66+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
67+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x05,0x06]
68+
69+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
70+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06]
71+
72+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
73+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x00]
74+
75+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
76+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x01]
77+
78+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
79+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x02]
80+
81+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
82+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x03]
83+
84+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
85+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x04]
86+
87+
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
88+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x05]
89+
90+
// src1 negation: neg sets 0x10 in the final byte (0x06 | 0x10 = 0x16); sext() would set 0x08 instead.
v_pk_fmac_f16_sdwa v5, v1, -v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
91+
// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x16]

0 commit comments

Comments
 (0)