Skip to content

Commit 07bafab

Browse files
authored
[AMDGPU] Do not generate V_FMAC_DX9_ZERO_F32 on GFX12 (#171116)
GFX12 does not have the FMAC form of this instruction (V_FMAC_DX9_ZERO_F32), only the FMA form (V_FMA_DX9_ZERO_F32). Fixes: #170437
1 parent 33d779d commit 07bafab

File tree

4 files changed

+88
-3
lines changed

4 files changed

+88
-3
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2927,6 +2927,9 @@ def HasMadMacF32Insts : Predicate<"Subtarget->hasMadMacF32Insts()">,
29272927
def HasFmaLegacy32 : Predicate<"Subtarget->hasGFX10_3Insts()">,
29282928
AssemblerPredicate<(any_of FeatureGFX10_3Insts)>;
29292929

2930+
def HasFmacLegacy32 : Predicate<"Subtarget->hasGFX10_3Insts() && Subtarget->getGeneration() < AMDGPUSubtarget::GFX12">,
2931+
AssemblerPredicate<(all_of FeatureGFX10_3Insts, (not FeatureGFX12Insts))>;
2932+
29302933
def HasAtomicDsPkAdd16Insts : Predicate<"Subtarget->hasAtomicDsPkAdd16Insts()">,
29312934
AssemblerPredicate<(any_of FeatureAtomicDsPkAdd16Insts)>;
29322935

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1451,7 +1451,7 @@ def : GCNPat <
14511451

14521452
// Don't allow source modifiers. If there are any source modifiers then it's
14531453
// better to select fma instead of fmac.
1454-
let SubtargetPredicate = HasFmaLegacy32 in
1454+
let SubtargetPredicate = HasFmacLegacy32 in
14551455
def : GCNPat <
14561456
(f32 (int_amdgcn_fma_legacy (VOP3NoMods f32:$src0),
14571457
(VOP3NoMods f32:$src1),

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1266,14 +1266,14 @@ let Constraints = "$vdst = $src2",
12661266
defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">;
12671267
} // End SubtargetPredicate = HasDLInsts
12681268

1269-
let SubtargetPredicate = HasFmaLegacy32 in {
1269+
let SubtargetPredicate = HasFmacLegacy32 in {
12701270

12711271
let Constraints = "$vdst = $src2",
12721272
isConvertibleToThreeAddress = 1,
12731273
isCommutable = 1 in
12741274
defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>;
12751275

1276-
} // End SubtargetPredicate = HasFmaLegacy32
1276+
} // End SubtargetPredicate = HasFmacLegacy32
12771277

12781278
let SubtargetPredicate = HasFmacF64Inst,
12791279
Constraints = "$vdst = $src2",

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s
44
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
55
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
6+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
7+
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
68

79
define float @v_fma(float %a, float %b, float %c) {
810
; GFX10-LABEL: v_fma:
@@ -16,6 +18,16 @@ define float @v_fma(float %a, float %b, float %c) {
1618
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1719
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, v1, v2
1820
; GFX11-NEXT: s_setpc_b64 s[30:31]
21+
;
22+
; GFX12-LABEL: v_fma:
23+
; GFX12: ; %bb.0:
24+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
25+
; GFX12-NEXT: s_wait_expcnt 0x0
26+
; GFX12-NEXT: s_wait_samplecnt 0x0
27+
; GFX12-NEXT: s_wait_bvhcnt 0x0
28+
; GFX12-NEXT: s_wait_kmcnt 0x0
29+
; GFX12-NEXT: v_fma_dx9_zero_f32 v0, v0, v1, v2
30+
; GFX12-NEXT: s_setpc_b64 s[30:31]
1931
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float %b, float %c)
2032
ret float %fma
2133
}
@@ -32,6 +44,16 @@ define float @v_fmac(float %a, float %b, float %c) {
3244
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3345
; GFX11-NEXT: v_fmac_dx9_zero_f32_e32 v0, v1, v2
3446
; GFX11-NEXT: s_setpc_b64 s[30:31]
47+
;
48+
; GFX12-LABEL: v_fmac:
49+
; GFX12: ; %bb.0:
50+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
51+
; GFX12-NEXT: s_wait_expcnt 0x0
52+
; GFX12-NEXT: s_wait_samplecnt 0x0
53+
; GFX12-NEXT: s_wait_bvhcnt 0x0
54+
; GFX12-NEXT: s_wait_kmcnt 0x0
55+
; GFX12-NEXT: v_fma_dx9_zero_f32 v0, v1, v2, v0
56+
; GFX12-NEXT: s_setpc_b64 s[30:31]
3557
%fma = call float @llvm.amdgcn.fma.legacy(float %b, float %c, float %a)
3658
ret float %fma
3759
}
@@ -48,6 +70,16 @@ define float @v_fma_imm(float %a, float %c) {
4870
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4971
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, 0x41200000, v0, v1
5072
; GFX11-NEXT: s_setpc_b64 s[30:31]
73+
;
74+
; GFX12-LABEL: v_fma_imm:
75+
; GFX12: ; %bb.0:
76+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
77+
; GFX12-NEXT: s_wait_expcnt 0x0
78+
; GFX12-NEXT: s_wait_samplecnt 0x0
79+
; GFX12-NEXT: s_wait_bvhcnt 0x0
80+
; GFX12-NEXT: s_wait_kmcnt 0x0
81+
; GFX12-NEXT: v_fma_dx9_zero_f32 v0, 0x41200000, v0, v1
82+
; GFX12-NEXT: s_setpc_b64 s[30:31]
5183
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float 10.0, float %c)
5284
ret float %fma
5385
}
@@ -64,6 +96,16 @@ define float @v_fmac_imm(float %a, float %c) {
6496
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6597
; GFX11-NEXT: v_fmac_dx9_zero_f32_e32 v0, 0x41200000, v1
6698
; GFX11-NEXT: s_setpc_b64 s[30:31]
99+
;
100+
; GFX12-LABEL: v_fmac_imm:
101+
; GFX12: ; %bb.0:
102+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
103+
; GFX12-NEXT: s_wait_expcnt 0x0
104+
; GFX12-NEXT: s_wait_samplecnt 0x0
105+
; GFX12-NEXT: s_wait_bvhcnt 0x0
106+
; GFX12-NEXT: s_wait_kmcnt 0x0
107+
; GFX12-NEXT: v_fma_dx9_zero_f32 v0, 0x41200000, v1, v0
108+
; GFX12-NEXT: s_setpc_b64 s[30:31]
67109
%fma = call float @llvm.amdgcn.fma.legacy(float 10.0, float %c, float %a)
68110
ret float %fma
69111
}
@@ -80,6 +122,16 @@ define float @v_fabs_fma(float %a, float %b, float %c) {
80122
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81123
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, |v0|, v1, v2
82124
; GFX11-NEXT: s_setpc_b64 s[30:31]
125+
;
126+
; GFX12-LABEL: v_fabs_fma:
127+
; GFX12: ; %bb.0:
128+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
129+
; GFX12-NEXT: s_wait_expcnt 0x0
130+
; GFX12-NEXT: s_wait_samplecnt 0x0
131+
; GFX12-NEXT: s_wait_bvhcnt 0x0
132+
; GFX12-NEXT: s_wait_kmcnt 0x0
133+
; GFX12-NEXT: v_fma_dx9_zero_f32 v0, |v0|, v1, v2
134+
; GFX12-NEXT: s_setpc_b64 s[30:31]
83135
%fabs.a = call float @llvm.fabs.f32(float %a)
84136
%fma = call float @llvm.amdgcn.fma.legacy(float %fabs.a, float %b, float %c)
85137
ret float %fma
@@ -97,6 +149,16 @@ define float @v_fneg_fabs_fma(float %a, float %b, float %c) {
97149
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98150
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, -|v1|, v2
99151
; GFX11-NEXT: s_setpc_b64 s[30:31]
152+
;
153+
; GFX12-LABEL: v_fneg_fabs_fma:
154+
; GFX12: ; %bb.0:
155+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
156+
; GFX12-NEXT: s_wait_expcnt 0x0
157+
; GFX12-NEXT: s_wait_samplecnt 0x0
158+
; GFX12-NEXT: s_wait_bvhcnt 0x0
159+
; GFX12-NEXT: s_wait_kmcnt 0x0
160+
; GFX12-NEXT: v_fma_dx9_zero_f32 v0, v0, -|v1|, v2
161+
; GFX12-NEXT: s_setpc_b64 s[30:31]
100162
%fabs.b = call float @llvm.fabs.f32(float %b)
101163
%neg.fabs.b = fneg float %fabs.b
102164
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float %neg.fabs.b, float %c)
@@ -115,6 +177,16 @@ define float @v_fneg_fma(float %a, float %b, float %c) {
115177
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116178
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, v1, -v2
117179
; GFX11-NEXT: s_setpc_b64 s[30:31]
180+
;
181+
; GFX12-LABEL: v_fneg_fma:
182+
; GFX12: ; %bb.0:
183+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
184+
; GFX12-NEXT: s_wait_expcnt 0x0
185+
; GFX12-NEXT: s_wait_samplecnt 0x0
186+
; GFX12-NEXT: s_wait_bvhcnt 0x0
187+
; GFX12-NEXT: s_wait_kmcnt 0x0
188+
; GFX12-NEXT: v_fma_dx9_zero_f32 v0, v0, v1, -v2
189+
; GFX12-NEXT: s_setpc_b64 s[30:31]
118190
%neg.c = fneg float %c
119191
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float %b, float %neg.c)
120192
ret float %fma
@@ -132,6 +204,16 @@ define float @v_fma_const_const(float %a) {
132204
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133205
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, v0, 2.0, -1.0
134206
; GFX11-NEXT: s_setpc_b64 s[30:31]
207+
;
208+
; GFX12-LABEL: v_fma_const_const:
209+
; GFX12: ; %bb.0:
210+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
211+
; GFX12-NEXT: s_wait_expcnt 0x0
212+
; GFX12-NEXT: s_wait_samplecnt 0x0
213+
; GFX12-NEXT: s_wait_bvhcnt 0x0
214+
; GFX12-NEXT: s_wait_kmcnt 0x0
215+
; GFX12-NEXT: v_fma_dx9_zero_f32 v0, v0, 2.0, -1.0
216+
; GFX12-NEXT: s_setpc_b64 s[30:31]
135217
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float 2.0, float -1.0)
136218
ret float %fma
137219
}

0 commit comments

Comments
 (0)