Skip to content

Commit 7f02a12

Browse files
committed
[AMDGPU][GISel] Add RegBankLegalize support for G_STRICT_{FADD|FMUL}
1 parent 9bae84b commit 7f02a12

File tree

7 files changed: +183 additions, -233 deletions

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
120120
return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
121121
case UniV2S16:
122122
return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
123+
case UniV2S32:
124+
return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isUniform(Reg);
123125
case UniB32:
124126
return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
125127
case UniB64:
@@ -160,6 +162,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
160162
return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
161163
case DivV2S16:
162164
return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
165+
case DivV2S32:
166+
return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isDivergent(Reg);
163167
case DivB32:
164168
return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
165169
case DivB64:
@@ -939,7 +943,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
939943

940944
bool hasSALUFloat = ST->hasSALUFloatInsts();
941945

942-
addRulesForGOpcs({G_FADD, G_FMUL}, Standard)
946+
addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
943947
.Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
944948
.Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
945949
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})

llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll

Lines changed: 51 additions & 169 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
3-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
3+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
44

55
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10-SDAG %s
6-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10-GISEL %s
6+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10-GISEL %s
77

88
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11-SDAG %s
9-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11-GISEL %s
9+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11-GISEL %s
1010

1111
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
12-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
12+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
1313

1414
define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 {
1515
; GFX9-LABEL: v_constained_fadd_f32_fpexcept_strict:

llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-SDAG %s
3-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL %s
3+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL %s
44

5-
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s
6-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s
5+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10-SDAG %s
6+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10-GISEL %s
77

8-
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s
9-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s
8+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX11-SDAG %s
9+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX11-GISEL %s
1010

1111
define double @v_constained_fadd_f64_fpexcept_strict(double %x, double %y) #0 {
1212
; GCN-LABEL: v_constained_fadd_f64_fpexcept_strict:
@@ -96,12 +96,38 @@ define amdgpu_ps <2 x float> @s_constained_fadd_f64_fpexcept_strict(double inreg
9696
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s4
9797
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, s5
9898
; GCN-GISEL-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1]
99+
; GCN-GISEL-NEXT: v_readfirstlane_b32 s0, v0
100+
; GCN-GISEL-NEXT: v_readfirstlane_b32 s1, v1
101+
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s0
102+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, s1
99103
; GCN-GISEL-NEXT: ; return to shader part epilog
100104
;
101-
; GFX10PLUS-LABEL: s_constained_fadd_f64_fpexcept_strict:
102-
; GFX10PLUS: ; %bb.0:
103-
; GFX10PLUS-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
104-
; GFX10PLUS-NEXT: ; return to shader part epilog
105+
; GFX10-SDAG-LABEL: s_constained_fadd_f64_fpexcept_strict:
106+
; GFX10-SDAG: ; %bb.0:
107+
; GFX10-SDAG-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
108+
; GFX10-SDAG-NEXT: ; return to shader part epilog
109+
;
110+
; GFX10-GISEL-LABEL: s_constained_fadd_f64_fpexcept_strict:
111+
; GFX10-GISEL: ; %bb.0:
112+
; GFX10-GISEL-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
113+
; GFX10-GISEL-NEXT: v_readfirstlane_b32 s0, v0
114+
; GFX10-GISEL-NEXT: v_readfirstlane_b32 s1, v1
115+
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
116+
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
117+
; GFX10-GISEL-NEXT: ; return to shader part epilog
118+
;
119+
; GFX11-SDAG-LABEL: s_constained_fadd_f64_fpexcept_strict:
120+
; GFX11-SDAG: ; %bb.0:
121+
; GFX11-SDAG-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
122+
; GFX11-SDAG-NEXT: ; return to shader part epilog
123+
;
124+
; GFX11-GISEL-LABEL: s_constained_fadd_f64_fpexcept_strict:
125+
; GFX11-GISEL: ; %bb.0:
126+
; GFX11-GISEL-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
127+
; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v0
128+
; GFX11-GISEL-NEXT: v_readfirstlane_b32 s1, v1
129+
; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
130+
; GFX11-GISEL-NEXT: ; return to shader part epilog
105131
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
106132
%cast = bitcast double %val to <2 x float>
107133
ret <2 x float> %cast
@@ -113,6 +139,3 @@ declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3
113139

114140
attributes #0 = { strictfp }
115141
attributes #1 = { inaccessiblememonly nounwind willreturn }
116-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
117-
; GFX10: {{.*}}
118-
; GFX11: {{.*}}

llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
3-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
3+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
44

55
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
6-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
6+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
77

88
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-SDAG %s
9-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-GISEL %s
9+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-GISEL %s
1010

1111
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
1212
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
13-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
14-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
13+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
14+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
1515

1616
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
17-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
17+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
1818

1919
; FIXME: promotion not handled without f16 insts
2020

@@ -627,25 +627,41 @@ define amdgpu_ps <2 x half> @s_constained_fmul_v2f16_fpexcept_strict(<2 x half>
627627
;
628628
; GFX8-GISEL-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
629629
; GFX8-GISEL: ; %bb.0:
630-
; GFX8-GISEL-NEXT: s_lshr_b32 s0, s2, 16
631-
; GFX8-GISEL-NEXT: s_lshr_b32 s1, s3, 16
632630
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s3
633-
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1
634-
; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0
631+
; GFX8-GISEL-NEXT: s_lshr_b32 s1, s3, 16
635632
; GFX8-GISEL-NEXT: v_mul_f16_e32 v0, s2, v0
636-
; GFX8-GISEL-NEXT: v_mul_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
637-
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
633+
; GFX8-GISEL-NEXT: s_lshr_b32 s0, s2, 16
634+
; GFX8-GISEL-NEXT: v_readfirstlane_b32 s2, v0
635+
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s1
636+
; GFX8-GISEL-NEXT: v_mul_f16_e32 v0, s0, v0
637+
; GFX8-GISEL-NEXT: v_readfirstlane_b32 s0, v0
638+
; GFX8-GISEL-NEXT: s_and_b32 s0, 0xffff, s0
639+
; GFX8-GISEL-NEXT: s_and_b32 s1, 0xffff, s2
640+
; GFX8-GISEL-NEXT: s_lshl_b32 s0, s0, 16
641+
; GFX8-GISEL-NEXT: s_or_b32 s0, s1, s0
642+
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
638643
; GFX8-GISEL-NEXT: ; return to shader part epilog
639644
;
640645
; GFX10PLUS-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
641646
; GFX10PLUS: ; %bb.0:
642647
; GFX10PLUS-NEXT: v_pk_mul_f16 v0, s2, s3
643648
; GFX10PLUS-NEXT: ; return to shader part epilog
644649
;
645-
; GFX12-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
646-
; GFX12: ; %bb.0:
647-
; GFX12-NEXT: v_pk_mul_f16 v0, s2, s3
648-
; GFX12-NEXT: ; return to shader part epilog
650+
; GFX12-SDAG-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
651+
; GFX12-SDAG: ; %bb.0:
652+
; GFX12-SDAG-NEXT: v_pk_mul_f16 v0, s2, s3
653+
; GFX12-SDAG-NEXT: ; return to shader part epilog
654+
;
655+
; GFX12-GISEL-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
656+
; GFX12-GISEL: ; %bb.0:
657+
; GFX12-GISEL-NEXT: s_lshr_b32 s0, s2, 16
658+
; GFX12-GISEL-NEXT: s_lshr_b32 s1, s3, 16
659+
; GFX12-GISEL-NEXT: s_mul_f16 s2, s2, s3
660+
; GFX12-GISEL-NEXT: s_mul_f16 s0, s0, s1
661+
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
662+
; GFX12-GISEL-NEXT: s_pack_ll_b32_b16 s0, s2, s0
663+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
664+
; GFX12-GISEL-NEXT: ; return to shader part epilog
649665
%val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
650666
ret <2 x half> %val
651667
}

llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
3-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN %s
3+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN %s
44

55
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
77

88
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
9-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
9+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
1010

11-
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
12-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
11+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
12+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
1313

1414
define float @v_constained_fmul_f32_fpexcept_strict(float %x, float %y) #0 {
1515
; GCN-LABEL: v_constained_fmul_f32_fpexcept_strict:
@@ -339,6 +339,3 @@ declare <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float>, <2 x
339339
declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
340340

341341
attributes #0 = { strictfp }
342-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
343-
; GFX12-GISEL: {{.*}}
344-
; GFX12-SDAG: {{.*}}

llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll

Lines changed: 50 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
3-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-SDAG %s
3+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL %s
44

5-
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
5+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
6+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
77

8-
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
9-
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
8+
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
9+
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
1010

1111
define double @v_constained_fmul_f64_fpexcept_strict(double %x, double %y) #0 {
1212
; GCN-LABEL: v_constained_fmul_f64_fpexcept_strict:
@@ -178,22 +178,50 @@ define <3 x double> @v_constained_fmul_v3f64_fpexcept_strict(<3 x double> %x, <3
178178
}
179179

180180
define amdgpu_ps <2 x float> @s_constained_fmul_f64_fpexcept_strict(double inreg %x, double inreg %y) #0 {
181-
; GCN-LABEL: s_constained_fmul_f64_fpexcept_strict:
182-
; GCN: ; %bb.0:
183-
; GCN-NEXT: v_mov_b32_e32 v0, s4
184-
; GCN-NEXT: v_mov_b32_e32 v1, s5
185-
; GCN-NEXT: v_mul_f64 v[0:1], s[2:3], v[0:1]
186-
; GCN-NEXT: ; return to shader part epilog
187-
;
188-
; GFX10-LABEL: s_constained_fmul_f64_fpexcept_strict:
189-
; GFX10: ; %bb.0:
190-
; GFX10-NEXT: v_mul_f64 v[0:1], s[2:3], s[4:5]
191-
; GFX10-NEXT: ; return to shader part epilog
192-
;
193-
; GFX11-LABEL: s_constained_fmul_f64_fpexcept_strict:
194-
; GFX11: ; %bb.0:
195-
; GFX11-NEXT: v_mul_f64 v[0:1], s[2:3], s[4:5]
196-
; GFX11-NEXT: ; return to shader part epilog
181+
; GCN-SDAG-LABEL: s_constained_fmul_f64_fpexcept_strict:
182+
; GCN-SDAG: ; %bb.0:
183+
; GCN-SDAG-NEXT: v_mov_b32_e32 v0, s4
184+
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, s5
185+
; GCN-SDAG-NEXT: v_mul_f64 v[0:1], s[2:3], v[0:1]
186+
; GCN-SDAG-NEXT: ; return to shader part epilog
187+
;
188+
; GCN-GISEL-LABEL: s_constained_fmul_f64_fpexcept_strict:
189+
; GCN-GISEL: ; %bb.0:
190+
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s4
191+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, s5
192+
; GCN-GISEL-NEXT: v_mul_f64 v[0:1], s[2:3], v[0:1]
193+
; GCN-GISEL-NEXT: v_readfirstlane_b32 s0, v0
194+
; GCN-GISEL-NEXT: v_readfirstlane_b32 s1, v1
195+
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s0
196+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, s1
197+
; GCN-GISEL-NEXT: ; return to shader part epilog
198+
;
199+
; GFX10-SDAG-LABEL: s_constained_fmul_f64_fpexcept_strict:
200+
; GFX10-SDAG: ; %bb.0:
201+
; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], s[2:3], s[4:5]
202+
; GFX10-SDAG-NEXT: ; return to shader part epilog
203+
;
204+
; GFX10-GISEL-LABEL: s_constained_fmul_f64_fpexcept_strict:
205+
; GFX10-GISEL: ; %bb.0:
206+
; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], s[2:3], s[4:5]
207+
; GFX10-GISEL-NEXT: v_readfirstlane_b32 s0, v0
208+
; GFX10-GISEL-NEXT: v_readfirstlane_b32 s1, v1
209+
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
210+
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
211+
; GFX10-GISEL-NEXT: ; return to shader part epilog
212+
;
213+
; GFX11-SDAG-LABEL: s_constained_fmul_f64_fpexcept_strict:
214+
; GFX11-SDAG: ; %bb.0:
215+
; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], s[2:3], s[4:5]
216+
; GFX11-SDAG-NEXT: ; return to shader part epilog
217+
;
218+
; GFX11-GISEL-LABEL: s_constained_fmul_f64_fpexcept_strict:
219+
; GFX11-GISEL: ; %bb.0:
220+
; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], s[2:3], s[4:5]
221+
; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v0
222+
; GFX11-GISEL-NEXT: v_readfirstlane_b32 s1, v1
223+
; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
224+
; GFX11-GISEL-NEXT: ; return to shader part epilog
197225
%val = call double @llvm.experimental.constrained.fmul.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
198226
%cast = bitcast double %val to <2 x float>
199227
ret <2 x float> %cast

0 commit comments

Comments
 (0)