Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
case UniV2S16:
return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
case UniV2S32:
return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isUniform(Reg);
case UniV2S64:
return MRI.getType(Reg) == LLT::fixed_vector(2, 64) && MUI.isUniform(Reg);
case UniB32:
return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
case UniB64:
Expand Down Expand Up @@ -160,6 +164,10 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
case DivV2S16:
return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
case DivV2S32:
return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isDivergent(Reg);
case DivV2S64:
return MRI.getType(Reg) == LLT::fixed_vector(2, 64) && MUI.isDivergent(Reg);
case DivB32:
return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
case DivB64:
Expand Down Expand Up @@ -991,6 +999,20 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
.Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});

// Register-bank legalization rules shared by the strict FP add, sub and mul
// generic opcodes (G_STRICT_FADD, G_STRICT_FSUB, G_STRICT_FMUL).
//
// Each rule pairs a result type/uniformity with a mapping that appears to be
// of the form {{dst}, {src0, src1}}. For every type listed here both source
// operands use the Vgpr* ID of that type; a divergent result uses the plain
// Vgpr* ID, while a uniform result uses the matching UniInVgpr* ID.
// NOTE(review): UniInVgpr* presumably means "execute in VGPRs, then copy the
// uniform result back to SGPRs" -- confirm against the mapping-apply code.
//
// S16/S32/S64/V2S16 use the .Uni/.Div helpers; V2S32 and V2S64 are spelled
// with .Any and explicit UniV2S*/DivV2S* predicates on the result only.
// NOTE(review): presumably because the Standard rule set has no .Uni/.Div
// slot for these two vector types -- confirm.
addRulesForGOpcs({G_STRICT_FADD, G_STRICT_FSUB, G_STRICT_FMUL}, Standard)
.Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
.Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
.Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
.Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
.Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32}}})
.Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32, VgprV2S32}}})
.Any({{UniV2S64}, {{UniInVgprV2S64}, {VgprV2S64, VgprV2S64}}})
.Any({{DivV2S64}, {{VgprV2S64}, {VgprV2S64, VgprV2S64}}});

using namespace Intrinsic;

addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,10 @@ enum UniformityLLTOpPredicateID {

UniV2S16,
UniV2S32,

UniV2S64,
DivV2S16,
DivV2S32,
DivV2S64,

// B types
B32,
Expand Down Expand Up @@ -172,6 +173,7 @@ enum RegBankLLTMappingApplyID {
VgprPtr128,
VgprV2S16,
VgprV2S32,
VgprV2S64,
VgprB32,
VgprB64,
VgprB96,
Expand All @@ -187,6 +189,7 @@ enum RegBankLLTMappingApplyID {
UniInVgprS64,
UniInVgprV2S16,
UniInVgprV2S32,
UniInVgprV2S64,
UniInVgprV4S32,
UniInVgprB32,
UniInVgprB64,
Expand Down
49 changes: 32 additions & 17 deletions llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,SDAG %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -global-isel -new-reg-bank-select -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GISEL %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-FAKE16 %s
; FIXME: promotion not handled without f16 insts
Expand Down Expand Up @@ -179,12 +180,19 @@ define <3 x half> @v_constained_fadd_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX8-NEXT: v_add_f16_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v3f16_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_add_f16 v0, v0, v2
; GFX10-NEXT: v_add_f16_e32 v1, v1, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
; SDAG-LABEL: v_constained_fadd_v3f16_fpexcept_strict:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_pk_add_f16 v0, v0, v2
; SDAG-NEXT: v_add_f16_e32 v1, v1, v3
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_constained_fadd_v3f16_fpexcept_strict:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_pk_add_f16 v0, v0, v2
; GISEL-NEXT: v_pk_add_f16 v1, v1, v3
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_constained_fadd_v3f16_fpexcept_strict:
; GFX11-TRUE16: ; %bb.0:
Expand Down Expand Up @@ -228,16 +236,23 @@ define <4 x half> @v_constained_fadd_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_add_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_add_f16_e32 v0, v0, v2
; GFX10-NEXT: v_add_f16_e32 v1, v1, v3
; GFX10-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
; GFX10-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
; GFX10-NEXT: s_setpc_b64 s[30:31]
; SDAG-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_add_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; SDAG-NEXT: v_add_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; SDAG-NEXT: v_add_f16_e32 v0, v0, v2
; SDAG-NEXT: v_add_f16_e32 v1, v1, v3
; SDAG-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
; SDAG-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_pk_add_f16 v0, v0, v2
; GISEL-NEXT: v_pk_add_f16 v1, v1, v3
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
; GFX11-TRUE16: ; %bb.0:
Expand Down
25 changes: 19 additions & 6 deletions llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,SDAG %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -global-isel -new-reg-bank-select -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GISEL %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 {
Expand Down Expand Up @@ -206,11 +207,23 @@ define float @v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs(float %x, floa
; GCN-NEXT: v_sub_f32_e64 v0, v1, |v0|
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_sub_f32_e64 v0, v1, |v0|
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
; SDAG-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_sub_f32_e64 v0, v1, |v0|
; SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_add_f32_e64 v0, -|v0|, v1
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_sub_f32_e64 v0, v1, |v0|
; GFX11-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call float @llvm.fabs.f32(float %x) #0
%neg.fabs.x = fneg float %fabs.x
%val = call float @llvm.experimental.constrained.fadd.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
Expand Down
74 changes: 68 additions & 6 deletions llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,SDAG %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -global-isel -new-reg-bank-select -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GISEL %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s

define double @v_constained_fadd_f64_fpexcept_strict(double %x, double %y) #0 {
; GCN-LABEL: v_constained_fadd_f64_fpexcept_strict:
Expand All @@ -15,6 +16,12 @@ define double @v_constained_fadd_f64_fpexcept_strict(double %x, double %y) #0 {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fadd_f64_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret double %val
}
Expand All @@ -31,6 +38,12 @@ define double @v_constained_fadd_f64_fpexcept_ignore(double %x, double %y) #0 {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fadd_f64_fpexcept_ignore:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
ret double %val
}
Expand All @@ -47,6 +60,12 @@ define double @v_constained_fadd_f64_fpexcept_maytrap(double %x, double %y) #0 {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fadd_f64_fpexcept_maytrap:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
ret double %val
}
Expand All @@ -65,6 +84,13 @@ define <2 x double> @v_constained_fadd_v2f64_fpexcept_strict(<2 x double> %x, <2
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fadd_v2f64_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <2 x double> %val
}
Expand All @@ -83,6 +109,13 @@ define <2 x double> @v_constained_fadd_v2f64_fpexcept_ignore(<2 x double> %x, <2
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fadd_v2f64_fpexcept_ignore:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
ret <2 x double> %val
}
Expand All @@ -101,6 +134,13 @@ define <2 x double> @v_constained_fadd_v2f64_fpexcept_maytrap(<2 x double> %x, <
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fadd_v2f64_fpexcept_maytrap:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], v[6:7]
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
ret <2 x double> %val
}
Expand All @@ -121,6 +161,14 @@ define <3 x double> @v_constained_fadd_v3f64_fpexcept_strict(<3 x double> %x, <3
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], v[8:9]
; GFX10-NEXT: v_add_f64 v[4:5], v[4:5], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constained_fadd_v3f64_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], v[6:7]
; GFX11-NEXT: v_add_f64 v[2:3], v[2:3], v[8:9]
; GFX11-NEXT: v_add_f64 v[4:5], v[4:5], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double> %x, <3 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <3 x double> %val
}
Expand All @@ -133,10 +181,24 @@ define amdgpu_ps <2 x float> @s_constained_fadd_f64_fpexcept_strict(double inreg
; GCN-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1]
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_constained_fadd_f64_fpexcept_strict:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
; GFX10-NEXT: ; return to shader part epilog
; SDAG-LABEL: s_constained_fadd_f64_fpexcept_strict:
; SDAG: ; %bb.0:
; SDAG-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: s_constained_fadd_f64_fpexcept_strict:
; GISEL: ; %bb.0:
; GISEL-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
; GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GISEL-NEXT: v_readfirstlane_b32 s1, v1
; GISEL-NEXT: v_mov_b32_e32 v0, s0
; GISEL-NEXT: v_mov_b32_e32 v1, s1
; GISEL-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_constained_fadd_f64_fpexcept_strict:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_add_f64 v[0:1], s[2:3], s[4:5]
; GFX11-NEXT: ; return to shader part epilog
%val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
%cast = bitcast double %val to <2 x float>
ret <2 x float> %cast
Expand Down
27 changes: 16 additions & 11 deletions llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX1-GISEL,GFX1-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX1-GISEL,GFX1-GISEL-FAKE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX1-GISEL,GFX1-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX1-GISEL,GFX1-GISEL-FAKE16 %s


; FIXME: promotion not handled without f16 insts
Expand Down Expand Up @@ -454,14 +454,19 @@ define amdgpu_ps <2 x half> @s_constained_fmul_v2f16_fpexcept_strict(<2 x half>
;
; GFX8-GISEL-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_lshr_b32 s0, s2, 16
; GFX8-GISEL-NEXT: s_lshr_b32 s1, s3, 16
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s3
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX8-GISEL-NEXT: s_lshr_b32 s1, s3, 16
; GFX8-GISEL-NEXT: v_mul_f16_e32 v0, s2, v0
; GFX8-GISEL-NEXT: v_mul_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_lshr_b32 s0, s2, 16
; GFX8-GISEL-NEXT: v_readfirstlane_b32 s2, v0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s1
; GFX8-GISEL-NEXT: v_mul_f16_e32 v0, s0, v0
; GFX8-GISEL-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-GISEL-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-GISEL-NEXT: s_and_b32 s1, 0xffff, s2
; GFX8-GISEL-NEXT: s_lshl_b32 s0, s0, 16
; GFX8-GISEL-NEXT: s_or_b32 s0, s1, s0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
Expand Down
Loading
Loading