Skip to content

Commit afe39b5

Browse files
committed
XXX - Use computeKnownBits for the general case
1 parent 5f33802 commit afe39b5

File tree

3 files changed

+8
-17
lines changed

3 files changed

+8
-17
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -13988,16 +13988,11 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
1398813988
SDValue
1398913989
SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
1399013990
DAGCombinerInfo &DCI) const {
13991+
SelectionDAG &DAG = DCI.DAG;
1399113992
SDValue RHS = N->getOperand(1);
13992-
auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
13993-
if (!CRHS)
13994-
return SDValue();
13993+
KnownBits KB = DAG.computeKnownBits(RHS);
1399513994

13996-
// TODO: Worth using computeKnownBits? Maybe expensive since it's so
13997-
// common.
13998-
uint64_t Val = CRHS->getZExtValue();
13999-
if (countr_zero(Val) >= 32) {
14000-
SelectionDAG &DAG = DCI.DAG;
13995+
if (KB.countMinTrailingZeros() >= 32) {
1400113996
SDLoc SL(N);
1400213997
SDValue LHS = N->getOperand(0);
1400313998

@@ -14011,7 +14006,7 @@ SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
1401114006
// to interfere with addressing mode patterns.
1401214007

1401314008
SDValue Hi = getHiHalf64(LHS, DAG);
14014-
SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
14009+
SDValue ConstHi32 = getHiHalf64(RHS, DAG);
1401514010
SDValue AddHi =
1401614011
DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, N->getFlags());
1401714012

llvm/test/CodeGen/AMDGPU/add64-low-32-bits-known-zero.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -170,8 +170,7 @@ define i64 @v_add_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
170170
; GFX9-LABEL: v_add_i64_variable_high_bits_known0_0:
171171
; GFX9: ; %bb.0:
172172
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173-
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
174-
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
173+
; GFX9-NEXT: v_add_u32_e32 v1, v1, v2
175174
; GFX9-NEXT: s_setpc_b64 s[30:31]
176175
%zext.offset.hi32 = zext i32 %offset.hi32 to i64
177176
%in.high.bits = shl i64 %zext.offset.hi32, 32
@@ -183,8 +182,7 @@ define i64 @v_add_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
183182
define amdgpu_ps i64 @s_add_i64_variable_high_bits_known0_0(i64 inreg %reg, i32 inreg %offset.hi32) {
184183
; GFX9-LABEL: s_add_i64_variable_high_bits_known0_0:
185184
; GFX9: ; %bb.0:
186-
; GFX9-NEXT: s_add_u32 s0, s0, 0
187-
; GFX9-NEXT: s_addc_u32 s1, s1, s2
185+
; GFX9-NEXT: s_add_i32 s1, s1, s2
188186
; GFX9-NEXT: ; return to shader part epilog
189187
%zext.offset.hi32 = zext i32 %offset.hi32 to i64
190188
%in.high.bits = shl i64 %zext.offset.hi32, 32

llvm/test/CodeGen/AMDGPU/sub64-low-32-bits-known-zero.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -170,8 +170,7 @@ define i64 @v_sub_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
170170
; GFX9-LABEL: v_sub_i64_variable_high_bits_known0_0:
171171
; GFX9: ; %bb.0:
172172
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173-
; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, 0, v0
174-
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
173+
; GFX9-NEXT: v_sub_u32_e32 v1, v1, v2
175174
; GFX9-NEXT: s_setpc_b64 s[30:31]
176175
%zext.offset.hi32 = zext i32 %offset.hi32 to i64
177176
%in.high.bits = shl i64 %zext.offset.hi32, 32
@@ -183,8 +182,7 @@ define i64 @v_sub_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
183182
define amdgpu_ps i64 @s_sub_i64_variable_high_bits_known0_0(i64 inreg %reg, i32 inreg %offset.hi32) {
184183
; GFX9-LABEL: s_sub_i64_variable_high_bits_known0_0:
185184
; GFX9: ; %bb.0:
186-
; GFX9-NEXT: s_sub_u32 s0, s0, 0
187-
; GFX9-NEXT: s_subb_u32 s1, s1, s2
185+
; GFX9-NEXT: s_sub_i32 s1, s1, s2
188186
; GFX9-NEXT: ; return to shader part epilog
189187
%zext.offset.hi32 = zext i32 %offset.hi32 to i64
190188
%in.high.bits = shl i64 %zext.offset.hi32, 32

0 commit comments

Comments (0)