Skip to content

Commit 0ed5341

Browse files
committed
Use isShiftedMask_64 instead of KnownBits
1 parent 05affac commit 0ed5341

File tree

3 files changed

+71
-21
lines changed

3 files changed

+71
-21
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
#include "AMDGPUCombinerHelper.h"
1010
#include "GCNSubtarget.h"
1111
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12-
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
1312
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
1413
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
1514
#include "llvm/IR/IntrinsicsAMDGPU.h"
@@ -519,7 +518,16 @@ bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
519518
}
520519

521520
bool AMDGPUCombinerHelper::matchConstantIs32BitMask(Register Reg) const {
522-
const KnownBits &Known = VT->getKnownBits(Reg);
523-
return Known.One.extractBits(32, 0).isAllOnes() ||
524-
Known.One.extractBits(32, 32).isAllOnes();
521+
auto Res = getIConstantVRegValWithLookThrough(Reg, MRI);
522+
if (!Res)
523+
return false;
524+
525+
const uint64_t Val = Res->Value.getZExtValue();
526+
unsigned MaskIdx = 0;
527+
unsigned MaskLen = 0;
528+
if (!isShiftedMask_64(Val, MaskIdx, MaskLen))
529+
return false;
530+
531+
// Accept a contiguous mask that ends at bit 63 (any length) or that starts at bit 0 and covers at least the low 32 bits.
532+
return (MaskLen == 64 - MaskIdx) || (MaskIdx == 0 && MaskLen >= 32);
525533
}

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-binop-s64-with-s32-mask.mir

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,28 @@ body: |
4343
$sgpr0_sgpr1 = COPY %2(s64)
4444
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
4545
...
46-
46+
---
47+
name: test_and_mask_hi_48bit_mask_rhs
48+
tracksRegLiveness: true
49+
body: |
50+
bb.0:
51+
liveins: $sgpr0_sgpr1, $sgpr2
52+
; CHECK-LABEL: name: test_and_mask_hi_48bit_mask_rhs
53+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
54+
; CHECK-NEXT: {{ $}}
55+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
56+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
57+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536
58+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
59+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[AND]](s32), [[UV1]](s32)
60+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[MV]](s64)
61+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
62+
%0:_(s64) = COPY $sgpr0_sgpr1
63+
%1:_(s64) = G_CONSTANT i64 -65536
64+
%2:_(s64) = G_AND %0, %1
65+
$sgpr0_sgpr1 = COPY %2(s64)
66+
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
67+
...
4768
---
4869
name: test_and_mask_lo_rhs
4970
tracksRegLiveness: true
@@ -85,6 +106,28 @@ body: |
85106
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
86107
...
87108
---
109+
name: test_and_mask_lo_36bit_mask_rhs
110+
tracksRegLiveness: true
111+
body: |
112+
bb.0:
113+
liveins: $sgpr0_sgpr1, $sgpr2
114+
; CHECK-LABEL: name: test_and_mask_lo_36bit_mask_rhs
115+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
116+
; CHECK-NEXT: {{ $}}
117+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
118+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
119+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
120+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
121+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[AND]](s32)
122+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[MV]](s64)
123+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
124+
%0:_(s64) = COPY $sgpr0_sgpr1
125+
%1:_(s64) = G_CONSTANT i64 68719476735
126+
%2:_(s64) = G_AND %0, %1
127+
$sgpr0_sgpr1 = COPY %2(s64)
128+
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
129+
...
130+
---
88131
name: test_and_mask_hi_with_merge_unmerge
89132
tracksRegLiveness: true
90133
body: |
@@ -170,7 +213,6 @@ body: |
170213
$sgpr0_sgpr1 = COPY %2(s64)
171214
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
172215
...
173-
174216
---
175217
name: test_or_mask_lo_rhs
176218
tracksRegLiveness: true

llvm/test/CodeGen/AMDGPU/lround.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -114,12 +114,12 @@ define i32 @intrinsic_lround_i32_f64(double %arg) {
114114
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115115
; GFX9-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
116116
; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000
117-
; GFX9-GISEL-NEXT: s_brev_b32 s4, 1
118117
; GFX9-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
119-
; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
118+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
120119
; GFX9-GISEL-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
120+
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v5, 1
121121
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
122-
; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, s4, v4
122+
; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, v5, v4
123123
; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1]
124124
; GFX9-GISEL-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
125125
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -142,7 +142,7 @@ define i32 @intrinsic_lround_i32_f64(double %arg) {
142142
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143143
; GFX10-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
144144
; GFX10-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
145-
; GFX10-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
145+
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, 0
146146
; GFX10-GISEL-NEXT: v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
147147
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
148148
; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v4
@@ -172,7 +172,7 @@ define i32 @intrinsic_lround_i32_f64(double %arg) {
172172
; GFX11-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
173173
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
174174
; GFX11-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
175-
; GFX11-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
175+
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
176176
; GFX11-GISEL-NEXT: v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
177177
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
178178
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0
@@ -372,12 +372,12 @@ define i64 @intrinsic_lround_i64_f64(double %arg) {
372372
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
373373
; GFX9-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
374374
; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000
375-
; GFX9-GISEL-NEXT: s_brev_b32 s4, 1
375+
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v7, 1
376376
; GFX9-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
377-
; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
377+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
378378
; GFX9-GISEL-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
379379
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
380-
; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, s4, v4
380+
; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, v7, v4
381381
; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1]
382382
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
383383
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3df00000
@@ -414,7 +414,7 @@ define i64 @intrinsic_lround_i64_f64(double %arg) {
414414
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415415
; GFX10-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
416416
; GFX10-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
417-
; GFX10-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
417+
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, 0
418418
; GFX10-GISEL-NEXT: v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
419419
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
420420
; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v4
@@ -456,7 +456,7 @@ define i64 @intrinsic_lround_i64_f64(double %arg) {
456456
; GFX11-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
457457
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
458458
; GFX11-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
459-
; GFX11-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
459+
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
460460
; GFX11-GISEL-NEXT: v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
461461
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
462462
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0
@@ -663,12 +663,12 @@ define i64 @intrinsic_llround_i64_f64(double %arg) {
663663
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
664664
; GFX9-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
665665
; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000
666-
; GFX9-GISEL-NEXT: s_brev_b32 s4, 1
666+
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v7, 1
667667
; GFX9-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
668-
; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
668+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
669669
; GFX9-GISEL-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
670670
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
671-
; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, s4, v4
671+
; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, v7, v4
672672
; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1]
673673
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
674674
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3df00000
@@ -705,7 +705,7 @@ define i64 @intrinsic_llround_i64_f64(double %arg) {
705705
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
706706
; GFX10-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
707707
; GFX10-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
708-
; GFX10-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
708+
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, 0
709709
; GFX10-GISEL-NEXT: v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
710710
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
711711
; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v4
@@ -747,7 +747,7 @@ define i64 @intrinsic_llround_i64_f64(double %arg) {
747747
; GFX11-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
748748
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
749749
; GFX11-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
750-
; GFX11-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
750+
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
751751
; GFX11-GISEL-NEXT: v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
752752
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
753753
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0

0 commit comments

Comments
 (0)