Skip to content

Commit e9e51e5

Browse files
committed
Handle max shift-amt more cleanly
Signed-off-by: John Lu <[email protected]>
1 parent e766462 commit e9e51e5

File tree

2 files changed

+19
-5
lines changed

2 files changed

+19
-5
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4185,6 +4185,9 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
41854185

41864186
ShiftAmt = DAG.getConstant(RHSVal - TargetScalarType.getSizeInBits(), SL,
41874187
TargetType);
4188+
} else if (Known.getMinValue().getZExtValue() ==
4189+
(ElementType.getSizeInBits() - 1)) {
4190+
ShiftAmt = ShiftFullAmt;
41884191
} else {
41894192
SDValue truncShiftAmt = DAG.getNode(ISD::TRUNCATE, SL, TargetType, RHS);
41904193
const SDValue ShiftMask =
@@ -4232,11 +4235,7 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
42324235
DAG.ExtractVectorElements(NewShift, LoOps, 0, NElts);
42334236
for (unsigned I = 0; I != NElts; ++I) {
42344237
HiAndLoOps[2 * I + 1] = HiOps[I];
4235-
if (Known.getMinValue().getZExtValue() ==
4236-
(ElementType.getSizeInBits() - 1))
4237-
HiAndLoOps[2 * I] = HiOps[I];
4238-
else
4239-
HiAndLoOps[2 * I] = LoOps[I];
4238+
HiAndLoOps[2 * I] = LoOps[I];
42404239
}
42414240
Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, ConcatType, HiAndLoOps);
42424241
} else {

llvm/test/CodeGen/AMDGPU/ashr64_reduce.ll

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,20 @@ define <2 x i64> @ashr_v2_metadata(<2 x i64> %arg0, ptr %arg1.ptr) {
122122
ret <2 x i64> %ashr
123123
}
124124

125+
define <2 x i64> @ashr_v2_metadata_63(<2 x i64> %arg0, ptr %arg1.ptr) {
126+
; CHECK-LABEL: ashr_v2_metadata_63:
127+
; CHECK: ; %bb.0:
128+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129+
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 31, v1
130+
; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v3
131+
; CHECK-NEXT: v_mov_b32_e32 v1, v0
132+
; CHECK-NEXT: v_mov_b32_e32 v3, v2
133+
; CHECK-NEXT: s_setpc_b64 s[30:31]
134+
%shift.amt = load <2 x i64>, ptr %arg1.ptr, !range !4, !noundef !{}
135+
%ashr = ashr <2 x i64> %arg0, %shift.amt
136+
ret <2 x i64> %ashr
137+
}
138+
125139
; Exact attribute does not inhibit reduction
126140
define <2 x i64> @ashr_exact_v2_metadata(<2 x i64> %arg0, ptr %arg1.ptr) {
127141
; CHECK-LABEL: ashr_exact_v2_metadata:
@@ -194,6 +208,7 @@ define <4 x i64> @ashr_v4_metadata(<4 x i64> %arg0, ptr %arg1.ptr) {
194208
!1 = !{i64 32, i64 38, i64 42, i64 48}
195209
!2 = !{i64 31, i64 38, i64 42, i64 48}
196210
!3 = !{i64 32, i64 38, i64 2147483680, i64 2147483681}
211+
!4 = !{i64 63, i64 64}
197212

198213
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
199214
; Test range with an "or X, 16"

0 commit comments

Comments (0)