-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AMDGPU] Preserve exact flag for lshr #146744
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-amdgpu Author: None (LU-JOHN) ChangesWhen reducing 64-bit lshr to 32-bit preserve exact flag. Alive2 verification: https://alive2.llvm.org/ce/z/LcnX7V Full diff: https://github.com/llvm/llvm-project/pull/146744.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index d75c7a178b4a8..8f9c0530e09c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4328,7 +4328,8 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, LHSSL, TargetType, SplitLHS, One);
}
- SDValue NewShift = DAG.getNode(ISD::SRL, SL, TargetType, Hi, ShiftAmt);
+ SDValue NewShift =
+ DAG.getNode(ISD::SRL, SL, TargetType, Hi, ShiftAmt, N->getFlags());
SDValue Vec;
if (VT.isVector()) {
diff --git a/llvm/test/CodeGen/AMDGPU/srl64_reduce_flags.ll b/llvm/test/CodeGen/AMDGPU/srl64_reduce_flags.ll
new file mode 100644
index 0000000000000..ca4b72872a03b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/srl64_reduce_flags.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=finalize-isel -o - %s | FileCheck %s
+
+;; Test that reduction of:
+;;
+;; DST = lshr i64 X, Y
+;;
+;; where Y is in the range [63-32] to:
+;;
+;; DST = [srl i32 X, (Y & 0x1F), 0]
+;;
+;; preserves flags
+
+define i64 @srl_exact(i64 %arg0, i64 %shift_amt) {
+ ; CHECK-LABEL: name: srl_exact
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[DEF3]]
+ ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, killed [[COPY4]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = exact V_LSHRREV_B32_e64 killed [[COPY5]], killed [[COPY3]], implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_LSHRREV_B32_e64_]]
+ ; CHECK-NEXT: $vgpr1 = COPY [[V_MOV_B32_e32_]]
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ %or = or i64 %shift_amt, 32
+ %srl = lshr exact i64 %arg0, %or
+ ret i64 %srl
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you pre-commit this test and then make this change such that the difference can be clearly shown?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Test pre-committed in #146747
|
Needs update on test |
Signed-off-by: John Lu <[email protected]>
Signed-off-by: John Lu <[email protected]>
5403038 to
20867d0
Compare
Test updated. |
When reducing 64-bit lshr to 32-bit preserve exact flag.
Alive2 verification: https://alive2.llvm.org/ce/z/LcnX7V