Skip to content

Commit 367846d

Browse files
committed
New shl32 must preserve flags
Signed-off-by: John Lu <[email protected]>
1 parent 72ae938 commit 367846d

File tree

2 files changed

+100
-1
lines changed

2 files changed

+100
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4060,7 +4060,7 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
40604060
SDValue MaskedShiftAmt =
40614061
DAG.getNode(ISD::AND, SL, TargetType, truncShiftAmt, ShiftMask);
40624062
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, TargetType, LHS);
4063-
SDValue NewShift = DAG.getNode(ISD::SHL, SL, TargetType, Lo, MaskedShiftAmt);
4063+
SDValue NewShift = DAG.getNode(ISD::SHL, SL, TargetType, Lo, MaskedShiftAmt, N->getFlags());
40644064
const SDValue Zero = DAG.getConstant(0, SL, TargetType);
40654065
SDValue Vec = DAG.getBuildVector(TargetVecPairType, SL, {Zero, NewShift});
40664066
return DAG.getNode(ISD::BITCAST, SL, VT, Vec);
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=finalize-isel -o - %s | FileCheck %s
3+
4+
;; Test that reduction of:
5+
;;
6+
;; DST = shl i64 X, Y
7+
;;
8+
;; where Y is known to be in the range [32, 63], to:
9+
;;
10+
;; DST = [0, shl i32 X, (Y & 0x1F)]
11+
;;
12+
;; preserves the nsw/nuw flags of the original shl on the new 32-bit shl
13+
14+
define i64 @shl_nsw(i64 %arg0, i64 %shift_amt) {
15+
; CHECK-LABEL: name: shl_nsw
16+
; CHECK: bb.0 (%ir-block.0):
17+
; CHECK-NEXT: liveins: $vgpr0, $vgpr2
18+
; CHECK-NEXT: {{ $}}
19+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
20+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
21+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
22+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
23+
; CHECK-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
24+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
25+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[COPY2]], %subreg.sub1
26+
; CHECK-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
27+
; CHECK-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
28+
; CHECK-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
29+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF3]]
30+
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, killed [[COPY3]], %subreg.sub1
31+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
32+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
33+
; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = nsw V_LSHLREV_B32_e64 killed [[COPY4]], killed [[COPY5]], implicit $exec
34+
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
35+
; CHECK-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]]
36+
; CHECK-NEXT: $vgpr1 = COPY [[V_LSHLREV_B32_e64_]]
37+
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
38+
%or = or i64 %shift_amt, 32
39+
%shl = shl nsw i64 %arg0, %or
40+
ret i64 %shl
41+
}
42+
43+
define i64 @shl_nuw(i64 %arg0, i64 %shift_amt) {
44+
; CHECK-LABEL: name: shl_nuw
45+
; CHECK: bb.0 (%ir-block.0):
46+
; CHECK-NEXT: liveins: $vgpr0, $vgpr2
47+
; CHECK-NEXT: {{ $}}
48+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
49+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
50+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
51+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
52+
; CHECK-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
53+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
54+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[COPY2]], %subreg.sub1
55+
; CHECK-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
56+
; CHECK-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
57+
; CHECK-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
58+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF3]]
59+
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, killed [[COPY3]], %subreg.sub1
60+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
61+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
62+
; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = nuw V_LSHLREV_B32_e64 killed [[COPY4]], killed [[COPY5]], implicit $exec
63+
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
64+
; CHECK-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]]
65+
; CHECK-NEXT: $vgpr1 = COPY [[V_LSHLREV_B32_e64_]]
66+
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
67+
%or = or i64 %shift_amt, 32
68+
%shl = shl nuw i64 %arg0, %or
69+
ret i64 %shl
70+
}
71+
72+
define i64 @shl_nsw_nuw(i64 %arg0, i64 %shift_amt) {
73+
; CHECK-LABEL: name: shl_nsw_nuw
74+
; CHECK: bb.0 (%ir-block.0):
75+
; CHECK-NEXT: liveins: $vgpr0, $vgpr2
76+
; CHECK-NEXT: {{ $}}
77+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
78+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
79+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
80+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
81+
; CHECK-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
82+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
83+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[COPY2]], %subreg.sub1
84+
; CHECK-NEXT: [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
85+
; CHECK-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
86+
; CHECK-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
87+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF3]]
88+
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, killed [[COPY3]], %subreg.sub1
89+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
90+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
91+
; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = nuw nsw V_LSHLREV_B32_e64 killed [[COPY4]], killed [[COPY5]], implicit $exec
92+
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
93+
; CHECK-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]]
94+
; CHECK-NEXT: $vgpr1 = COPY [[V_LSHLREV_B32_e64_]]
95+
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
96+
%or = or i64 %shift_amt, 32
97+
%shl = shl nsw nuw i64 %arg0, %or
98+
ret i64 %shl
99+
}

0 commit comments

Comments
 (0)