Skip to content

Commit d1a1e48

Browse files
committed
[SelectionDAG] Constant fold ISD::FSHL/FSHR nodes
1 parent de442b6 commit d1a1e48

File tree

3 files changed

+38
-14
lines changed

3 files changed

+38
-14
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11281,6 +11281,11 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
1128111281
unsigned BitWidth = VT.getScalarSizeInBits();
1128211282
SDLoc DL(N);
1128311283

11284+
// fold (fshl/fshr C0, C1, C2) -> C3
11285+
if (SDValue C =
11286+
DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1, N2}))
11287+
return C;
11288+
1128411289
// fold (fshl N0, N1, 0) -> N0
1128511290
// fold (fshr N0, N1, 0) -> N1
1128611291
if (isPowerOf2_32(BitWidth))

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7175,6 +7175,28 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
71757175
}
71767176
}
71777177

7178+
// Handle fshl/fshr special cases.
7179+
if (Opcode == ISD::FSHL || Opcode == ISD::FSHR) {
7180+
auto *C1 = dyn_cast<ConstantSDNode>(Ops[0]);
7181+
auto *C2 = dyn_cast<ConstantSDNode>(Ops[1]);
7182+
auto *C3 = dyn_cast<ConstantSDNode>(Ops[2]);
7183+
7184+
if (C1 && C2 && C3) {
7185+
if (C1->isOpaque() || C2->isOpaque() || C3->isOpaque())
7186+
return SDValue();
7187+
const APInt V1 = C1->getAPIntValue(), V2 = C2->getAPIntValue(),
7188+
V3 = C3->getAPIntValue();
7189+
7190+
APInt FoldedVal = Opcode == ISD::FSHL ? APIntOps::fshl(V1, V2, V3)
7191+
: APIntOps::fshr(V1, V2, V3);
7192+
7193+
SDValue Folded = getConstant(FoldedVal, DL, VT);
7194+
assert((!Folded || !VT.isVector()) &&
7195+
"Can't fold vectors ops with scalar operands");
7196+
return Folded;
7197+
}
7198+
}
7199+
71787200
// This is for vector folding only from here on.
71797201
if (!VT.isVector())
71807202
return SDValue();
@@ -8158,6 +8180,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
81588180
}
81598181
break;
81608182
}
8183+
case ISD::FSHL:
8184+
case ISD::FSHR:
8185+
// Constant folding.
8186+
if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2, N3}))
8187+
return V;
8188+
break;
81618189
case ISD::BUILD_VECTOR: {
81628190
// Attempt to simplify BUILD_VECTOR.
81638191
SDValue Ops[] = {N1, N2, N3};

llvm/test/CodeGen/X86/fshl-fshr-constant.ll

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@ define <4 x i32> @test_fshl_constants() {
1010
;
1111
; CHECK-UNEXPAND-LABEL: test_fshl_constants:
1212
; CHECK-UNEXPAND: # %bb.0:
13-
; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,5,6,7]
14-
; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,1,2,3]
15-
; CHECK-UNEXPAND-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
13+
; CHECK-UNEXPAND-NEXT: vpmovsxwd {{.*#+}} xmm0 = [0,512,2048,6144]
1614
; CHECK-UNEXPAND-NEXT: retq
1715
%res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
1816
ret <4 x i32> %res
@@ -79,17 +77,10 @@ define <4 x i32> @test_fshl_none_constant(<4 x i32> %a, <4 x i32> %b, <4 x i32>
7977
}
8078

8179
define <4 x i32> @test_fshr_constants() {
82-
; CHECK-EXPAND-LABEL: test_fshr_constants:
83-
; CHECK-EXPAND: # %bb.0:
84-
; CHECK-EXPAND-NEXT: vmovaps {{.*#+}} xmm0 = [0,8388608,8388608,6291456]
85-
; CHECK-EXPAND-NEXT: retq
86-
;
87-
; CHECK-UNEXPAND-LABEL: test_fshr_constants:
88-
; CHECK-UNEXPAND: # %bb.0:
89-
; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,1,2,3]
90-
; CHECK-UNEXPAND-NEXT: vpmovsxbd {{.*#+}} xmm0 = [4,5,6,7]
91-
; CHECK-UNEXPAND-NEXT: vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
92-
; CHECK-UNEXPAND-NEXT: retq
80+
; CHECK-LABEL: test_fshr_constants:
81+
; CHECK: # %bb.0:
82+
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [0,8388608,8388608,6291456]
83+
; CHECK-NEXT: retq
9384
%res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> <i32 8, i32 9, i32 10, i32 11>)
9485
ret <4 x i32> %res
9586
}

0 commit comments

Comments
 (0)