Skip to content

Commit 02b82b8

Browse files
davemgreen authored and tru committed
[DAG] Ensure more Legal BUILD_VECTOR elements types in shuffle->And combine
This is a follow-up to D131350, which caused another problem for i64 types being split into i32 on i32 targets. This patch tries to make sure that either illegal types are OK, or that the element types of a BUILD_VECTOR are legal and bigger than or equal to the size of the original elements. Differential Revision: https://reviews.llvm.org/D131883. (Cherry picked from commit dfc95ba.)
1 parent 8abe263 commit 02b82b8

File tree

2 files changed

+42
-19
lines changed

2 files changed

+42
-19
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22730,25 +22730,31 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
2273022730
SDLoc DL(N);
2273122731
EVT IntVT = VT.changeVectorElementTypeToInteger();
2273222732
EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
22733-
IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
22734-
SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
22735-
SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
22736-
SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
22737-
for (int I = 0; I != (int)NumElts; ++I)
22738-
if (0 <= Mask[I])
22739-
AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
22740-
22741-
// See if a clear mask is legal instead of going via
22742-
// XformToShuffleWithZero which loses UNDEF mask elements.
22743-
if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
22744-
return DAG.getBitcast(
22745-
VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
22746-
DAG.getConstant(0, DL, IntVT), ClearMask));
22747-
22748-
if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
22749-
return DAG.getBitcast(
22750-
VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
22751-
DAG.getBuildVector(IntVT, DL, AndMask)));
22733+
// Transform the type to a legal type so that the buildvector constant
22734+
// elements are not illegal. Make sure that the result is larger than the
22735+
// original type, in case the value is split into two (e.g. i64->i32).
22736+
if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
22737+
IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
22738+
if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
22739+
SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
22740+
SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
22741+
SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
22742+
for (int I = 0; I != (int)NumElts; ++I)
22743+
if (0 <= Mask[I])
22744+
AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
22745+
22746+
// See if a clear mask is legal instead of going via
22747+
// XformToShuffleWithZero which loses UNDEF mask elements.
22748+
if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
22749+
return DAG.getBitcast(
22750+
VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
22751+
DAG.getConstant(0, DL, IntVT), ClearMask));
22752+
22753+
if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
22754+
return DAG.getBitcast(
22755+
VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
22756+
DAG.getBuildVector(IntVT, DL, AndMask)));
22757+
}
2275222758
}
2275322759
}
2275422760

llvm/test/CodeGen/ARM/vector-store.ll

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,3 +419,20 @@ define void @v3i8store(<3 x i8> *%p) {
419419
store <3 x i8> zeroinitializer, <3 x i8> *%p, align 4
420420
ret void
421421
}
422+
423+
define void @v3i64shuffle(<3 x i64> *%p, <3 x i64> %a) {
424+
; CHECK-LABEL: v3i64shuffle:
425+
; CHECK: @ %bb.0:
426+
; CHECK-NEXT: vmov.i32 q8, #0x0
427+
; CHECK-NEXT: ldrd r12, r1, [sp, #8]
428+
; CHECK-NEXT: vmov d18, r2, r3
429+
; CHECK-NEXT: vorr d19, d16, d16
430+
; CHECK-NEXT: str r1, [r0, #20]
431+
; CHECK-NEXT: vst1.32 {d18, d19}, [r0]!
432+
; CHECK-NEXT: str.w r12, [r0]
433+
; CHECK-NEXT: bx lr
434+
%b = shufflevector <3 x i64> %a, <3 x i64> zeroinitializer, <3 x i32> <i32 0, i32 3, i32 2>
435+
store <3 x i64> %b, <3 x i64> *%p, align 4
436+
ret void
437+
}
438+

0 commit comments

Comments
 (0)