@@ -9772,12 +9772,39 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
97729772 MemVT))
97739773 return SDValue();
97749774
9775+ auto IsRotateLoaded = [](
9776+ ArrayRef<int64_t> ByteOffsets, int64_t FirstOffset, unsigned BitWidth) {
9777+ // Ensure that we have the correct width type; we want to combine two 32-bit loads into a 64-bit load.
9778+ if (BitWidth != 64 || ByteOffsets.size() != 8)
9779+ return false;
9780+
9781+ constexpr unsigned FourBytes = 4;
9782+
9783+ for (unsigned i = 0; i < FourBytes; ++i) {
9784+ // Check the lower 4 bytes come from the higher memory address.
9785+ if (ByteOffsets[i] != FirstOffset + i + FourBytes)
9786+ return false;
9787+ // Check the higher 4 bytes come from the lower memory address.
9788+ if (ByteOffsets[i + FourBytes] != FirstOffset + i)
9789+ return false;
9790+ }
9791+ return true;
9792+ };
9793+
97759794 // Check if the bytes of the OR we are looking at match with either big or
97769795 // little endian value load
97779796 std::optional<bool> IsBigEndian = isBigEndian(
97789797 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9779- if (!IsBigEndian)
9780- return SDValue();
9798+
9799+ bool IsRotated = false;
9800+ if (!IsBigEndian) {
9801+ IsRotated =
9802+ IsRotateLoaded(ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes),
9803+ FirstOffset, VT.getSizeInBits());
9804+
9805+ if (!IsRotated)
9806+ return SDValue();
9807+ }
97819808
97829809 assert(FirstByteProvider && "must be set");
97839810
@@ -9791,8 +9818,9 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
97919818 // replace it with a single (possibly zero-extended) load and bswap + shift if
97929819 // needed.
97939820
9794- // If the load needs byte swap check if the target supports it
9795- bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9821+ // If the load needs byte swap check if the target supports it, make sure that
9822+ // we are not rotating.
9823+ bool NeedsBswap = !IsRotated && (IsBigEndianTarget != *IsBigEndian);
97969824
97979825 // Before legalize we can introduce illegal bswaps which will be later
97989826 // converted to an explicit bswap sequence. This way we end up with a single
@@ -9803,8 +9831,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
98039831 !TLI.isOperationLegal(ISD::BSWAP, VT))
98049832 return SDValue();
98059833
9806- // If we need to bswap and zero extend, we have to insert a shift. Check that
9807- // it is legal.
9834+ // If we need to rotate make sure that is legal.
9835+ if (IsRotated && LegalOperations && !TLI.isOperationLegal(ISD::ROTR, VT))
9836+ return SDValue();
9837+
9838+ // If we need to bswap and zero extend, we have to insert a shift. Check
9839+ // that it is legal.
98089840 if (NeedsBswap && NeedsZext && LegalOperations &&
98099841 !TLI.isOperationLegal(ISD::SHL, VT))
98109842 return SDValue();
@@ -9826,15 +9858,33 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
98269858 for (LoadSDNode *L : Loads)
98279859 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
98289860
9829- if (!NeedsBswap)
9861+ // If no transform is needed then return the new load.
9862+ if (!NeedsBswap && !IsRotated)
98309863 return NewLoad;
98319864
9832- SDValue ShiftedLoad =
9833- NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9834- DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9835- VT, SDLoc(N)))
9836- : NewLoad;
9837- return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9865+ // If we detect the need to BSWAP build the new node and return it.
9866+ if (NeedsBswap) {
9867+ SDValue ShiftedLoad =
9868+ NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9869+ DAG.getShiftAmountConstant(
9870+ ZeroExtendedBytes * 8, VT, SDLoc(N)))
9871+ : NewLoad;
9872+ return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9873+ }
9874+
9875+ // If we detect we need to rotate build the new ROTR node.
9876+ if (IsRotated) {
9877+ // The amount to rotate is half that of the size, i.e. 32 bits for an i64.
9878+ unsigned RotateAmount = VT.getSizeInBits() / 2;
9879+
9880+ EVT ShiftAmountTy =
9881+ TLI.getShiftAmountTy(NewLoad.getValueType(), DAG.getDataLayout());
9882+
9883+ return DAG.getNode(ISD::ROTR, SDLoc(N), VT, NewLoad,
9884+ DAG.getConstant(RotateAmount, SDLoc(N), ShiftAmountTy));
9885+ }
9886+
9887+ llvm_unreachable("Should have returned a transformed load value");
98389888}
98399889
98409890// If the target has andn, bsl, or a similar bit-select instruction,
0 commit comments