Skip to content

Commit df17560

Browse files
committed
[DAG] Added check to combine two i32 loads into a single i64 load and rotate.
1 parent f2f04c3 commit df17560

File tree

2 files changed

+87
-13
lines changed

2 files changed

+87
-13
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 63 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9772,12 +9772,39 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
97729772
MemVT))
97739773
return SDValue();
97749774

9775+
auto IsRotateLoaded = [](
9776+
ArrayRef<int64_t> ByteOffsets, int64_t FirstOffset, unsigned BitWidth) {
9777+
// Ensure that we have the correct width type: we want to combine two 32-bit loads into a single 64-bit load.
9778+
if (BitWidth != 64 || ByteOffsets.size() != 8)
9779+
return false;
9780+
9781+
constexpr unsigned FourBytes = 4;
9782+
9783+
for (unsigned i = 0; i < FourBytes; ++i) {
9784+
// Check the lower 4 bytes come from the higher memory address.
9785+
if (ByteOffsets[i] != FirstOffset + i + FourBytes)
9786+
return false;
9787+
// Check the higher 4 bytes come from the lower memory address.
9788+
if (ByteOffsets[i + FourBytes] != FirstOffset + i)
9789+
return false;
9790+
}
9791+
return true;
9792+
};
9793+
97759794
// Check if the bytes of the OR we are looking at match with either big or
97769795
// little endian value load
97779796
std::optional<bool> IsBigEndian = isBigEndian(
97789797
ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9779-
if (!IsBigEndian)
9780-
return SDValue();
9798+
9799+
bool IsRotated = false;
9800+
if (!IsBigEndian) {
9801+
IsRotated =
9802+
IsRotateLoaded(ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes),
9803+
FirstOffset, VT.getSizeInBits());
9804+
9805+
if (!IsRotated)
9806+
return SDValue();
9807+
}
97819808

97829809
assert(FirstByteProvider && "must be set");
97839810

@@ -9791,8 +9818,9 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
97919818
// replace it with a single (possibly zero-extended) load and bswap + shift if
97929819
// needed.
97939820

9794-
// If the load needs byte swap check if the target supports it
9795-
bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9821+
// If the load needs byte swap check if the target supports it, make sure that
9822+
// we are not rotating.
9823+
bool NeedsBswap = !IsRotated && (IsBigEndianTarget != *IsBigEndian);
97969824

97979825
// Before legalize we can introduce illegal bswaps which will be later
97989826
// converted to an explicit bswap sequence. This way we end up with a single
@@ -9803,8 +9831,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
98039831
!TLI.isOperationLegal(ISD::BSWAP, VT))
98049832
return SDValue();
98059833

9806-
// If we need to bswap and zero extend, we have to insert a shift. Check that
9807-
// it is legal.
9834+
// If we need to rotate make sure that is legal.
9835+
if (IsRotated && LegalOperations && !TLI.isOperationLegal(ISD::ROTR, VT))
9836+
return SDValue();
9837+
9838+
// If we need to bswap and zero extend, we have to insert a shift. Check
9839+
// that it is legal.
98089840
if (NeedsBswap && NeedsZext && LegalOperations &&
98099841
!TLI.isOperationLegal(ISD::SHL, VT))
98109842
return SDValue();
@@ -9826,15 +9858,33 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
98269858
for (LoadSDNode *L : Loads)
98279859
DAG.makeEquivalentMemoryOrdering(L, NewLoad);
98289860

9829-
if (!NeedsBswap)
9861+
// If no transform is needed then return the new load.
9862+
if (!NeedsBswap && !IsRotated)
98309863
return NewLoad;
98319864

9832-
SDValue ShiftedLoad =
9833-
NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9834-
DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9835-
VT, SDLoc(N)))
9836-
: NewLoad;
9837-
return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9865+
// If we detect the need to BSWAP build the new node and return it.
9866+
if (NeedsBswap) {
9867+
SDValue ShiftedLoad =
9868+
NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9869+
DAG.getShiftAmountConstant(
9870+
ZeroExtendedBytes * 8, VT, SDLoc(N)))
9871+
: NewLoad;
9872+
return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9873+
}
9874+
9875+
// If we detect we need to rotate build the new ROTR node.
9876+
if (IsRotated) {
9877+
// The amount to rotate is half the bit width, i.e. 32 bits for an i64.
9878+
unsigned RotateAmount = VT.getSizeInBits() / 2;
9879+
9880+
EVT ShiftAmountTy =
9881+
TLI.getShiftAmountTy(NewLoad.getValueType(), DAG.getDataLayout());
9882+
9883+
return DAG.getNode(ISD::ROTR, SDLoc(N), VT, NewLoad,
9884+
DAG.getConstant(RotateAmount, SDLoc(N), ShiftAmountTy));
9885+
}
9886+
9887+
llvm_unreachable("Should have returned a transformed load value");
98389888
}
98399889

98409890
// If the target has andn, bsl, or a similar bit-select instruction,
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
2+
3+
; This test checks that a pattern of two 32-bit loads, which are combined
4+
; to form a 64-bit value with swapped words, is optimized into a single
5+
; 64-bit load followed by a rotate by 32 bits.
6+
7+
define i64 @test_load_bswap_to_rotate(ptr %p) {
8+
; CHECK-LABEL: test_load_bswap_to_rotate:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: movq (%rdi), %rax
11+
; CHECK-NEXT: rorq $32, %rax
12+
; CHECK-NEXT: retq
13+
;
14+
; CHECK-NOT: movl
15+
16+
%p.hi = getelementptr inbounds nuw i8, ptr %p, i64 4
17+
%lo = load i32, ptr %p
18+
%hi = load i32, ptr %p.hi
19+
%conv = zext i32 %lo to i64
20+
%shl = shl nuw i64 %conv, 32
21+
%conv2 = zext i32 %hi to i64
22+
%or = or disjoint i64 %shl, %conv2
23+
ret i64 %or
24+
}

0 commit comments

Comments
 (0)