From df17560c9f030a5301ed9401ca158bde57e0bbc9 Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Mon, 10 Nov 2025 23:16:46 +0000 Subject: [PATCH 1/7] [DAG] Added check to combine two i32 loads into a single i64 load and rotate. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 76 +++++++++++++++---- .../CodeGen/X86/dagcombine-bswap-to-rotate.ll | 24 ++++++ 2 files changed, 87 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/X86/dagcombine-bswap-to-rotate.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4f2eb1e64dbe0..a36e05145040f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9772,12 +9772,39 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { MemVT)) return SDValue(); + auto IsRotateLoaded = []( + ArrayRef ByteOffsets, int64_t FirstOffset, unsigned BitWidth) { + // Ensure that we have the correct width type, we want to combine two 32 loads into a 64 bit load. + if (BitWidth != 64 || ByteOffsets.size() != 8) + return false; + + constexpr unsigned FourBytes = 4; + + for (unsigned i = 0; i < FourBytes; ++i) { + // Check the lower 4 bytes come from the higher memory address. + if (ByteOffsets[i] != FirstOffset + i + FourBytes) + return false; + // Check the higher 4 bytes come from the lower memory adderess. 
+ if (ByteOffsets[i + FourBytes] != FirstOffset + i) + return false; + } + return true; + }; + // Check if the bytes of the OR we are looking at match with either big or // little endian value load std::optional IsBigEndian = isBigEndian( ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset); - if (!IsBigEndian) - return SDValue(); + + bool IsRotated = false; + if (!IsBigEndian) { + IsRotated = + IsRotateLoaded(ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), + FirstOffset, VT.getSizeInBits()); + + if (!IsRotated) + return SDValue(); + } assert(FirstByteProvider && "must be set"); @@ -9791,8 +9818,9 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // replace it with a single (possibly zero-extended) load and bswap + shift if // needed. - // If the load needs byte swap check if the target supports it - bool NeedsBswap = IsBigEndianTarget != *IsBigEndian; + // If the load needs byte swap check if the target supports it, make sure that + // we are not rotating. + bool NeedsBswap = !IsRotated && (IsBigEndianTarget != *IsBigEndian); // Before legalize we can introduce illegal bswaps which will be later // converted to an explicit bswap sequence. This way we end up with a single @@ -9803,8 +9831,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { !TLI.isOperationLegal(ISD::BSWAP, VT)) return SDValue(); - // If we need to bswap and zero extend, we have to insert a shift. Check that - // it is legal. + // If we need to rotate make sure that is legal. + if (IsRotated && LegalOperations && !TLI.isOperationLegal(ISD::ROTR, VT)) + return SDValue(); + + // If we need to bswap and zero extend, we have to insert a shift. Check + // thatunsigned Fast = 0; it is legal. 
if (NeedsBswap && NeedsZext && LegalOperations && !TLI.isOperationLegal(ISD::SHL, VT)) return SDValue(); @@ -9826,15 +9858,33 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { for (LoadSDNode *L : Loads) DAG.makeEquivalentMemoryOrdering(L, NewLoad); - if (!NeedsBswap) + // If no transform is needed the return the new load. + if (!NeedsBswap && !IsRotated) return NewLoad; - SDValue ShiftedLoad = - NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad, - DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, - VT, SDLoc(N))) - : NewLoad; - return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad); + // If we detect the need to BSWAP build the new node and return it. + if (NeedsBswap) { + SDValue ShiftedLoad = + NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad, + DAG.getShiftAmountConstant( + ZeroExtendedBytes * 8, VT, SDLoc(N))) + : NewLoad; + return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad); + } + + // If we detect we need to rotate build the new ROTR node. + if (IsRotated) { + // The amount to rotate is half that of the size, i.e 32 bits for an i64 + unsigned RotateAmount = VT.getSizeInBits() / 2; + + EVT ShiftAmountTy = + TLI.getShiftAmountTy(NewLoad.getValueType(), DAG.getDataLayout()); + + return DAG.getNode(ISD::ROTR, SDLoc(N), VT, NewLoad, + DAG.getConstant(RotateAmount, SDLoc(N), ShiftAmountTy)); + } + + llvm_unreachable("Should have returned a transformed load value"); } // If the target has andn, bsl, or a similar bit-select instruction, diff --git a/llvm/test/CodeGen/X86/dagcombine-bswap-to-rotate.ll b/llvm/test/CodeGen/X86/dagcombine-bswap-to-rotate.ll new file mode 100644 index 0000000000000..cdab5835c0b48 --- /dev/null +++ b/llvm/test/CodeGen/X86/dagcombine-bswap-to-rotate.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +; This test checks that a pattern of two 32-bit loads, which are combined +; to form a 64-bit value with swapped words, is optimized into a single +; 64-bit load followed by a 
32-bit rotate. + +define i64 @test_load_bswap_to_rotate(ptr %p) { +; CHECK-LABEL: test_load_bswap_to_rotate: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %rax +; CHECK-NEXT: rorq $32, %rax +; CHECK-NEXT: retq +; +; CHECK-NOT: movl + + %p.hi = getelementptr inbounds nuw i8, ptr %p, i64 4 + %lo = load i32, ptr %p + %hi = load i32, ptr %p.hi + %conv = zext i32 %lo to i64 + %shl = shl nuw i64 %conv, 32 + %conv2 = zext i32 %hi to i64 + %or = or disjoint i64 %shl, %conv2 + ret i64 %or +} From cafa243421ae800ecf14c97f2b4ad86bf65f57cb Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Mon, 10 Nov 2025 23:39:09 +0000 Subject: [PATCH 2/7] [DAG] Applied formatting --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 15 ++++++++------- .../CodeGen/X86/dagcombine-bswap-to-rotate.ll | 2 -- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a36e05145040f..13cf404ec1d31 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9772,9 +9772,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { MemVT)) return SDValue(); - auto IsRotateLoaded = []( - ArrayRef ByteOffsets, int64_t FirstOffset, unsigned BitWidth) { - // Ensure that we have the correct width type, we want to combine two 32 loads into a 64 bit load. + auto IsRotateLoaded = [](ArrayRef ByteOffsets, int64_t FirstOffset, + unsigned BitWidth) { + // Ensure that we have the correct width type, we want to combine two 32 + // loads into a 64 bit load. 
if (BitWidth != 64 || ByteOffsets.size() != 8) return false; @@ -9795,13 +9796,13 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // little endian value load std::optional IsBigEndian = isBigEndian( ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset); - + bool IsRotated = false; if (!IsBigEndian) { IsRotated = - IsRotateLoaded(ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), - FirstOffset, VT.getSizeInBits()); - + IsRotateLoaded(ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), + FirstOffset, VT.getSizeInBits()); + if (!IsRotated) return SDValue(); } diff --git a/llvm/test/CodeGen/X86/dagcombine-bswap-to-rotate.ll b/llvm/test/CodeGen/X86/dagcombine-bswap-to-rotate.ll index cdab5835c0b48..b08f2fbbc56e4 100644 --- a/llvm/test/CodeGen/X86/dagcombine-bswap-to-rotate.ll +++ b/llvm/test/CodeGen/X86/dagcombine-bswap-to-rotate.ll @@ -10,8 +10,6 @@ define i64 @test_load_bswap_to_rotate(ptr %p) { ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: rorq $32, %rax ; CHECK-NEXT: retq -; -; CHECK-NOT: movl %p.hi = getelementptr inbounds nuw i8, ptr %p, i64 4 %lo = load i32, ptr %p From b0dc37e2cab04603c446b8ad2b83a1ab7a3fb234 Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Mon, 10 Nov 2025 23:42:26 +0000 Subject: [PATCH 3/7] [DAG] Fixed typo --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 13cf404ec1d31..4ae239bc58ef4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9859,7 +9859,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { for (LoadSDNode *L : Loads) DAG.makeEquivalentMemoryOrdering(L, NewLoad); - // If no transform is needed the return the new load. + // If no transform is needed then return the new load. 
if (!NeedsBswap && !IsRotated) return NewLoad; From fc3931d6ad5a12e3e99b521619e258a6b128ed1b Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Tue, 11 Nov 2025 18:22:55 +0000 Subject: [PATCH 4/7] [DAG] Moved test to load-combine.ll --- .../CodeGen/X86/dagcombine-bswap-to-rotate.ll | 22 ------------------- llvm/test/CodeGen/X86/load-combine.ll | 17 ++++++++++++++ 2 files changed, 17 insertions(+), 22 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/dagcombine-bswap-to-rotate.ll diff --git a/llvm/test/CodeGen/X86/dagcombine-bswap-to-rotate.ll b/llvm/test/CodeGen/X86/dagcombine-bswap-to-rotate.ll deleted file mode 100644 index b08f2fbbc56e4..0000000000000 --- a/llvm/test/CodeGen/X86/dagcombine-bswap-to-rotate.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s - -; This test checks that a pattern of two 32-bit loads, which are combined -; to form a 64-bit value with swapped words, is optimized into a single -; 64-bit load followed by a 32-bit rotate. 
- -define i64 @test_load_bswap_to_rotate(ptr %p) { -; CHECK-LABEL: test_load_bswap_to_rotate: -; CHECK: # %bb.0: -; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: rorq $32, %rax -; CHECK-NEXT: retq - - %p.hi = getelementptr inbounds nuw i8, ptr %p, i64 4 - %lo = load i32, ptr %p - %hi = load i32, ptr %p.hi - %conv = zext i32 %lo to i64 - %shl = shl nuw i64 %conv, 32 - %conv2 = zext i32 %hi to i64 - %or = or disjoint i64 %shl, %conv2 - ret i64 %or -} diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll index f21c07599d6f1..46d69a5e6063e 100644 --- a/llvm/test/CodeGen/X86/load-combine.ll +++ b/llvm/test/CodeGen/X86/load-combine.ll @@ -1314,3 +1314,20 @@ define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind { %res = or i32 %e1.ext.shift, %e0.ext ret i32 %res } + +define i64 @test_load_bswap_to_rotate(ptr %p) { +; CHECK64-LABEL: test_load_bswap_to_rotate: +; CHECK64: # %bb.0: +; CHECK64-NEXT: movq (%rdi), %rax +; CHECK64-NEXT: rorq $32, %rax +; CHECK64-NEXT: retq + + %p.hi = getelementptr inbounds nuw i8, ptr %p, i64 4 + %lo = load i32, ptr %p + %hi = load i32, ptr %p.hi + %conv = zext i32 %lo to i64 + %shl = shl nuw i64 %conv, 32 + %conv2 = zext i32 %hi to i64 + %or = or disjoint i64 %shl, %conv2 + ret i64 %or +} From 7d3b32c697c4ec03755b9bdb01aeb12a67dc4709 Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Tue, 11 Nov 2025 22:06:35 +0000 Subject: [PATCH 5/7] [DAG] Implemented code review comments and added zext --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 186 ++++++++++-------- 1 file changed, 99 insertions(+), 87 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4ae239bc58ef4..095b012548da4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9398,6 +9398,35 @@ static std::optional isBigEndian(ArrayRef ByteOffsets, return BigEndian; } +// Determines if multiple bytes 
loaded into a register +// correspond to loading a single, contiguous block of bytes from memory and +// then performing a bitwise right rotation. Returns the rotation amount or +// std::nullopt if we can't match the pattern. +static std::optional getRotationAmount(ArrayRef ByteOffsets, + int64_t FirstOffset) { + unsigned ByteWidth = ByteOffsets.size(); + if (ByteWidth == 0) + return std::nullopt; + + int64_t FirstByteActualOffset = ByteOffsets[0]; + int64_t RotateAmtInBytes = FirstByteActualOffset - FirstOffset; + + // Check the rotation amount is valid + if (RotateAmtInBytes < 0 || RotateAmtInBytes >= ByteWidth) + return std::nullopt; + + // Make sure each of the following loads follow the same rotational pattern. + for (unsigned I = 0; I < ByteWidth; ++I) { + int64_t ExpectedOffset = FirstOffset + ((I + RotateAmtInBytes) % ByteWidth); + if (ByteOffsets[I] != ExpectedOffset) { + return std::nullopt; + } + } + + // Return the rotation amount in bits. + return RotateAmtInBytes * 8; +} + // Look through one layer of truncate or extend. static SDValue stripTruncAndExt(SDValue Value) { switch (Value.getOpcode()) { @@ -9772,99 +9801,54 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { MemVT)) return SDValue(); - auto IsRotateLoaded = [](ArrayRef ByteOffsets, int64_t FirstOffset, - unsigned BitWidth) { - // Ensure that we have the correct width type, we want to combine two 32 - // loads into a 64 bit load. - if (BitWidth != 64 || ByteOffsets.size() != 8) - return false; - - constexpr unsigned FourBytes = 4; - - for (unsigned i = 0; i < FourBytes; ++i) { - // Check the lower 4 bytes come from the higher memory address. - if (ByteOffsets[i] != FirstOffset + i + FourBytes) - return false; - // Check the higher 4 bytes come from the lower memory adderess. 
- if (ByteOffsets[i + FourBytes] != FirstOffset + i) - return false; - } - return true; - }; - // Check if the bytes of the OR we are looking at match with either big or // little endian value load std::optional IsBigEndian = isBigEndian( ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset); - bool IsRotated = false; - if (!IsBigEndian) { - IsRotated = - IsRotateLoaded(ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), - FirstOffset, VT.getSizeInBits()); - - if (!IsRotated) + // Handle the standard load combine. + if (IsBigEndian) { + bool NeedsBswap = IsBigEndianTarget != *IsBigEndian; + + // Before legalize we can introduce illegal bswaps which will be later + // converted to an explicit bswap sequence. This way we end up with a single + // load and byte shuffling instead of several loads and byte shuffling. + // We do not introduce illegal bswaps when zero-extending as this tends to + // introduce too many arithmetic instructions. + if (NeedsBswap && (LegalOperations || NeedsZext) && + !TLI.isOperationLegal(ISD::BSWAP, VT)) return SDValue(); - } - - assert(FirstByteProvider && "must be set"); - - // Ensure that the first byte is loaded from zero offset of the first load. - // So the combined value can be loaded from the first load address. - if (MemoryByteOffset(*FirstByteProvider) != 0) - return SDValue(); - auto *FirstLoad = cast(FirstByteProvider->Src.value()); - - // The node we are looking at matches with the pattern, check if we can - // replace it with a single (possibly zero-extended) load and bswap + shift if - // needed. - - // If the load needs byte swap check if the target supports it, make sure that - // we are not rotating. - bool NeedsBswap = !IsRotated && (IsBigEndianTarget != *IsBigEndian); - - // Before legalize we can introduce illegal bswaps which will be later - // converted to an explicit bswap sequence. This way we end up with a single - // load and byte shuffling instead of several loads and byte shuffling. 
- // We do not introduce illegal bswaps when zero-extending as this tends to - // introduce too many arithmetic instructions. - if (NeedsBswap && (LegalOperations || NeedsZext) && - !TLI.isOperationLegal(ISD::BSWAP, VT)) - return SDValue(); - // If we need to rotate make sure that is legal. - if (IsRotated && LegalOperations && !TLI.isOperationLegal(ISD::ROTR, VT)) - return SDValue(); + // If we need to bswap and zero extend, we have to insert a shift. Check + // that it is legal. + if (NeedsBswap && NeedsZext && LegalOperations && + !TLI.isOperationLegal(ISD::SHL, VT)) + return SDValue(); - // If we need to bswap and zero extend, we have to insert a shift. Check - // thatunsigned Fast = 0; it is legal. - if (NeedsBswap && NeedsZext && LegalOperations && - !TLI.isOperationLegal(ISD::SHL, VT)) - return SDValue(); + auto *FirstLoad = cast(FirstByteProvider->Src.value()); + if (MemoryByteOffset(*FirstByteProvider) != 0) + return SDValue(); - // Check that a load of the wide type is both allowed and fast on the target - unsigned Fast = 0; - bool Allowed = - TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, - *FirstLoad->getMemOperand(), &Fast); - if (!Allowed || !Fast) - return SDValue(); + // Check that a load of the wide type is both allowed and fast on the target + unsigned Fast = 0; + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, + *FirstLoad->getMemOperand(), &Fast) || + !Fast) + return SDValue(); - SDValue NewLoad = - DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT, - Chain, FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign()); + SDValue NewLoad = DAG.getExtLoad( + NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT, Chain, + FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), MemVT, + FirstLoad->getAlign()); - // Transfer chain users from old loads to the new load. 
- for (LoadSDNode *L : Loads) - DAG.makeEquivalentMemoryOrdering(L, NewLoad); + for (LoadSDNode *L : Loads) + DAG.makeEquivalentMemoryOrdering(L, NewLoad); - // If no transform is needed then return the new load. - if (!NeedsBswap && !IsRotated) - return NewLoad; + // It is a simple combine. + if (!NeedsBswap) + return NewLoad; - // If we detect the need to BSWAP build the new node and return it. - if (NeedsBswap) { + // It is a BSWAP combine. SDValue ShiftedLoad = NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad, DAG.getShiftAmountConstant( @@ -9873,19 +9857,47 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad); } - // If we detect we need to rotate build the new ROTR node. - if (IsRotated) { - // The amount to rotate is half that of the size, i.e 32 bits for an i64 - unsigned RotateAmount = VT.getSizeInBits() / 2; + // Handle the rotated load combine. + if (auto RotateAmt = getRotationAmount( + ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset)) { + + // Make sure we can rotate + if (LegalOperations && !TLI.isOperationLegal(ISD::ROTR, VT)) + return SDValue(); + + auto *FirstLoad = cast(FirstByteProvider->Src.value()); + if (MemoryByteOffset(*FirstByteProvider) != 0) + return SDValue(); + + // Make sure the operation is legal and fast. + unsigned Fast = 0; + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, + *FirstLoad->getMemOperand(), &Fast) || + !Fast) + return SDValue(); + + // Create the new load, rotate and then zero extend after if we need to. 
+ SDValue NewLoad = + DAG.getLoad(MemVT, SDLoc(N), Chain, FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo()); + + for (LoadSDNode *L : Loads) + DAG.makeEquivalentMemoryOrdering(L, NewLoad); EVT ShiftAmountTy = TLI.getShiftAmountTy(NewLoad.getValueType(), DAG.getDataLayout()); + SDValue Rotated = + DAG.getNode(ISD::ROTR, SDLoc(N), MemVT, NewLoad, + DAG.getConstant(*RotateAmt, SDLoc(N), ShiftAmountTy)); - return DAG.getNode(ISD::ROTR, SDLoc(N), VT, NewLoad, - DAG.getConstant(RotateAmount, SDLoc(N), ShiftAmountTy)); + if (NeedsZext) + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Rotated); + + return Rotated; } - llvm_unreachable("Should have returned a transformed load value"); + // No pattern matched. + return SDValue(); } // If the target has andn, bsl, or a similar bit-select instruction, From 55f06ae61d46e59cb584c20f2fb3af553961dd52 Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Tue, 11 Nov 2025 22:07:06 +0000 Subject: [PATCH 6/7] [DAG] Added test case for zext rotate --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 ++--- llvm/test/CodeGen/X86/load-combine.ll | 31 +++++++++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 095b012548da4..7d324c031528d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9846,7 +9846,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // It is a simple combine. if (!NeedsBswap) - return NewLoad; + return NewLoad; // It is a BSWAP combine. 
SDValue ShiftedLoad = @@ -9864,7 +9864,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Make sure we can rotate if (LegalOperations && !TLI.isOperationLegal(ISD::ROTR, VT)) return SDValue(); - + auto *FirstLoad = cast(FirstByteProvider->Src.value()); if (MemoryByteOffset(*FirstByteProvider) != 0) return SDValue(); @@ -9890,9 +9890,9 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { DAG.getNode(ISD::ROTR, SDLoc(N), MemVT, NewLoad, DAG.getConstant(*RotateAmt, SDLoc(N), ShiftAmountTy)); - if (NeedsZext) + if (NeedsZext) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Rotated); - + return Rotated; } diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll index 46d69a5e6063e..b8311ad1ad3df 100644 --- a/llvm/test/CodeGen/X86/load-combine.ll +++ b/llvm/test/CodeGen/X86/load-combine.ll @@ -1331,3 +1331,34 @@ define i64 @test_load_bswap_to_rotate(ptr %p) { %or = or disjoint i64 %shl, %conv2 ret i64 %or } + +define i64 @test_load_rotate_zext(ptr %p) { +; CHECK64-LABEL: test_load_rotate_zext: +; CHECK64: # %bb.0: +; CHECK64-NEXT: movl (%rdi), %eax +; CHECK64-NEXT: rorl $8, %eax +; CHECK64-NEXT: retq + %p1 = getelementptr inbounds i8, ptr %p, i64 1 + %l1 = load i8, ptr %p1, align 1 + %e1 = zext i8 %l1 to i64 + + %p2 = getelementptr inbounds i8, ptr %p, i64 2 + %l2 = load i8, ptr %p2, align 1 + %e2 = zext i8 %l2 to i64 + %s2 = shl i64 %e2, 8 + + %p3 = getelementptr inbounds i8, ptr %p, i64 3 + %l3 = load i8, ptr %p3, align 1 + %e3 = zext i8 %l3 to i64 + %s3 = shl i64 %e3, 16 + + %p0 = getelementptr inbounds i8, ptr %p, i64 0 + %l0 = load i8, ptr %p0, align 1 + %e0 = zext i8 %l0 to i64 + %s0 = shl i64 %e0, 24 + + %or1 = or i64 %e1, %s2 + %or2 = or i64 %or1, %s3 + %or3 = or i64 %or2, %s0 + ret i64 %or3 +} From d5a4f6c4956c1d234571b035d56bce24802268b9 Mon Sep 17 00:00:00 2001 From: GrumpyPigSkin Date: Tue, 11 Nov 2025 22:58:55 +0000 Subject: [PATCH 7/7] [DAG] Updated tests --- llvm/test/CodeGen/X86/load-combine.ll | 23 
+++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll index b8311ad1ad3df..3fb8cfe3c81da 100644 --- a/llvm/test/CodeGen/X86/load-combine.ll +++ b/llvm/test/CodeGen/X86/load-combine.ll @@ -1316,8 +1316,15 @@ define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind { } define i64 @test_load_bswap_to_rotate(ptr %p) { +; CHECK-LABEL: test_load_bswap_to_rotate: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl (%eax), %edx +; CHECK-NEXT: movl 4(%eax), %eax +; CHECK-NEXT: retl +; ; CHECK64-LABEL: test_load_bswap_to_rotate: -; CHECK64: # %bb.0: +; CHECK64: # %bb.0: ; CHECK64-NEXT: movq (%rdi), %rax ; CHECK64-NEXT: rorq $32, %rax ; CHECK64-NEXT: retq @@ -1333,10 +1340,18 @@ define i64 @test_load_bswap_to_rotate(ptr %p) { } define i64 @test_load_rotate_zext(ptr %p) { +; CHECK-LABEL: test_load_rotate_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl (%eax), %eax +; CHECK-NEXT: rorl $8, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: retl +; ; CHECK64-LABEL: test_load_rotate_zext: -; CHECK64: # %bb.0: -; CHECK64-NEXT: movl (%rdi), %eax -; CHECK64-NEXT: rorl $8, %eax +; CHECK64: # %bb.0: +; CHECK64-NEXT: movl (%rdi), %eax +; CHECK64-NEXT: rorl $8, %eax ; CHECK64-NEXT: retq %p1 = getelementptr inbounds i8, ptr %p, i64 1 %l1 = load i8, ptr %p1, align 1