From 3e7e7081b9cd03a9f2f1191b08973b9602bf30f8 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Fri, 28 Feb 2025 13:23:43 -0300 Subject: [PATCH 01/29] [SelectionDAG] Avoid store merging across function calls This patch improves DAGCombiner's handling of potential store merges by detecting function calls between loads and stores. When a function call exists in the chain between a load and its corresponding store, we avoid merging these stores as it would require costly register spilling. Currently it's only enabled for riscv. --- llvm/include/llvm/CodeGen/TargetLowering.h | 4 ++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 44 +++++++++++++++++-- llvm/lib/Target/RISCV/RISCVISelLowering.h | 6 +++ 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 2089d47e9cbc8..5e61c1f1a9687 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3506,6 +3506,10 @@ class TargetLoweringBase { /// The default implementation just freezes the set of reserved registers. virtual void finalizeLowering(MachineFunction &MF) const; + /// Returns true if it's profitable to allow merging store of loads when there + /// are functions calls between the load and the store. + virtual bool shouldMergeStoreOfLoadsOverCall() const { return true; } + //===----------------------------------------------------------------------===// // GlobalISel Hooks //===----------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ef5f2210573e0..42d972b3a1db1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21363,8 +21363,38 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, // must not be zext, volatile, indexed, and they must be consecutive. BaseIndexOffset LdBasePtr; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *St = cast(StoreNodes[i].MemNode); + // Check if a call exists in the store chain. + auto HasCallInLdStChain = [](SDNode *Load, SDNode *Store) { + bool FoundCall = false; + SmallVector Nodes = {Store->getOperand(0).getNode()}; + while (!Nodes.empty()) { + SDNode *Node = Nodes.pop_back_val(); + if (Node->getNumOperands() == 0) + continue; + + switch (Node->getOpcode()) { + case ISD::TokenFactor: + for (unsigned Nops = Node->getNumOperands(); Nops;) + Nodes.push_back(Node->getOperand(--Nops).getNode()); + break; + case ISD::CALLSEQ_START: + FoundCall = true; + break; + case ISD::LOAD: + if (Node == Load) + return false; + [[fallthrough]]; + default: + Nodes.push_back(Node->getOperand(0).getNode()); + break; + } + } + return FoundCall; + }; + + auto StIt = StoreNodes.begin(); + while (StIt != StoreNodes.end()) { + StoreSDNode *St = cast(StIt->MemNode); SDValue Val = peekThroughBitcasts(St->getValue()); LoadSDNode *Ld = cast(Val); @@ -21380,8 +21410,14 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, LdBasePtr = LdPtr; } - // We found a potential memory operand to merge. - LoadNodes.push_back(MemOpLink(Ld, LdOffset)); + // Check if there is a call in the load/store chain. + if (!TLI.shouldMergeStoreOfLoadsOverCall() && + HasCallInLdStChain(Ld, St)) { + StIt = StoreNodes.erase(StIt); + } else { + LoadNodes.push_back(MemOpLink(Ld, LdOffset)); + ++StIt; + } } while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index ffbc14a29006c..d52d92eb581ee 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -1070,6 +1070,12 @@ class RISCVTargetLowering : public TargetLowering { return false; } + /// Disables storing and loading vectors when there are function calls between + /// the load and store, since these are more expensive than just using scalars + bool shouldMergeStoreOfLoadsOverCall() const override { + return false; + } + /// For available scheduling models FDIV + two independent FMULs are much /// faster than two FDIVs. unsigned combineRepeatedFPDivisors() const override; From 5b1fc65565deb0d655005bfca17b68e20a38cb05 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Mon, 3 Mar 2025 21:11:15 -0300 Subject: [PATCH 02/29] Check call_end instead of start Signed-off-by: Mikhail R. Gadelha --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 42d972b3a1db1..434a7f92c93b5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21377,7 +21377,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, for (unsigned Nops = Node->getNumOperands(); Nops;) Nodes.push_back(Node->getOperand(--Nops).getNode()); break; - case ISD::CALLSEQ_START: + case ISD::CALLSEQ_END: FoundCall = true; break; case ISD::LOAD: From be40ec2d536dd5750f0adf2288c9b6d5d6e15e5c Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Mon, 3 Mar 2025 21:11:33 -0300 Subject: [PATCH 03/29] Only walk over mem operation chains Signed-off-by: Mikhail R. Gadelha --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 434a7f92c93b5..4374811fe4930 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21384,8 +21384,9 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, if (Node == Load) return false; [[fallthrough]]; - default: + case ISD::STORE: Nodes.push_back(Node->getOperand(0).getNode()); + default: break; } } From c3298ffa4b8c1702d9dc4f32c9ac4ac3d1e24188 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Mon, 3 Mar 2025 21:12:14 -0300 Subject: [PATCH 04/29] Don't go over NumConsecutiveStores Signed-off-by: Mikhail R. Gadelha --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4374811fe4930..75ce5b9cd2595 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21394,7 +21394,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, }; auto StIt = StoreNodes.begin(); - while (StIt != StoreNodes.end()) { + unsigned i = 0; + while (StIt != StoreNodes.end() && i < NumConsecutiveStores) { StoreSDNode *St = cast(StIt->MemNode); SDValue Val = peekThroughBitcasts(St->getValue()); LoadSDNode *Ld = cast(Val); @@ -21412,13 +21413,13 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, } // Check if there is a call in the load/store chain. - if (!TLI.shouldMergeStoreOfLoadsOverCall() && - HasCallInLdStChain(Ld, St)) { + if (!TLI.shouldMergeStoreOfLoadsOverCall() && HasCallInLdStChain(Ld, St)) { StIt = StoreNodes.erase(StIt); } else { LoadNodes.push_back(MemOpLink(Ld, LdOffset)); ++StIt; } + ++i; } while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { From b56d1b130bfa3f5533fe72b901ed8fb011f3459d Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Tue, 4 Mar 2025 14:09:23 -0300 Subject: [PATCH 05/29] Use SDValues Signed-off-by: Mikhail R. Gadelha --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 75ce5b9cd2595..bb987d5f8811a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21366,16 +21366,15 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, // Check if a call exists in the store chain. auto HasCallInLdStChain = [](SDNode *Load, SDNode *Store) { bool FoundCall = false; - SmallVector Nodes = {Store->getOperand(0).getNode()}; - while (!Nodes.empty()) { - SDNode *Node = Nodes.pop_back_val(); + SmallVector Values = {Store->getOperand(0)}; + while (!Values.empty()) { + SDNode *Node = Values.pop_back_val().getNode(); if (Node->getNumOperands() == 0) continue; switch (Node->getOpcode()) { case ISD::TokenFactor: - for (unsigned Nops = Node->getNumOperands(); Nops;) - Nodes.push_back(Node->getOperand(--Nops).getNode()); + append_range(Values, Node->op_values()); break; case ISD::CALLSEQ_END: FoundCall = true; @@ -21385,7 +21384,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, return false; [[fallthrough]]; case ISD::STORE: - Nodes.push_back(Node->getOperand(0).getNode()); + Values.push_back(Node->getOperand(0)); default: break; } From 82420c71c3d4a279376faf752c4554204c68020e Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Tue, 4 Mar 2025 14:09:36 -0300 Subject: [PATCH 06/29] Added fallthrough Signed-off-by: Mikhail R. Gadelha --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bb987d5f8811a..6209a3e8aa4a7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21385,6 +21385,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, [[fallthrough]]; case ISD::STORE: Values.push_back(Node->getOperand(0)); + [[fallthrough]]; default: break; } From 96c8e5366ec2defe12c0a1e16c3bc2e7ab256079 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Tue, 4 Mar 2025 21:58:18 -0300 Subject: [PATCH 07/29] Add Visited list to cache the walk Signed-off-by: Mikhail R. Gadelha --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6209a3e8aa4a7..43c0d0f141989 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21365,16 +21365,20 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, // Check if a call exists in the store chain. auto HasCallInLdStChain = [](SDNode *Load, SDNode *Store) { + SmallPtrSet Visited; + SmallVector Worklist; + Worklist.push_back(Store->getOperand(0).getNode()); + bool FoundCall = false; - SmallVector Values = {Store->getOperand(0)}; - while (!Values.empty()) { - SDNode *Node = Values.pop_back_val().getNode(); - if (Node->getNumOperands() == 0) + while (!Worklist.empty()) { + auto Node = Worklist.pop_back_val(); + if (!Visited.insert(Node).second || Node->getNumOperands() == 0) continue; switch (Node->getOpcode()) { case ISD::TokenFactor: - append_range(Values, Node->op_values()); + for (SDValue Op : Node->ops()) + Worklist.push_back(Op.getNode()); break; case ISD::CALLSEQ_END: FoundCall = true; @@ -21384,7 +21388,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, return false; [[fallthrough]]; case ISD::STORE: - Values.push_back(Node->getOperand(0)); + Worklist.push_back(Node->getOperand(0).getNode()); [[fallthrough]]; default: break; From 35743700d5aa4e514f55ba266deda8efe81fa885 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Tue, 4 Mar 2025 22:44:58 -0300 Subject: [PATCH 08/29] Moved increment Signed-off-by: Mikhail R. Gadelha --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 43c0d0f141989..f898e9e49c059 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21399,7 +21399,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, auto StIt = StoreNodes.begin(); unsigned i = 0; - while (StIt != StoreNodes.end() && i < NumConsecutiveStores) { + while (StIt != StoreNodes.end() && i++ < NumConsecutiveStores) { StoreSDNode *St = cast(StIt->MemNode); SDValue Val = peekThroughBitcasts(St->getValue()); LoadSDNode *Ld = cast(Val); @@ -21423,7 +21423,6 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, LoadNodes.push_back(MemOpLink(Ld, LdOffset)); ++StIt; } - ++i; } while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { From f9393d5c208441c640389503f87b713a01437219 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Tue, 4 Mar 2025 23:19:38 -0300 Subject: [PATCH 09/29] Updated test case Signed-off-by: Mikhail R. Gadelha --- .../CodeGen/RISCV/stores-of-loads-merging.ll | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll index b2be401b4676f..71bb4d5f41e7d 100644 --- a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll +++ b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll @@ -13,40 +13,40 @@ define void @f(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) { ; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s4, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: sub sp, sp, a6 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb +; CHECK-NEXT: .cfi_offset s3, -40 +; CHECK-NEXT: .cfi_offset s4, -48 ; CHECK-NEXT: mv s0, a5 ; CHECK-NEXT: mv s1, a4 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vse64.v v8, (a1) -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: ld s3, 0(a2) +; CHECK-NEXT: ld s4, 8(a2) ; CHECK-NEXT: mv s2, a3 ; CHECK-NEXT: call g -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: sd s3, 0(s2) +; CHECK-NEXT: sd s4, 8(s2) ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vse64.v v8, (s2) ; CHECK-NEXT: vle64.v v8, (s1) ; CHECK-NEXT: vse64.v v8, (s0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 48 ; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; CHECK-NEXT: .cfi_restore ra ; CHECK-NEXT: .cfi_restore s0 ; CHECK-NEXT: .cfi_restore s1 ; CHECK-NEXT: .cfi_restore s2 +; CHECK-NEXT: .cfi_restore s3 +; CHECK-NEXT: .cfi_restore s4 ; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret From d86ec01d8494974d3d41e48a9081e1e8bc158b0a Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Wed, 5 Mar 2025 14:31:53 -0300 Subject: [PATCH 10/29] Enable merge by default for scalars Signed-off-by: Mikhail R. Gadelha --- llvm/include/llvm/CodeGen/TargetLowering.h | 2 +- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 ++- llvm/lib/Target/RISCV/RISCVISelLowering.h | 9 +++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 5e61c1f1a9687..f8dd6cdd6aec8 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3508,7 +3508,7 @@ class TargetLoweringBase { /// Returns true if it's profitable to allow merging store of loads when there /// are functions calls between the load and the store. - virtual bool shouldMergeStoreOfLoadsOverCall() const { return true; } + virtual bool shouldMergeStoreOfLoadsOverCall(EVT) const { return true; } //===----------------------------------------------------------------------===// // GlobalISel Hooks diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f898e9e49c059..11b6d516135ec 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21417,7 +21417,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, } // Check if there is a call in the load/store chain. - if (!TLI.shouldMergeStoreOfLoadsOverCall() && HasCallInLdStChain(Ld, St)) { + if (!TLI.shouldMergeStoreOfLoadsOverCall(MemVT) && + HasCallInLdStChain(Ld, St)) { StIt = StoreNodes.erase(StIt); } else { LoadNodes.push_back(MemOpLink(Ld, LdOffset)); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index d52d92eb581ee..658d1bce2cf6e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -1070,10 +1070,11 @@ class RISCVTargetLowering : public TargetLowering { return false; } - /// Disables storing and loading vectors when there are function calls between - /// the load and store, since these are more expensive than just using scalars - bool shouldMergeStoreOfLoadsOverCall() const override { - return false; + /// Disables storing and loading vectors by default when there are function + /// calls between the load and store, since these are more expensive than just + /// using scalars + bool shouldMergeStoreOfLoadsOverCall(EVT VT) const override { + return VT.isScalarInteger(); } /// For available scheduling models FDIV + two independent FMULs are much From 04bca6d28a55df2da7714f7b2f8105e502d32baa Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Wed, 5 Mar 2025 14:32:54 -0300 Subject: [PATCH 11/29] Rewrite walk back algo to keep track of calls found Signed-off-by: Mikhail R. Gadelha --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 11b6d516135ec..9ea06d70b36a0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21366,35 +21366,33 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, // Check if a call exists in the store chain. auto HasCallInLdStChain = [](SDNode *Load, SDNode *Store) { SmallPtrSet Visited; - SmallVector Worklist; - Worklist.push_back(Store->getOperand(0).getNode()); + SmallVector, 8> Worklist; + Worklist.emplace_back(Store->getOperand(0).getNode(), false); - bool FoundCall = false; while (!Worklist.empty()) { - auto Node = Worklist.pop_back_val(); + auto [Node, FoundCall] = Worklist.pop_back_val(); if (!Visited.insert(Node).second || Node->getNumOperands() == 0) continue; switch (Node->getOpcode()) { + case ISD::CALLSEQ_END: + Worklist.emplace_back(Node->getOperand(0).getNode(), true); + break; case ISD::TokenFactor: for (SDValue Op : Node->ops()) - Worklist.push_back(Op.getNode()); - break; - case ISD::CALLSEQ_END: - FoundCall = true; + Worklist.emplace_back(Op.getNode(), FoundCall); break; case ISD::LOAD: if (Node == Load) - return false; - [[fallthrough]]; - case ISD::STORE: - Worklist.push_back(Node->getOperand(0).getNode()); + return FoundCall; [[fallthrough]]; default: + if (Node->getNumOperands() > 0) + Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall); break; } } - return FoundCall; + return false; }; auto StIt = StoreNodes.begin(); From f27092ff009d14ac87bde4118e91429fc009e6a6 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Thu, 6 Mar 2025 12:10:04 -0300 Subject: [PATCH 12/29] Check final type before we prevent merges Signed-off-by: Mikhail R. Gadelha --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 98 ++++++++++--------- 1 file changed, 54 insertions(+), 44 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9ea06d70b36a0..85b3682318e32 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21363,42 +21363,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, // must not be zext, volatile, indexed, and they must be consecutive. BaseIndexOffset LdBasePtr; - // Check if a call exists in the store chain. - auto HasCallInLdStChain = [](SDNode *Load, SDNode *Store) { - SmallPtrSet Visited; - SmallVector, 8> Worklist; - Worklist.emplace_back(Store->getOperand(0).getNode(), false); - - while (!Worklist.empty()) { - auto [Node, FoundCall] = Worklist.pop_back_val(); - if (!Visited.insert(Node).second || Node->getNumOperands() == 0) - continue; - - switch (Node->getOpcode()) { - case ISD::CALLSEQ_END: - Worklist.emplace_back(Node->getOperand(0).getNode(), true); - break; - case ISD::TokenFactor: - for (SDValue Op : Node->ops()) - Worklist.emplace_back(Op.getNode(), FoundCall); - break; - case ISD::LOAD: - if (Node == Load) - return FoundCall; - [[fallthrough]]; - default: - if (Node->getNumOperands() > 0) - Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall); - break; - } - } - return false; - }; - - auto StIt = StoreNodes.begin(); - unsigned i = 0; - while (StIt != StoreNodes.end() && i++ < NumConsecutiveStores) { - StoreSDNode *St = cast(StIt->MemNode); + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *St = cast(StoreNodes[i].MemNode); SDValue Val = peekThroughBitcasts(St->getValue()); LoadSDNode *Ld = cast(Val); @@ -21414,14 +21380,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, LdBasePtr = LdPtr; } - // Check if there is a call in the load/store chain. - if (!TLI.shouldMergeStoreOfLoadsOverCall(MemVT) && - HasCallInLdStChain(Ld, St)) { - StIt = StoreNodes.erase(StIt); - } else { - LoadNodes.push_back(MemOpLink(Ld, LdOffset)); - ++StIt; - } + // We found a potential memory operand to merge. + LoadNodes.push_back(MemOpLink(Ld, LdOffset)); } while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { @@ -21593,6 +21553,56 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); } + auto HasCallInLdStChain = [](SmallVectorImpl &StoreNodes, + SmallVectorImpl &LoadNodes, + unsigned NumStores) { + for (unsigned i = 0; i < NumStores; ++i) { + StoreSDNode *St = cast(StoreNodes[i].MemNode); + SDValue Val = peekThroughBitcasts(St->getValue()); + LoadSDNode *Ld = cast(Val); + assert(Ld == LoadNodes[i].MemNode && "Load and store mismatch"); + + SmallPtrSet Visited; + SmallVector, 8> Worklist; + Worklist.emplace_back(St->getOperand(0).getNode(), false); + + while (!Worklist.empty()) { + auto [Node, FoundCall] = Worklist.pop_back_val(); + if (!Visited.insert(Node).second || Node->getNumOperands() == 0) + continue; + + switch (Node->getOpcode()) { + case ISD::CALLSEQ_END: + Worklist.emplace_back(Node->getOperand(0).getNode(), true); + break; + case ISD::TokenFactor: + for (SDValue Op : Node->ops()) + Worklist.emplace_back(Op.getNode(), FoundCall); + break; + case ISD::LOAD: + if (Node == Ld) + return FoundCall; + [[fallthrough]]; + default: + if (Node->getNumOperands() > 0) + Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall); + break; + } + } + return false; + } + return false; + }; + + // Check if there is a call in the load/store chain. + if (!TLI.shouldMergeStoreOfLoadsOverCall(JointMemOpVT) && + HasCallInLdStChain(StoreNodes, LoadNodes, NumElem)) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + continue; + } + SDLoc LoadDL(LoadNodes[0].MemNode); SDLoc StoreDL(StoreNodes[0].MemNode); From 9faa629fd23dcf359d6d9d66d8e68f9d90c461fe Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Mon, 17 Mar 2025 11:43:52 -0300 Subject: [PATCH 13/29] No need to check operands. It's checked in the start of the loop Signed-off-by: Mikhail R. Gadelha --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 85b3682318e32..7804b74579eba 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21584,8 +21584,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, return FoundCall; [[fallthrough]]; default: - if (Node->getNumOperands() > 0) - Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall); + Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall); break; } } From b326da182c749a20b9f3fce971bb7951d00d31a2 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Mon, 17 Mar 2025 11:46:54 -0300 Subject: [PATCH 14/29] Assert operand type Signed-off-by: Mikhail R. Gadelha --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7804b74579eba..2618781c7abb4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21584,6 +21584,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, return FoundCall; [[fallthrough]]; default: + assert(Node->getOperand(0).getValueType() == MVT::Other && + "Invalid chain type"); Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall); break; } From c8580206962240caa157bfe6c8f48b3aa360ea22 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Mon, 17 Mar 2025 12:26:50 -0300 Subject: [PATCH 15/29] Moved peekThroughBitcasts into an assertion Signed-off-by: Mikhail R. Gadelha --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2618781c7abb4..04a860fb6fade 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21558,9 +21558,9 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, unsigned NumStores) { for (unsigned i = 0; i < NumStores; ++i) { StoreSDNode *St = cast(StoreNodes[i].MemNode); - SDValue Val = peekThroughBitcasts(St->getValue()); - LoadSDNode *Ld = cast(Val); - assert(Ld == LoadNodes[i].MemNode && "Load and store mismatch"); + LoadSDNode *Ld = cast(LoadNodes[i].MemNode); + assert(Ld == cast(peekThroughBitcasts(St->getValue())) && + "Load and store mismatch"); SmallPtrSet Visited; SmallVector, 8> Worklist; From b6b15211ebdca0c57f03dd74ae61ad4b5b975f06 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Mon, 17 Mar 2025 14:17:22 -0300 Subject: [PATCH 16/29] Use getChain instead of accessing the operand 0 Signed-off-by: Mikhail R. Gadelha --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 04a860fb6fade..cc4ed68dc0da2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21564,7 +21564,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, SmallPtrSet Visited; SmallVector, 8> Worklist; - Worklist.emplace_back(St->getOperand(0).getNode(), false); + Worklist.emplace_back(St->getChain().getNode(), false); while (!Worklist.empty()) { auto [Node, FoundCall] = Worklist.pop_back_val(); From 18e68eaf897a717d37cc10e61f91d14bcf61593f Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Mon, 17 Mar 2025 22:42:49 -0300 Subject: [PATCH 17/29] Make hasCallInLdStChain a member function --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 92 ++++++++++--------- 1 file changed, 49 insertions(+), 43 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cc4ed68dc0da2..bbf04eca3eff3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -792,6 +792,12 @@ namespace { SmallVectorImpl &StoreNodes, unsigned NumStores, SDNode *RootNode); + /// Helper function for tryStoreMergeOfLoads. Checks if the load/store + /// chain has a call in it. \return True if a call is found. + bool hasCallInLdStChain(SmallVectorImpl &StoreNodes, + SmallVectorImpl &LoadNodes, + unsigned NumStores); + /// This is a helper function for mergeConsecutiveStores. Given a list of /// store candidates, find the first N that are consecutive in memory. /// Returns 0 if there are not at least 2 consecutive stores to try merging. @@ -21107,6 +21113,48 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( return true; } +bool DAGCombiner::hasCallInLdStChain(SmallVectorImpl &StoreNodes, + SmallVectorImpl &LoadNodes, + unsigned NumStores) { + for (unsigned i = 0; i < NumStores; ++i) { + StoreSDNode *St = cast(StoreNodes[i].MemNode); + LoadSDNode *Ld = cast(LoadNodes[i].MemNode); + assert(Ld == cast(peekThroughBitcasts(St->getValue())) && + "Load and store mismatch"); + + SmallPtrSet Visited; + SmallVector, 8> Worklist; + Worklist.emplace_back(St->getChain().getNode(), false); + + while (!Worklist.empty()) { + auto [Node, FoundCall] = Worklist.pop_back_val(); + if (!Visited.insert(Node).second || Node->getNumOperands() == 0) + continue; + + switch (Node->getOpcode()) { + case ISD::CALLSEQ_END: + Worklist.emplace_back(Node->getOperand(0).getNode(), true); + break; + case ISD::TokenFactor: + for (SDValue Op : Node->ops()) + Worklist.emplace_back(Op.getNode(), FoundCall); + break; + case ISD::LOAD: + if (Node == Ld) + return FoundCall; + [[fallthrough]]; + default: + assert(Node->getOperand(0).getValueType() == MVT::Other && + "Invalid chain type"); + Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall); + break; + } + } + return false; + } + return false; +} + unsigned DAGCombiner::getConsecutiveStores(SmallVectorImpl &StoreNodes, int64_t ElementSizeBytes) const { @@ -21553,51 +21601,9 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); } - auto HasCallInLdStChain = [](SmallVectorImpl &StoreNodes, - SmallVectorImpl &LoadNodes, - unsigned NumStores) { - for (unsigned i = 0; i < NumStores; ++i) { - StoreSDNode *St = cast(StoreNodes[i].MemNode); - LoadSDNode *Ld = cast(LoadNodes[i].MemNode); - assert(Ld == cast(peekThroughBitcasts(St->getValue())) && - "Load and store mismatch"); - - SmallPtrSet Visited; - SmallVector, 8> Worklist; - Worklist.emplace_back(St->getChain().getNode(), false); - - while (!Worklist.empty()) { - auto [Node, FoundCall] = Worklist.pop_back_val(); - if (!Visited.insert(Node).second || Node->getNumOperands() == 0) - continue; - - switch (Node->getOpcode()) { - case ISD::CALLSEQ_END: - Worklist.emplace_back(Node->getOperand(0).getNode(), true); - break; - case ISD::TokenFactor: - for (SDValue Op : Node->ops()) - Worklist.emplace_back(Op.getNode(), FoundCall); - break; - case ISD::LOAD: - if (Node == Ld) - return FoundCall; - [[fallthrough]]; - default: - assert(Node->getOperand(0).getValueType() == MVT::Other && - "Invalid chain type"); - Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall); - break; - } - } - return false; - } - return false; - }; - // Check if there is a call in the load/store chain. if (!TLI.shouldMergeStoreOfLoadsOverCall(JointMemOpVT) && - HasCallInLdStChain(StoreNodes, LoadNodes, NumElem)) { + hasCallInLdStChain(StoreNodes, LoadNodes, NumElem)) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); NumConsecutiveStores -= NumElem; From 3bc2b224b1aa230adf0217bc1041c8fe6da3abf2 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Tue, 18 Mar 2025 15:37:17 -0300 Subject: [PATCH 18/29] Added test case Signed-off-by: Mikhail R. Gadelha --- .../CodeGen/RISCV/stores-of-loads-merging.ll | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll index 71bb4d5f41e7d..d697f911165b9 100644 --- a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll +++ b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll @@ -90,6 +90,41 @@ define void @f1(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) { store i64 %x0, ptr %q %q.1 = getelementptr i64, ptr %q, i64 1 store i64 %x1, ptr %q.1 + ret void +} +define void @i8_i16(ptr %p, ptr %q) { +; CHECK-LABEL: i8_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .cfi_offset s0, -16 +; CHECK-NEXT: .cfi_offset s1, -24 +; CHECK-NEXT: lh s1, 0(a0) +; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: call g +; CHECK-NEXT: sh s1, 0(s0) +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: .cfi_restore ra +; CHECK-NEXT: .cfi_restore s0 +; CHECK-NEXT: .cfi_restore s1 +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: ret + %p0 = getelementptr i8, ptr %p, i64 0 + %p1 = getelementptr i8, ptr %p, i64 1 + %x0 = load i8, ptr %p0, align 2 + %x1 = load i8, ptr %p1 + call void @g() + %q0 = getelementptr i8, ptr %q, i64 0 + %q1 = getelementptr i8, ptr %q, i64 1 + store i8 %x0, ptr %q0, align 2 + store i8 %x1, ptr %q1 ret void } From 904641ffb553145c38c0051be8a6c8c7812d0f26 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Wed, 19 Mar 2025 13:06:37 -0300 Subject: [PATCH 19/29] Removed duplicated test after merge Signed-off-by: Mikhail R. Gadelha --- .../CodeGen/RISCV/stores-of-loads-merging.ll | 36 ------------------- 1 file changed, 36 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll index 031d1d74cb9e7..d722dadc71565 100644 --- a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll +++ b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll @@ -95,42 +95,6 @@ define void @f1(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) { ret void } -define void @i8_i16(ptr %p, ptr %q) { -; CHECK-LABEL: i8_i16: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset ra, -8 -; CHECK-NEXT: .cfi_offset s0, -16 -; CHECK-NEXT: .cfi_offset s1, -24 -; CHECK-NEXT: lh s1, 0(a0) -; CHECK-NEXT: mv s0, a1 -; CHECK-NEXT: call g -; CHECK-NEXT: sh s1, 0(s0) -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; CHECK-NEXT: .cfi_restore ra -; CHECK-NEXT: .cfi_restore s0 -; CHECK-NEXT: .cfi_restore s1 -; CHECK-NEXT: addi sp, sp, 32 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret - %p0 = getelementptr i8, ptr %p, i64 0 - %p1 = getelementptr i8, ptr %p, i64 1 - %x0 = load i8, ptr %p0, align 2 - %x1 = load i8, ptr %p1 - call void @g() - %q0 = getelementptr i8, ptr %q, i64 0 - %q1 = getelementptr i8, ptr %q, i64 1 - store i8 %x0, ptr %q0, align 2 - store i8 %x1, ptr %q1 - ret void -} - ; Merging scalars is profitable, it reduces pressure within a single ; register class. define void @i8_i16(ptr %p, ptr %q) { From a255f16764cf8447e2c30d5c0579690788617951 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Wed, 19 Mar 2025 13:12:37 -0300 Subject: [PATCH 20/29] No need to declare intrinsics anymore Signed-off-by: Mikhail R. Gadelha --- llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll index d722dadc71565..95303f63f0776 100644 --- a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll +++ b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s -declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) declare void @g() ; TODO: Merging scalars into vectors is unprofitable because we have no From 75f4caa77cbea0cde909f77345c42d371edb8fb8 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Wed, 19 Mar 2025 13:15:59 -0300 Subject: [PATCH 21/29] Removed unused args Signed-off-by: Mikhail R. Gadelha --- llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll index 95303f63f0776..15f93954f559e 100644 --- a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll +++ b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll @@ -76,13 +76,13 @@ define void @f(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) { ret void } -define void @f1(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) { +define void @f1(ptr %p, ptr %q, double %t) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a2) +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: fcvt.wu.d a0, fa0, rtz -; CHECK-NEXT: vse64.v v8, (a3) +; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: ret %x0 = load i64, ptr %p %p.1 = getelementptr i64, ptr %p, i64 1 From de96633b1f048cf0c0737e1de924dc531a4a2625 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Wed, 19 Mar 2025 13:29:23 -0300 Subject: [PATCH 22/29] Address comment Signed-off-by: Mikhail R. Gadelha --- llvm/include/llvm/CodeGen/TargetLowering.h | 2 +- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 058d42ff962e5..58ac87206b9a6 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3522,7 +3522,7 @@ class TargetLoweringBase { /// Returns true if it's profitable to allow merging store of loads when there /// are functions calls between the load and the store. - virtual bool shouldMergeStoreOfLoadsOverCall(EVT) const { return true; } + virtual bool shouldMergeStoreOfLoadsOverCall(EVT, EVT) const { return true; } //===----------------------------------------------------------------------===// // GlobalISel Hooks diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f62a930d39fd9..c5288147e8969 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21647,7 +21647,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, } // Check if there is a call in the load/store chain. - if (!TLI.shouldMergeStoreOfLoadsOverCall(JointMemOpVT) && + if (!TLI.shouldMergeStoreOfLoadsOverCall(MemVT, JointMemOpVT) && hasCallInLdStChain(StoreNodes, LoadNodes, NumElem)) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 658d1bce2cf6e..d52be42f8569c 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -1073,8 +1073,8 @@ class RISCVTargetLowering : public TargetLowering { /// Disables storing and loading vectors by default when there are function /// calls between the load and store, since these are more expensive than just /// using scalars - bool shouldMergeStoreOfLoadsOverCall(EVT VT) const override { - return VT.isScalarInteger(); + bool shouldMergeStoreOfLoadsOverCall(EVT SrcVT, EVT MergedVT) const override { + return SrcVT.isScalarInteger() == MergedVT.isScalarInteger(); } /// For available scheduling models FDIV + two independent FMULs are much From 69c361c1909ebbe9062109361746042b78381808 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Wed, 19 Mar 2025 14:58:25 -0300 Subject: [PATCH 23/29] Address comment Signed-off-by: Mikhail R. Gadelha --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c5288147e8969..17520f80e43fe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21195,7 +21195,6 @@ bool DAGCombiner::hasCallInLdStChain(SmallVectorImpl &StoreNodes, break; } } - return false; } return false; } From 0dfd35496e4f93dac7b756142efdf705e6ade101 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Wed, 19 Mar 2025 15:03:07 -0300 Subject: [PATCH 24/29] Removed todo Signed-off-by: Mikhail R. Gadelha --- llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll index 15f93954f559e..f49c9d4f50f3b 100644 --- a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll +++ b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll @@ -3,8 +3,6 @@ declare void @g() -; TODO: Merging scalars into vectors is unprofitable because we have no -; vector CSRs which creates additional spills around the call. define void @f(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) { ; CHECK-LABEL: f: ; CHECK: # %bb.0: From e73c49d37d92817cc1c8654a6587abece1aeb55a Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Wed, 19 Mar 2025 15:37:05 -0300 Subject: [PATCH 25/29] Simplify interface Signed-off-by: Mikhail R. Gadelha --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 67 +++++++++---------- 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 17520f80e43fe..af49a65e2f688 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -794,9 +794,7 @@ namespace { /// Helper function for tryStoreMergeOfLoads. Checks if the load/store /// chain has a call in it. \return True if a call is found. - bool hasCallInLdStChain(SmallVectorImpl &StoreNodes, - SmallVectorImpl &LoadNodes, - unsigned NumStores); + bool hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld); /// This is a helper function for mergeConsecutiveStores. Given a list of /// store candidates, find the first N that are consecutive in memory. @@ -21158,42 +21156,36 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( return true; } -bool DAGCombiner::hasCallInLdStChain(SmallVectorImpl &StoreNodes, - SmallVectorImpl &LoadNodes, - unsigned NumStores) { - for (unsigned i = 0; i < NumStores; ++i) { - StoreSDNode *St = cast(StoreNodes[i].MemNode); - LoadSDNode *Ld = cast(LoadNodes[i].MemNode); - assert(Ld == cast(peekThroughBitcasts(St->getValue())) && - "Load and store mismatch"); +bool DAGCombiner::hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld) { + assert(Ld == cast(peekThroughBitcasts(St->getValue())) && + "Load and store mismatch"); - SmallPtrSet Visited; - SmallVector, 8> Worklist; - Worklist.emplace_back(St->getChain().getNode(), false); + SmallPtrSet Visited; + SmallVector, 8> Worklist; + Worklist.emplace_back(St->getChain().getNode(), false); - while (!Worklist.empty()) { - auto [Node, FoundCall] = Worklist.pop_back_val(); - if (!Visited.insert(Node).second || Node->getNumOperands() == 0) - continue; + while (!Worklist.empty()) { + auto [Node, FoundCall] = Worklist.pop_back_val(); + if (!Visited.insert(Node).second || Node->getNumOperands() == 0) + continue; - switch (Node->getOpcode()) { - case ISD::CALLSEQ_END: - Worklist.emplace_back(Node->getOperand(0).getNode(), true); - break; - case ISD::TokenFactor: - for (SDValue Op : Node->ops()) - Worklist.emplace_back(Op.getNode(), FoundCall); - break; - case ISD::LOAD: - if (Node == Ld) - return FoundCall; - [[fallthrough]]; - default: - assert(Node->getOperand(0).getValueType() == MVT::Other && - "Invalid chain type"); - Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall); - break; - } + switch (Node->getOpcode()) { + case ISD::CALLSEQ_END: + Worklist.emplace_back(Node->getOperand(0).getNode(), true); + break; + case ISD::TokenFactor: + for (SDValue Op : Node->ops()) + Worklist.emplace_back(Op.getNode(), FoundCall); + break; + case ISD::LOAD: + if (Node == Ld) + return FoundCall; + [[fallthrough]]; + default: + assert(Node->getOperand(0).getValueType() == MVT::Other && + "Invalid chain type"); + Worklist.emplace_back(Node->getOperand(0).getNode(), FoundCall); + break; } } return false; @@ -21647,7 +21639,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes, // Check if there is a call in the load/store chain. if (!TLI.shouldMergeStoreOfLoadsOverCall(MemVT, JointMemOpVT) && - hasCallInLdStChain(StoreNodes, LoadNodes, NumElem)) { + hasCallInLdStChain(cast(StoreNodes[0].MemNode), + cast(LoadNodes[0].MemNode))) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); NumConsecutiveStores -= NumElem; From d6c848db4a97103e19c398848bde0876a2c4c88e Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Thu, 20 Mar 2025 12:34:38 -0300 Subject: [PATCH 26/29] Remove assert that fails when building blender_r This asserts fails because the order of the store/loads are reversed prior to the check, if we can rotate the loads. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index af49a65e2f688..cd2a7fc3f9ab6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21157,9 +21157,6 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( } bool DAGCombiner::hasCallInLdStChain(StoreSDNode *St, LoadSDNode *Ld) { - assert(Ld == cast(peekThroughBitcasts(St->getValue())) && - "Load and store mismatch"); - SmallPtrSet Visited; SmallVector, 8> Worklist; Worklist.emplace_back(St->getChain().getNode(), false); From 0189f30577ca94e460417a9c0d9e657215361035 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Thu, 20 Mar 2025 20:31:57 -0300 Subject: [PATCH 27/29] Address comment Signed-off-by: Mikhail R. Gadelha --- llvm/lib/Target/RISCV/RISCVISelLowering.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index d52be42f8569c..4072f8ea2fc67 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -1074,7 +1074,7 @@ class RISCVTargetLowering : public TargetLowering { /// calls between the load and store, since these are more expensive than just /// using scalars bool shouldMergeStoreOfLoadsOverCall(EVT SrcVT, EVT MergedVT) const override { - return SrcVT.isScalarInteger() == MergedVT.isScalarInteger(); + return !MergedVT.isVector() || SrcVT.isVector(); } /// For available scheduling models FDIV + two independent FMULs are much From 67b3b65d9663ccebfdc5de9610ae59f796319176 Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Thu, 20 Mar 2025 21:05:25 -0300 Subject: [PATCH 28/29] Update test Signed-off-by: Mikhail R. Gadelha --- .../CodeGen/RISCV/stores-of-loads-merging.ll | 78 +++++++++---------- 1 file changed, 36 insertions(+), 42 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll index d38a634c7ac72..398b4844459ad 100644 --- a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll +++ b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll @@ -363,28 +363,26 @@ define void @two_half(ptr %p, ptr %q) { ; ZVFH-NEXT: .cfi_def_cfa_offset 32 ; ZVFH-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; ZVFH-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; ZVFH-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill +; ZVFH-NEXT: fsd fs1, 0(sp) # 8-byte Folded Spill ; ZVFH-NEXT: .cfi_offset ra, -8 ; ZVFH-NEXT: .cfi_offset s0, -16 -; ZVFH-NEXT: csrr a2, vlenb -; ZVFH-NEXT: sub sp, sp, a2 -; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb -; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFH-NEXT: .cfi_offset fs0, -24 +; ZVFH-NEXT: .cfi_offset fs1, -32 +; ZVFH-NEXT: flh fs0, 0(a0) +; ZVFH-NEXT: flh fs1, 2(a0) ; ZVFH-NEXT: mv s0, a1 ; ZVFH-NEXT: call g -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFH-NEXT: vse16.v v8, (s0) -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: .cfi_def_cfa sp, 32 +; ZVFH-NEXT: fsh fs0, 0(s0) +; ZVFH-NEXT: fsh fs1, 2(s0) ; ZVFH-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; ZVFH-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; ZVFH-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload +; ZVFH-NEXT: fld fs1, 0(sp) # 8-byte Folded Reload ; ZVFH-NEXT: .cfi_restore ra ; ZVFH-NEXT: .cfi_restore s0 +; ZVFH-NEXT: .cfi_restore fs0 +; ZVFH-NEXT: .cfi_restore fs1 ; ZVFH-NEXT: addi sp, sp, 32 ; ZVFH-NEXT: .cfi_def_cfa_offset 0 ; ZVFH-NEXT: ret @@ -409,28 +407,26 @@ define void @two_float(ptr %p, ptr %q) { ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs1, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: .cfi_offset fs0, -24 +; CHECK-NEXT: .cfi_offset fs1, -32 +; CHECK-NEXT: flw fs0, 0(a0) +; CHECK-NEXT: flw fs1, 4(a0) ; CHECK-NEXT: mv s0, a1 ; CHECK-NEXT: call g -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vse32.v v8, (s0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 32 +; CHECK-NEXT: fsw fs0, 0(s0) +; CHECK-NEXT: fsw fs1, 4(s0) ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs1, 0(sp) # 8-byte Folded Reload ; CHECK-NEXT: .cfi_restore ra ; CHECK-NEXT: .cfi_restore s0 +; CHECK-NEXT: .cfi_restore fs0 +; CHECK-NEXT: .cfi_restore fs1 ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret @@ -453,28 +449,26 @@ define void @two_double(ptr %p, ptr %q) { ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs1, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: .cfi_offset fs0, -24 +; CHECK-NEXT: .cfi_offset fs1, -32 +; CHECK-NEXT: fld fs0, 0(a0) +; CHECK-NEXT: fld fs1, 8(a0) ; CHECK-NEXT: mv s0, a1 ; CHECK-NEXT: call g -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vse64.v v8, (s0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 32 +; CHECK-NEXT: fsd fs0, 0(s0) +; CHECK-NEXT: fsd fs1, 8(s0) ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs1, 0(sp) # 8-byte Folded Reload ; CHECK-NEXT: .cfi_restore ra ; CHECK-NEXT: .cfi_restore s0 +; CHECK-NEXT: .cfi_restore fs0 +; CHECK-NEXT: .cfi_restore fs1 ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret From ed8a5fd26ce91ad78fde2b745799878641994c9a Mon Sep 17 00:00:00 2001 From: "Mikhail R. Gadelha" Date: Thu, 20 Mar 2025 21:33:13 -0300 Subject: [PATCH 29/29] Removed todo Signed-off-by: Mikhail R. Gadelha --- llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll index 398b4844459ad..fefe72bd05f42 100644 --- a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll +++ b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll @@ -398,8 +398,6 @@ define void @two_half(ptr %p, ptr %q) { ret void } -; TODO: This one is currently a vector which is unprofitable, we should -; use i64 instead. define void @two_float(ptr %p, ptr %q) { ; CHECK-LABEL: two_float: ; CHECK: # %bb.0: