diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index e32d6eab3b977..83da083debf34 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -34,6 +34,7 @@ add_llvm_target(RISCVCodeGen
   RISCVCallingConv.cpp
   RISCVCodeGenPrepare.cpp
   RISCVConstantPoolValue.cpp
+  RISCVCopyCombine.cpp
   RISCVDeadRegisterDefinitions.cpp
   RISCVExpandAtomicPseudoInsts.cpp
   RISCVExpandPseudoInsts.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index ae9410193efe1..d5e55bc60224a 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -30,6 +30,8 @@ void initializeRISCVCodeGenPreparePass(PassRegistry &);
 FunctionPass *createRISCVDeadRegisterDefinitionsPass();
 void initializeRISCVDeadRegisterDefinitionsPass(PassRegistry &);
 
+FunctionPass *createRISCVCopyCombinePass();
+void initializeRISCVCopyCombinePass(PassRegistry &);
 FunctionPass *createRISCVIndirectBranchTrackingPass();
 void initializeRISCVIndirectBranchTrackingPass(PassRegistry &);
 
diff --git a/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp b/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp
new file mode 100644
index 0000000000000..fa793d4bbba60
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVCopyCombine.cpp
@@ -0,0 +1,221 @@
+//===- RISCVCopyCombine.cpp - Remove special copy for RISC-V --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass attempts a shrink-wrap optimization for special cases, which is
+// effective when data types require extension.
+//
+// For a conditional branch whose register operand is defined by
+// `%b = COPY %a`, where `%a = COPY src` is in the same block and %a has no
+// other non-debug use, the pass rewrites the second COPY to read `src`
+// directly and deletes the first COPY.
+//
+// After finalize-isel:
+//   bb0:
+//   liveins: $x10, $x11
+//     %1:gpr = COPY $x11   ---- will be deleted by this pass
+//     %0:gpr = COPY $x10
+//     %2:gpr = COPY %1:gpr ---- without this pass, machine-sink sinks it to
+//                               bb1 and regalloc then deletes it
+//     BEQ %0:gpr, killed %3:gpr, %bb.3
+//     PseudoBR %bb1
+//
+//   bb1:
+//   bb2:
+//     BNE %2:gpr, killed %5:gpr, %bb.2
+// ...
+// After regalloc
+//  bb0:
+//    liveins: $x10, $x11
+//    renamable $x8 = COPY $x11
+//    renamable $x11 = ADDI $x0, 57 --- defs x11, so the COPY cannot be sunk
+//    BEQ killed renamable $x10, killed renamable $x11, %bb.4
+//    PseudoBR %bb.1
+//
+//  bb1:
+//  bb2:
+//    BEQ killed renamable $x8, killed renamable $x10, %bb.4
+//
+// ----->
+//
+// After this pass:
+//   bb0:
+//     liveins: $x10, $x11
+//     %0:gpr = COPY $x10
+//     %2:gpr = COPY $x11
+//     BEQ %0:gpr, killed %3:gpr, %bb.3
+//     PseudoBR %bb1
+//
+//   bb1:
+//   bb2:
+//     BNE %2:gpr, killed %5:gpr, %bb.2
+// ...
+// After regalloc
+//  bb0:
+//    liveins: $x10, $x11
+//    renamable $x12 = ADDI $x0, 57
+//    renamable $x8 = COPY $x11
+//    BEQ killed renamable $x10, killed renamable $x11, %bb.4
+//    PseudoBR %bb.1
+//
+//  bb1:
+//  bb2:
+//    BEQ killed renamable $x8, killed renamable $x10, %bb.4
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "riscv-copy-combine"
+#define RISCV_COPY_COMBINE "RISC-V Copy Combine"
+
+STATISTIC(NumCopyDeleted, "Number of copy deleted");
+
+namespace {
+/// Machine-SSA pass that forwards the source of a COPY chain feeding a
+/// conditional branch and deletes the COPY that becomes dead.
+class RISCVCopyCombine : public MachineFunctionPass {
+public:
+  static char ID;
+  // Per-function state, refreshed at the start of runOnMachineFunction().
+  const TargetInstrInfo *TII = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  const TargetRegisterInfo *TRI = nullptr;
+
+  RISCVCopyCombine() : MachineFunctionPass(ID) {}
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::IsSSA);
+  }
+
+  StringRef getPassName() const override { return RISCV_COPY_COMBINE; }
+
+private:
+  /// Try to combine the COPYs feeding the conditional branch ending \p MBB.
+  bool optimizeBlock(MachineBasicBlock &MBB);
+  /// Try to forward through the COPY chain defining branch operand \p Op.
+  bool copyCombine(MachineOperand &Op);
+};
+} // end anonymous namespace
+
+char RISCVCopyCombine::ID = 0;
+INITIALIZE_PASS(RISCVCopyCombine, DEBUG_TYPE, RISCV_COPY_COMBINE, false, false)
+
+/// Check whether it is safe to move \p From down to \p To, where \p From
+/// precedes \p To in the same basic block. The move is rejected when an
+/// instruction in [From, To) modifies a physical register read by \p From
+/// (explicit defs, overlapping registers and register-mask clobbers), or when
+/// \p From itself reports that it cannot be moved given the stores seen.
+static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To,
+                         const TargetRegisterInfo *TRI) {
+  SmallVector<Register, 2> PhysUses;
+  for (const MachineOperand &MO : From.all_uses())
+    if (MO.getReg().isPhysical())
+      PhysUses.push_back(MO.getReg());
+
+  bool SawStore = false;
+  for (auto II = From.getIterator(), E = To.getIterator(); II != E; ++II) {
+    // Keep scanning after a store: a later instruction may still clobber one
+    // of the physical registers, so the loop must not stop early.
+    for (Register PhysReg : PhysUses)
+      if (II->modifiesRegister(PhysReg, TRI))
+        return false;
+    SawStore |= II->mayStore();
+  }
+  return From.isSafeToMove(SawStore);
+}
+
+/// If \p Op is a virtual register defined by `%b = COPY %a`, and %a is a
+/// virtual register with a single non-debug use that is defined by a COPY in
+/// the same block, replace %a with that COPY's source and erase the COPY.
+/// \returns true if a COPY was deleted.
+bool RISCVCopyCombine::copyCombine(MachineOperand &Op) {
+  if (!Op.isReg())
+    return false;
+
+  Register Reg = Op.getReg();
+  if (!Reg.isVirtual())
+    return false;
+
+  MachineInstr *MI = MRI->getVRegDef(Reg);
+  if (!MI || !MI->isCopy())
+    return false;
+
+  // Check the register kind before querying use lists, and ignore debug uses
+  // so that debug info does not change code generation.
+  Register Op1reg = MI->getOperand(1).getReg();
+  if (!Op1reg.isVirtual() || !MRI->hasOneNonDBGUse(Op1reg))
+    return false;
+
+  MachineInstr *Src = MRI->getVRegDef(Op1reg);
+  if (!Src || !Src->isCopy() || Src->getParent() != MI->getParent() ||
+      Src->hasUnmodeledSideEffects() || Src->getNumDefs() != 1)
+    return false;
+
+  // replaceRegWith() only swaps the register, so a sub-register index on
+  // either operand of Src would be lost by the rewrite below.
+  if (Src->getOperand(0).getSubReg() || Src->getOperand(1).getSubReg())
+    return false;
+
+  if (!isSafeToMove(*Src, *MI, TRI))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Deleting this copy instruction "; Src->print(dbgs()));
+  Register SrcOp1reg = Src->getOperand(1).getReg();
+  MRI->replaceRegWith(Op1reg, SrcOp1reg);
+  // SrcOp1reg is now read later than before, so earlier kill flags are stale.
+  MRI->clearKillFlags(SrcOp1reg);
+  ++NumCopyDeleted;
+  Src->eraseFromParent();
+  return true;
+}
+
+/// Analyze the terminators of \p MBB and, for a conditional branch with a
+/// three-element condition, try to combine the COPYs feeding Cond[1] and
+/// Cond[2]. \returns true if the block was changed.
+bool RISCVCopyCombine::optimizeBlock(MachineBasicBlock &MBB) {
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  SmallVector<MachineOperand, 3> Cond;
+  if (TII->analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false) ||
+      Cond.empty())
+    return false;
+
+  if (!TBB || Cond.size() != 3)
+    return false;
+
+  // NOTE(review): `||` short-circuits, so Cond[2] is only tried when Cond[1]
+  // was not combined; kept as is to preserve the generated code.
+  return copyCombine(Cond[1]) || copyCombine(Cond[2]);
+}
+
+bool RISCVCopyCombine::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  TII = MF.getSubtarget().getInstrInfo();
+  MRI = &MF.getRegInfo();
+  TRI = MRI->getTargetRegisterInfo();
+
+  bool Changed = false;
+  for (MachineBasicBlock &MBB : MF)
+    Changed |= optimizeBlock(MBB);
+
+  return Changed;
+}
+
+FunctionPass *llvm::createRISCVCopyCombinePass() {
+  return new RISCVCopyCombine();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 15dd4d57727dd..86d8d8f33ca04 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -128,6 +128,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeRISCVPostLegalizerCombinerPass(*PR);
   initializeKCFIPass(*PR);
   initializeRISCVDeadRegisterDefinitionsPass(*PR);
+  initializeRISCVCopyCombinePass(*PR);
   initializeRISCVLateBranchOptPass(*PR);
   initializeRISCVMakeCompressibleOptPass(*PR);
   initializeRISCVGatherScatterLoweringPass(*PR);
@@ -455,6 +456,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteFast() {
   if (TM->getOptLevel() != CodeGenOptLevel::None &&
       EnableRISCVDeadRegisterElimination)
     addPass(createRISCVDeadRegisterDefinitionsPass());
+
   return TargetPassConfig::addRegAssignAndRewriteFast();
 }
 
@@ -598,6 +600,7 @@ void RISCVPassConfig::addPreEmitPass2() {
 }
 
 void RISCVPassConfig::addMachineSSAOptimization() {
+  addPass(createRISCVCopyCombinePass());
   addPass(createRISCVVectorPeepholePass());
   addPass(createRISCVFoldMemOffsetPass());
 
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 19de864422bc5..7ebe888b7cc52 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -96,6 +96,7 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions +; CHECK-NEXT: RISC-V Copy Combine ; CHECK-NEXT: RISC-V Vector Peephole Optimization ; CHECK-NEXT: RISC-V Fold Memory Offset ; CHECK-NEXT: Lazy Machine Block Frequency Analysis diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll index 8534ad379ebab..7667df5566c46 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll @@ -191,43 +191,43 @@ define void @amomax_d_discard(ptr %a, i64 %b) nounwind { ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32-NEXT: mv s0, a2 +; RV32-NEXT: mv s0, a1 ; RV32-NEXT: mv s1, a0 -; RV32-NEXT: lw a4, 0(a0) -; RV32-NEXT: lw a5, 4(a0) -; RV32-NEXT: mv s2, a1 +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lw a4, 4(a0) +; RV32-NEXT: mv s2, a2 ; RV32-NEXT: j .LBB11_2 ; RV32-NEXT: .LBB11_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1 -; RV32-NEXT: sw a4, 8(sp) -; RV32-NEXT: sw a5, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a4, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s1 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 8(sp) -; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a4, 12(sp) ; RV32-NEXT: bnez a0, .LBB11_6 ; RV32-NEXT: .LBB11_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: beq a5, s0, .LBB11_4 +; RV32-NEXT: beq a4, s2, .LBB11_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1 -; 
RV32-NEXT: slt a0, s0, a5 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a5 +; RV32-NEXT: slt a0, s2, a4 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: bnez a0, .LBB11_1 ; RV32-NEXT: j .LBB11_5 ; RV32-NEXT: .LBB11_4: # in Loop: Header=BB11_2 Depth=1 -; RV32-NEXT: sltu a0, s2, a4 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a5 +; RV32-NEXT: sltu a0, s0, a1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: bnez a0, .LBB11_1 ; RV32-NEXT: .LBB11_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1 -; RV32-NEXT: mv a2, s2 -; RV32-NEXT: mv a3, s0 +; RV32-NEXT: mv a2, s0 +; RV32-NEXT: mv a3, s2 ; RV32-NEXT: j .LBB11_1 ; RV32-NEXT: .LBB11_6: # %atomicrmw.end ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -267,43 +267,43 @@ define void @amomaxu_d_discard(ptr %a, i64 %b) nounwind { ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32-NEXT: mv s0, a2 +; RV32-NEXT: mv s0, a1 ; RV32-NEXT: mv s1, a0 -; RV32-NEXT: lw a4, 0(a0) -; RV32-NEXT: lw a5, 4(a0) -; RV32-NEXT: mv s2, a1 +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lw a4, 4(a0) +; RV32-NEXT: mv s2, a2 ; RV32-NEXT: j .LBB13_2 ; RV32-NEXT: .LBB13_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1 -; RV32-NEXT: sw a4, 8(sp) -; RV32-NEXT: sw a5, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a4, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s1 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 8(sp) -; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a4, 12(sp) ; RV32-NEXT: bnez a0, .LBB13_6 ; RV32-NEXT: .LBB13_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: beq a5, s0, .LBB13_4 +; RV32-NEXT: beq a4, s2, .LBB13_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1 -; RV32-NEXT: sltu a0, s0, a5 -; RV32-NEXT: mv 
a2, a4 -; RV32-NEXT: mv a3, a5 +; RV32-NEXT: sltu a0, s2, a4 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: bnez a0, .LBB13_1 ; RV32-NEXT: j .LBB13_5 ; RV32-NEXT: .LBB13_4: # in Loop: Header=BB13_2 Depth=1 -; RV32-NEXT: sltu a0, s2, a4 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a5 +; RV32-NEXT: sltu a0, s0, a1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: bnez a0, .LBB13_1 ; RV32-NEXT: .LBB13_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1 -; RV32-NEXT: mv a2, s2 -; RV32-NEXT: mv a3, s0 +; RV32-NEXT: mv a2, s0 +; RV32-NEXT: mv a3, s2 ; RV32-NEXT: j .LBB13_1 ; RV32-NEXT: .LBB13_6: # %atomicrmw.end ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -343,43 +343,43 @@ define void @amomin_d_discard(ptr %a, i64 %b) nounwind { ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32-NEXT: mv s0, a2 +; RV32-NEXT: mv s0, a1 ; RV32-NEXT: mv s1, a0 -; RV32-NEXT: lw a4, 0(a0) -; RV32-NEXT: lw a5, 4(a0) -; RV32-NEXT: mv s2, a1 +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lw a4, 4(a0) +; RV32-NEXT: mv s2, a2 ; RV32-NEXT: j .LBB15_2 ; RV32-NEXT: .LBB15_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1 -; RV32-NEXT: sw a4, 8(sp) -; RV32-NEXT: sw a5, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a4, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s1 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 8(sp) -; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a4, 12(sp) ; RV32-NEXT: bnez a0, .LBB15_6 ; RV32-NEXT: .LBB15_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: beq a5, s0, .LBB15_4 +; RV32-NEXT: beq a4, s2, .LBB15_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1 -; RV32-NEXT: slt a0, s0, a5 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a5 +; 
RV32-NEXT: slt a0, s2, a4 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: beqz a0, .LBB15_1 ; RV32-NEXT: j .LBB15_5 ; RV32-NEXT: .LBB15_4: # in Loop: Header=BB15_2 Depth=1 -; RV32-NEXT: sltu a0, s2, a4 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a5 +; RV32-NEXT: sltu a0, s0, a1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: beqz a0, .LBB15_1 ; RV32-NEXT: .LBB15_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1 -; RV32-NEXT: mv a2, s2 -; RV32-NEXT: mv a3, s0 +; RV32-NEXT: mv a2, s0 +; RV32-NEXT: mv a3, s2 ; RV32-NEXT: j .LBB15_1 ; RV32-NEXT: .LBB15_6: # %atomicrmw.end ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -419,43 +419,43 @@ define void @amominu_d_discard(ptr %a, i64 %b) nounwind { ; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32-NEXT: mv s0, a2 +; RV32-NEXT: mv s0, a1 ; RV32-NEXT: mv s1, a0 -; RV32-NEXT: lw a4, 0(a0) -; RV32-NEXT: lw a5, 4(a0) -; RV32-NEXT: mv s2, a1 +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lw a4, 4(a0) +; RV32-NEXT: mv s2, a2 ; RV32-NEXT: j .LBB17_2 ; RV32-NEXT: .LBB17_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1 -; RV32-NEXT: sw a4, 8(sp) -; RV32-NEXT: sw a5, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a4, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s1 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 8(sp) -; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a4, 12(sp) ; RV32-NEXT: bnez a0, .LBB17_6 ; RV32-NEXT: .LBB17_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: beq a5, s0, .LBB17_4 +; RV32-NEXT: beq a4, s2, .LBB17_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1 -; RV32-NEXT: sltu a0, s0, a5 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a5 +; RV32-NEXT: sltu a0, s2, a4 +; RV32-NEXT: 
mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: beqz a0, .LBB17_1 ; RV32-NEXT: j .LBB17_5 ; RV32-NEXT: .LBB17_4: # in Loop: Header=BB17_2 Depth=1 -; RV32-NEXT: sltu a0, s2, a4 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a5 +; RV32-NEXT: sltu a0, s0, a1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: beqz a0, .LBB17_1 ; RV32-NEXT: .LBB17_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1 -; RV32-NEXT: mv a2, s2 -; RV32-NEXT: mv a3, s0 +; RV32-NEXT: mv a2, s0 +; RV32-NEXT: mv a3, s2 ; RV32-NEXT: j .LBB17_1 ; RV32-NEXT: .LBB17_6: # %atomicrmw.end ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 1e5acd2575b88..759bf13fdd47d 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -26072,46 +26072,45 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB220_2 ; RV32I-NEXT: .LBB220_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB220_7 ; RV32I-NEXT: .LBB220_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB220_4 +; RV32I-NEXT: beq a1, s2, .LBB220_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in 
Loop: Header=BB220_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a0, s2, a1 ; RV32I-NEXT: j .LBB220_5 ; RV32I-NEXT: .LBB220_4: # in Loop: Header=BB220_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB220_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB220_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB220_1 ; RV32I-NEXT: .LBB220_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26126,46 +26125,45 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB220_2 ; RV32IA-NEXT: .LBB220_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB220_7 ; RV32IA-NEXT: .LBB220_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB220_4 +; RV32IA-NEXT: beq a1, s2, 
.LBB220_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a0, s2, a1 ; RV32IA-NEXT: j .LBB220_5 ; RV32IA-NEXT: .LBB220_4: # in Loop: Header=BB220_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB220_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB220_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB220_1 ; RV32IA-NEXT: .LBB220_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26225,46 +26223,45 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB221_2 ; RV32I-NEXT: .LBB221_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB221_7 ; RV32I-NEXT: .LBB221_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: 
Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB221_4 +; RV32I-NEXT: beq a1, s2, .LBB221_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a0, s2, a1 ; RV32I-NEXT: j .LBB221_5 ; RV32I-NEXT: .LBB221_4: # in Loop: Header=BB221_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB221_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB221_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB221_1 ; RV32I-NEXT: .LBB221_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26279,46 +26276,45 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB221_2 ; RV32IA-NEXT: .LBB221_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 2 ; RV32IA-NEXT: li a5, 2 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB221_7 ; RV32IA-NEXT: 
.LBB221_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB221_4 +; RV32IA-NEXT: beq a1, s2, .LBB221_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a0, s2, a1 ; RV32IA-NEXT: j .LBB221_5 ; RV32IA-NEXT: .LBB221_4: # in Loop: Header=BB221_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB221_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB221_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB221_1 ; RV32IA-NEXT: .LBB221_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26383,46 +26379,45 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB222_2 ; RV32I-NEXT: .LBB222_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 3 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; 
RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB222_7 ; RV32I-NEXT: .LBB222_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB222_4 +; RV32I-NEXT: beq a1, s2, .LBB222_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a0, s2, a1 ; RV32I-NEXT: j .LBB222_5 ; RV32I-NEXT: .LBB222_4: # in Loop: Header=BB222_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB222_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB222_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB222_1 ; RV32I-NEXT: .LBB222_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26437,46 +26432,45 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB222_2 ; RV32IA-NEXT: .LBB222_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 3 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call 
__atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB222_7 ; RV32IA-NEXT: .LBB222_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB222_4 +; RV32IA-NEXT: beq a1, s2, .LBB222_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a0, s2, a1 ; RV32IA-NEXT: j .LBB222_5 ; RV32IA-NEXT: .LBB222_4: # in Loop: Header=BB222_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB222_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB222_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB222_1 ; RV32IA-NEXT: .LBB222_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26541,46 +26535,45 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB223_2 ; RV32I-NEXT: .LBB223_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: 
li a4, 4 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB223_7 ; RV32I-NEXT: .LBB223_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB223_4 +; RV32I-NEXT: beq a1, s2, .LBB223_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a0, s2, a1 ; RV32I-NEXT: j .LBB223_5 ; RV32I-NEXT: .LBB223_4: # in Loop: Header=BB223_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB223_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB223_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB223_1 ; RV32I-NEXT: .LBB223_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26595,46 +26588,45 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB223_2 ; RV32IA-NEXT: .LBB223_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: 
sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 4 ; RV32IA-NEXT: li a5, 2 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB223_7 ; RV32IA-NEXT: .LBB223_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB223_4 +; RV32IA-NEXT: beq a1, s2, .LBB223_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a0, s2, a1 ; RV32IA-NEXT: j .LBB223_5 ; RV32IA-NEXT: .LBB223_4: # in Loop: Header=BB223_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB223_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB223_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB223_1 ; RV32IA-NEXT: .LBB223_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26699,46 +26691,45 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB224_2 ; RV32I-NEXT: .LBB224_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB224_2 
Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB224_7 ; RV32I-NEXT: .LBB224_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB224_4 +; RV32I-NEXT: beq a1, s2, .LBB224_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a0, s2, a1 ; RV32I-NEXT: j .LBB224_5 ; RV32I-NEXT: .LBB224_4: # in Loop: Header=BB224_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB224_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB224_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB224_1 ; RV32I-NEXT: .LBB224_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26753,46 +26744,45 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB224_2 ; RV32IA-NEXT: .LBB224_1: # 
%atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB224_7 ; RV32IA-NEXT: .LBB224_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB224_4 +; RV32IA-NEXT: beq a1, s2, .LBB224_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a0, s2, a1 ; RV32IA-NEXT: j .LBB224_5 ; RV32IA-NEXT: .LBB224_4: # in Loop: Header=BB224_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB224_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB224_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB224_1 ; RV32IA-NEXT: .LBB224_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26857,46 +26847,45 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw 
a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB225_2 ; RV32I-NEXT: .LBB225_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB225_7 ; RV32I-NEXT: .LBB225_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB225_4 +; RV32I-NEXT: beq a1, s2, .LBB225_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a0, s2, a1 ; RV32I-NEXT: j .LBB225_5 ; RV32I-NEXT: .LBB225_4: # in Loop: Header=BB225_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB225_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: beqz a0, .LBB225_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB225_1 ; RV32I-NEXT: .LBB225_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26911,46 +26900,45 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 
4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB225_2 ; RV32IA-NEXT: .LBB225_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB225_7 ; RV32IA-NEXT: .LBB225_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB225_4 +; RV32IA-NEXT: beq a1, s2, .LBB225_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a0, s2, a1 ; RV32IA-NEXT: j .LBB225_5 ; RV32IA-NEXT: .LBB225_4: # in Loop: Header=BB225_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB225_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: beqz a0, .LBB225_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB225_1 ; RV32IA-NEXT: .LBB225_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27010,46 +26998,45 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; 
RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB226_2 ; RV32I-NEXT: .LBB226_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB226_7 ; RV32I-NEXT: .LBB226_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB226_4 +; RV32I-NEXT: beq a1, s2, .LBB226_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a0, s2, a1 ; RV32I-NEXT: j .LBB226_5 ; RV32I-NEXT: .LBB226_4: # in Loop: Header=BB226_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB226_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: beqz a0, .LBB226_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB226_1 ; RV32I-NEXT: .LBB226_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27064,46 +27051,45 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 
4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB226_2 ; RV32IA-NEXT: .LBB226_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 2 ; RV32IA-NEXT: li a5, 2 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB226_7 ; RV32IA-NEXT: .LBB226_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB226_4 +; RV32IA-NEXT: beq a1, s2, .LBB226_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a0, s2, a1 ; RV32IA-NEXT: j .LBB226_5 ; RV32IA-NEXT: .LBB226_4: # in Loop: Header=BB226_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB226_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: beqz a0, .LBB226_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB226_1 ; RV32IA-NEXT: .LBB226_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27168,46 +27154,45 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte 
Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB227_2 ; RV32I-NEXT: .LBB227_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 3 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB227_7 ; RV32I-NEXT: .LBB227_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB227_4 +; RV32I-NEXT: beq a1, s2, .LBB227_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a0, s2, a1 ; RV32I-NEXT: j .LBB227_5 ; RV32I-NEXT: .LBB227_4: # in Loop: Header=BB227_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB227_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: beqz a0, .LBB227_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB227_1 ; RV32I-NEXT: .LBB227_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27222,46 +27207,45 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind 
{ ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB227_2 ; RV32IA-NEXT: .LBB227_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 3 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB227_7 ; RV32IA-NEXT: .LBB227_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB227_4 +; RV32IA-NEXT: beq a1, s2, .LBB227_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a0, s2, a1 ; RV32IA-NEXT: j .LBB227_5 ; RV32IA-NEXT: .LBB227_4: # in Loop: Header=BB227_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB227_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: beqz a0, .LBB227_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB227_1 ; RV32IA-NEXT: .LBB227_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded 
Reload @@ -27326,46 +27310,45 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB228_2 ; RV32I-NEXT: .LBB228_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 4 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB228_7 ; RV32I-NEXT: .LBB228_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB228_4 +; RV32I-NEXT: beq a1, s2, .LBB228_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a0, s2, a1 ; RV32I-NEXT: j .LBB228_5 ; RV32I-NEXT: .LBB228_4: # in Loop: Header=BB228_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB228_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: beqz a0, .LBB228_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB228_1 ; RV32I-NEXT: .LBB228_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; 
RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27380,46 +27363,45 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB228_2 ; RV32IA-NEXT: .LBB228_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 4 ; RV32IA-NEXT: li a5, 2 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB228_7 ; RV32IA-NEXT: .LBB228_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB228_4 +; RV32IA-NEXT: beq a1, s2, .LBB228_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a0, s2, a1 ; RV32IA-NEXT: j .LBB228_5 ; RV32IA-NEXT: .LBB228_4: # in Loop: Header=BB228_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB228_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: beqz a0, .LBB228_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB228_1 ; RV32IA-NEXT: .LBB228_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; 
RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27484,46 +27466,45 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB229_2 ; RV32I-NEXT: .LBB229_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB229_7 ; RV32I-NEXT: .LBB229_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB229_4 +; RV32I-NEXT: beq a1, s2, .LBB229_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a0, s2, a1 ; RV32I-NEXT: j .LBB229_5 ; RV32I-NEXT: .LBB229_4: # in Loop: Header=BB229_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB229_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: beqz a0, .LBB229_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB229_1 ; RV32I-NEXT: .LBB229_7: # %atomicrmw.end ; 
RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27538,46 +27519,45 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB229_2 ; RV32IA-NEXT: .LBB229_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB229_7 ; RV32IA-NEXT: .LBB229_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB229_4 +; RV32IA-NEXT: beq a1, s2, .LBB229_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a0, s2, a1 ; RV32IA-NEXT: j .LBB229_5 ; RV32IA-NEXT: .LBB229_4: # in Loop: Header=BB229_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB229_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: beqz a0, .LBB229_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; 
RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB229_1 ; RV32IA-NEXT: .LBB229_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27642,46 +27622,45 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB230_2 ; RV32I-NEXT: .LBB230_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB230_7 ; RV32I-NEXT: .LBB230_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB230_4 +; RV32I-NEXT: beq a1, s2, .LBB230_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a0, s2, a1 ; RV32I-NEXT: j .LBB230_5 ; RV32I-NEXT: .LBB230_4: # in Loop: Header=BB230_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB230_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB230_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32I-NEXT: mv a2, s2 
-; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB230_1 ; RV32I-NEXT: .LBB230_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27696,46 +27675,45 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB230_2 ; RV32IA-NEXT: .LBB230_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB230_7 ; RV32IA-NEXT: .LBB230_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB230_4 +; RV32IA-NEXT: beq a1, s2, .LBB230_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a0, s2, a1 ; RV32IA-NEXT: j .LBB230_5 ; RV32IA-NEXT: .LBB230_4: # in Loop: Header=BB230_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB230_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB230_1 ; RV32IA-NEXT: # %bb.6: # 
%atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB230_1 ; RV32IA-NEXT: .LBB230_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27795,46 +27773,45 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB231_2 ; RV32I-NEXT: .LBB231_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB231_7 ; RV32I-NEXT: .LBB231_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB231_4 +; RV32I-NEXT: beq a1, s2, .LBB231_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a0, s2, a1 ; RV32I-NEXT: j .LBB231_5 ; RV32I-NEXT: .LBB231_4: # in Loop: Header=BB231_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB231_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; 
RV32I-NEXT: bnez a0, .LBB231_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB231_1 ; RV32I-NEXT: .LBB231_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27849,46 +27826,45 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB231_2 ; RV32IA-NEXT: .LBB231_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 2 ; RV32IA-NEXT: li a5, 2 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB231_7 ; RV32IA-NEXT: .LBB231_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB231_4 +; RV32IA-NEXT: beq a1, s2, .LBB231_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a0, s2, a1 ; RV32IA-NEXT: j .LBB231_5 ; RV32IA-NEXT: .LBB231_4: # in Loop: Header=BB231_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB231_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB231_2 
Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB231_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB231_1 ; RV32IA-NEXT: .LBB231_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27953,46 +27929,45 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB232_2 ; RV32I-NEXT: .LBB232_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 3 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB232_7 ; RV32I-NEXT: .LBB232_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB232_4 +; RV32I-NEXT: beq a1, s2, .LBB232_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a0, s2, a1 ; RV32I-NEXT: j .LBB232_5 ; RV32I-NEXT: .LBB232_4: # in Loop: Header=BB232_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB232_5: 
# %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB232_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB232_1 ; RV32I-NEXT: .LBB232_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28007,46 +27982,45 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB232_2 ; RV32IA-NEXT: .LBB232_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 3 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB232_7 ; RV32IA-NEXT: .LBB232_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB232_4 +; RV32IA-NEXT: beq a1, s2, .LBB232_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a0, s2, a1 ; RV32IA-NEXT: j .LBB232_5 ; RV32IA-NEXT: .LBB232_4: # in Loop: Header=BB232_2 Depth=1 -; 
RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB232_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB232_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB232_1 ; RV32IA-NEXT: .LBB232_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28111,46 +28085,45 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB233_2 ; RV32I-NEXT: .LBB233_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 4 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB233_7 ; RV32I-NEXT: .LBB233_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB233_4 +; RV32I-NEXT: beq a1, s2, .LBB233_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a0, s2, a1 ; RV32I-NEXT: j 
.LBB233_5 ; RV32I-NEXT: .LBB233_4: # in Loop: Header=BB233_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB233_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB233_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB233_1 ; RV32I-NEXT: .LBB233_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28165,46 +28138,45 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB233_2 ; RV32IA-NEXT: .LBB233_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 4 ; RV32IA-NEXT: li a5, 2 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB233_7 ; RV32IA-NEXT: .LBB233_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB233_4 +; RV32IA-NEXT: beq a1, s2, .LBB233_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1 -; 
RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a0, s2, a1 ; RV32IA-NEXT: j .LBB233_5 ; RV32IA-NEXT: .LBB233_4: # in Loop: Header=BB233_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB233_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB233_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB233_1 ; RV32IA-NEXT: .LBB233_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28269,46 +28241,45 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB234_2 ; RV32I-NEXT: .LBB234_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB234_7 ; RV32I-NEXT: .LBB234_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB234_4 +; RV32I-NEXT: beq a1, s2, .LBB234_4 ; RV32I-NEXT: # %bb.3: # 
%atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a0, s2, a1 ; RV32I-NEXT: j .LBB234_5 ; RV32I-NEXT: .LBB234_4: # in Loop: Header=BB234_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB234_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB234_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB234_1 ; RV32I-NEXT: .LBB234_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28323,46 +28294,45 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB234_2 ; RV32IA-NEXT: .LBB234_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB234_7 ; RV32IA-NEXT: .LBB234_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, 
.LBB234_4 +; RV32IA-NEXT: beq a1, s2, .LBB234_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a0, s2, a1 ; RV32IA-NEXT: j .LBB234_5 ; RV32IA-NEXT: .LBB234_4: # in Loop: Header=BB234_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB234_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB234_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB234_1 ; RV32IA-NEXT: .LBB234_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28427,46 +28397,45 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB235_2 ; RV32I-NEXT: .LBB235_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB235_7 ; RV32I-NEXT: .LBB235_2: # %atomicrmw.start ; 
RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB235_4 +; RV32I-NEXT: beq a1, s2, .LBB235_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a0, s2, a1 ; RV32I-NEXT: j .LBB235_5 ; RV32I-NEXT: .LBB235_4: # in Loop: Header=BB235_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB235_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: beqz a0, .LBB235_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB235_1 ; RV32I-NEXT: .LBB235_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28481,46 +28450,45 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB235_2 ; RV32IA-NEXT: .LBB235_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; 
RV32IA-NEXT: bnez a0, .LBB235_7 ; RV32IA-NEXT: .LBB235_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB235_4 +; RV32IA-NEXT: beq a1, s2, .LBB235_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a0, s2, a1 ; RV32IA-NEXT: j .LBB235_5 ; RV32IA-NEXT: .LBB235_4: # in Loop: Header=BB235_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB235_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: beqz a0, .LBB235_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB235_1 ; RV32IA-NEXT: .LBB235_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28580,46 +28548,45 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB236_2 ; RV32I-NEXT: .LBB236_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; 
RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB236_7 ; RV32I-NEXT: .LBB236_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB236_4 +; RV32I-NEXT: beq a1, s2, .LBB236_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a0, s2, a1 ; RV32I-NEXT: j .LBB236_5 ; RV32I-NEXT: .LBB236_4: # in Loop: Header=BB236_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB236_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: beqz a0, .LBB236_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB236_1 ; RV32I-NEXT: .LBB236_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28634,46 +28601,45 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB236_2 ; RV32IA-NEXT: .LBB236_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 2 ; RV32IA-NEXT: li a5, 2 ; 
RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB236_7 ; RV32IA-NEXT: .LBB236_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB236_4 +; RV32IA-NEXT: beq a1, s2, .LBB236_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a0, s2, a1 ; RV32IA-NEXT: j .LBB236_5 ; RV32IA-NEXT: .LBB236_4: # in Loop: Header=BB236_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB236_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: beqz a0, .LBB236_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB236_1 ; RV32IA-NEXT: .LBB236_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28738,46 +28704,45 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB237_2 ; RV32I-NEXT: .LBB237_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 
12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 3 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB237_7 ; RV32I-NEXT: .LBB237_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB237_4 +; RV32I-NEXT: beq a1, s2, .LBB237_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a0, s2, a1 ; RV32I-NEXT: j .LBB237_5 ; RV32I-NEXT: .LBB237_4: # in Loop: Header=BB237_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB237_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: beqz a0, .LBB237_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB237_1 ; RV32I-NEXT: .LBB237_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28792,46 +28757,45 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB237_2 ; RV32IA-NEXT: .LBB237_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1 ; RV32IA-NEXT: sw a4, 
8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 3 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB237_7 ; RV32IA-NEXT: .LBB237_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB237_4 +; RV32IA-NEXT: beq a1, s2, .LBB237_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a0, s2, a1 ; RV32IA-NEXT: j .LBB237_5 ; RV32IA-NEXT: .LBB237_4: # in Loop: Header=BB237_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB237_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: beqz a0, .LBB237_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB237_1 ; RV32IA-NEXT: .LBB237_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28896,46 +28860,45 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB238_2 ; RV32I-NEXT: .LBB238_1: # 
%atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 4 ; RV32I-NEXT: li a5, 2 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB238_7 ; RV32I-NEXT: .LBB238_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB238_4 +; RV32I-NEXT: beq a1, s2, .LBB238_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a0, s2, a1 ; RV32I-NEXT: j .LBB238_5 ; RV32I-NEXT: .LBB238_4: # in Loop: Header=BB238_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB238_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: beqz a0, .LBB238_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB238_1 ; RV32I-NEXT: .LBB238_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28950,46 +28913,45 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 
; RV32IA-NEXT: j .LBB238_2 ; RV32IA-NEXT: .LBB238_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 4 ; RV32IA-NEXT: li a5, 2 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB238_7 ; RV32IA-NEXT: .LBB238_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB238_4 +; RV32IA-NEXT: beq a1, s2, .LBB238_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a0, s2, a1 ; RV32IA-NEXT: j .LBB238_5 ; RV32IA-NEXT: .LBB238_4: # in Loop: Header=BB238_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB238_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: beqz a0, .LBB238_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB238_1 ; RV32IA-NEXT: .LBB238_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -29054,46 +29016,45 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw 
a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB239_2 ; RV32I-NEXT: .LBB239_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB239_7 ; RV32I-NEXT: .LBB239_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB239_4 +; RV32I-NEXT: beq a1, s2, .LBB239_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a0, s2, a1 ; RV32I-NEXT: j .LBB239_5 ; RV32I-NEXT: .LBB239_4: # in Loop: Header=BB239_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB239_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: beqz a0, .LBB239_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB239_1 ; RV32I-NEXT: .LBB239_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -29108,46 +29069,45 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, 
a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB239_2 ; RV32IA-NEXT: .LBB239_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB239_7 ; RV32IA-NEXT: .LBB239_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB239_4 +; RV32IA-NEXT: beq a1, s2, .LBB239_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a0, s2, a1 ; RV32IA-NEXT: j .LBB239_5 ; RV32IA-NEXT: .LBB239_4: # in Loop: Header=BB239_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB239_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: beqz a0, .LBB239_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB239_1 ; RV32IA-NEXT: .LBB239_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll index b9702e9fe0fc2..8f66a00d3d533 100644 --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ 
b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -3182,46 +3182,45 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB43_2 ; RV32I-NEXT: .LBB43_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB43_7 ; RV32I-NEXT: .LBB43_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB43_4 +; RV32I-NEXT: beq a1, s2, .LBB43_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a0, s2, a1 ; RV32I-NEXT: j .LBB43_5 ; RV32I-NEXT: .LBB43_4: # in Loop: Header=BB43_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB43_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB43_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB43_1 ; RV32I-NEXT: .LBB43_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) 
# 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3236,46 +3235,45 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB43_2 ; RV32IA-NEXT: .LBB43_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB43_7 ; RV32IA-NEXT: .LBB43_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB43_4 +; RV32IA-NEXT: beq a1, s2, .LBB43_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a0, s2, a1 ; RV32IA-NEXT: j .LBB43_5 ; RV32IA-NEXT: .LBB43_4: # in Loop: Header=BB43_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB43_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB43_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB43_1 ; RV32IA-NEXT: .LBB43_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv 
a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3335,46 +3333,45 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB44_2 ; RV32I-NEXT: .LBB44_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB44_7 ; RV32I-NEXT: .LBB44_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB44_4 +; RV32I-NEXT: beq a1, s2, .LBB44_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a0, s2, a1 ; RV32I-NEXT: j .LBB44_5 ; RV32I-NEXT: .LBB44_4: # in Loop: Header=BB44_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB44_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: beqz a0, .LBB44_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB44_1 ; RV32I-NEXT: .LBB44_7: # %atomicrmw.end ; 
RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3389,46 +3386,45 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB44_2 ; RV32IA-NEXT: .LBB44_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB44_7 ; RV32IA-NEXT: .LBB44_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB44_4 +; RV32IA-NEXT: beq a1, s2, .LBB44_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a0, s2, a1 ; RV32IA-NEXT: j .LBB44_5 ; RV32IA-NEXT: .LBB44_4: # in Loop: Header=BB44_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB44_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: beqz a0, .LBB44_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; 
RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB44_1 ; RV32IA-NEXT: .LBB44_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3488,46 +3484,45 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB45_2 ; RV32I-NEXT: .LBB45_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB45_7 ; RV32I-NEXT: .LBB45_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB45_4 +; RV32I-NEXT: beq a1, s2, .LBB45_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a0, s2, a1 ; RV32I-NEXT: j .LBB45_5 ; RV32I-NEXT: .LBB45_4: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB45_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB45_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; 
RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB45_1 ; RV32I-NEXT: .LBB45_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3542,46 +3537,45 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB45_2 ; RV32IA-NEXT: .LBB45_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB45_7 ; RV32IA-NEXT: .LBB45_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB45_4 +; RV32IA-NEXT: beq a1, s2, .LBB45_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a0, s2, a1 ; RV32IA-NEXT: j .LBB45_5 ; RV32IA-NEXT: .LBB45_4: # in Loop: Header=BB45_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB45_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1 ; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB45_1 ; RV32IA-NEXT: # %bb.6: # 
%atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB45_1 ; RV32IA-NEXT: .LBB45_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3641,46 +3635,45 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB46_2 ; RV32I-NEXT: .LBB46_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB46_7 ; RV32I-NEXT: .LBB46_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB46_4 +; RV32I-NEXT: beq a1, s2, .LBB46_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a0, s2, a1 ; RV32I-NEXT: j .LBB46_5 ; RV32I-NEXT: .LBB46_4: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB46_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 ; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a3, a1 ; 
RV32I-NEXT: beqz a0, .LBB46_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: j .LBB46_1 ; RV32I-NEXT: .LBB46_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3695,46 +3688,45 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB46_2 ; RV32IA-NEXT: .LBB46_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB46_7 ; RV32IA-NEXT: .LBB46_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB46_4 +; RV32IA-NEXT: beq a1, s2, .LBB46_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a0, s2, a1 ; RV32IA-NEXT: j .LBB46_5 ; RV32IA-NEXT: .LBB46_4: # in Loop: Header=BB46_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB46_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1 ; 
RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: beqz a0, .LBB46_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: j .LBB46_1 ; RV32IA-NEXT: .LBB46_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll index 2db6f80f4fd61..9db89dbf6a1fd 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll @@ -518,44 +518,43 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: .cfi_offset s0, -8 ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB3_3 ; RV32I-NEXT: .LBB3_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: sltu a0, a5, s0 +; RV32I-NEXT: sltu a0, a1, s2 ; RV32I-NEXT: .LBB3_2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 ; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: and a1, a0, s2 -; RV32I-NEXT: and a0, a0, s0 -; RV32I-NEXT: sltu a3, a4, a1 -; RV32I-NEXT: sub a0, a5, a0 -; RV32I-NEXT: sub a2, a4, a1 +; RV32I-NEXT: and a2, a0, s0 +; RV32I-NEXT: and a0, a0, s2 +; RV32I-NEXT: sltu a3, a4, a2 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: sub a2, a4, a2 ; RV32I-NEXT: sub a3, a0, a3 ; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, 
sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB3_5 ; RV32I-NEXT: .LBB3_3: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: bne a5, s0, .LBB3_1 +; RV32I-NEXT: bne a1, s2, .LBB3_1 ; RV32I-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: sltu a0, a4, s2 +; RV32I-NEXT: sltu a0, a4, s0 ; RV32I-NEXT: j .LBB3_2 ; RV32I-NEXT: .LBB3_5: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -580,44 +579,43 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: .cfi_offset s0, -8 ; RV32IA-NEXT: .cfi_offset s1, -12 ; RV32IA-NEXT: .cfi_offset s2, -16 -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB3_3 ; RV32IA-NEXT: .LBB3_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: sltu a0, a5, s0 +; RV32IA-NEXT: sltu a0, a1, s2 ; RV32IA-NEXT: .LBB3_2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 ; RV32IA-NEXT: neg a0, a0 -; RV32IA-NEXT: and a1, a0, s2 -; RV32IA-NEXT: and a0, a0, s0 -; RV32IA-NEXT: sltu a3, a4, a1 -; RV32IA-NEXT: sub a0, a5, a0 -; RV32IA-NEXT: sub a2, a4, a1 +; RV32IA-NEXT: and a2, a0, s0 +; RV32IA-NEXT: and a0, a0, s2 +; RV32IA-NEXT: sltu a3, a4, a2 +; RV32IA-NEXT: sub a0, a1, a0 +; RV32IA-NEXT: sub a2, a4, a2 ; RV32IA-NEXT: sub a3, a0, a3 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; 
RV32IA-NEXT: li a5, 5 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB3_5 ; RV32IA-NEXT: .LBB3_3: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: bne a5, s0, .LBB3_1 +; RV32IA-NEXT: bne a1, s2, .LBB3_1 ; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: sltu a0, a4, s2 +; RV32IA-NEXT: sltu a0, a4, s0 ; RV32IA-NEXT: j .LBB3_2 ; RV32IA-NEXT: .LBB3_5: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll index ae1db4f1d62da..ac38efe48baa7 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll @@ -493,42 +493,41 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: .cfi_offset s0, -8 ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB3_3 ; RV32I-NEXT: .LBB3_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: sltu a0, a5, s0 +; RV32I-NEXT: sltu a0, a1, s2 ; RV32I-NEXT: .LBB3_2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: addi a1, a4, 1 +; RV32I-NEXT: addi a2, a4, 1 ; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: seqz a3, a1 -; RV32I-NEXT: and a2, a0, a1 -; RV32I-NEXT: add a3, a5, a3 +; RV32I-NEXT: seqz a3, a2 +; RV32I-NEXT: and a2, a0, a2 +; RV32I-NEXT: add a3, a1, a3 ; RV32I-NEXT: and a3, a0, a3 ; 
RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB3_5 ; RV32I-NEXT: .LBB3_3: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: bne a5, s0, .LBB3_1 +; RV32I-NEXT: bne a1, s2, .LBB3_1 ; RV32I-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: sltu a0, a4, s2 +; RV32I-NEXT: sltu a0, a4, s0 ; RV32I-NEXT: j .LBB3_2 ; RV32I-NEXT: .LBB3_5: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -553,42 +552,41 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: .cfi_offset s0, -8 ; RV32IA-NEXT: .cfi_offset s1, -12 ; RV32IA-NEXT: .cfi_offset s2, -16 -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 ; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB3_3 ; RV32IA-NEXT: .LBB3_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: sltu a0, a5, s0 +; RV32IA-NEXT: sltu a0, a1, s2 ; RV32IA-NEXT: .LBB3_2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: addi a1, a4, 1 +; RV32IA-NEXT: addi a2, a4, 1 ; RV32IA-NEXT: neg a0, a0 -; RV32IA-NEXT: seqz a3, a1 -; RV32IA-NEXT: and a2, a0, a1 -; RV32IA-NEXT: add a3, a5, a3 +; RV32IA-NEXT: seqz a3, a2 +; RV32IA-NEXT: and a2, a0, a2 +; RV32IA-NEXT: add a3, a1, a3 ; RV32IA-NEXT: and a3, a0, a3 ; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; 
RV32IA-NEXT: li a5, 5 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB3_5 ; RV32IA-NEXT: .LBB3_3: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: bne a5, s0, .LBB3_1 +; RV32IA-NEXT: bne a1, s2, .LBB3_1 ; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: sltu a0, a4, s2 +; RV32IA-NEXT: sltu a0, a4, s0 ; RV32IA-NEXT: j .LBB3_2 ; RV32IA-NEXT: .LBB3_5: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1281,50 +1279,49 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: .cfi_offset s0, -8 ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 -; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a5, 0(a0) -; RV32I-NEXT: lw a4, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lw a4, 0(a0) +; RV32I-NEXT: lw a1, 4(a0) +; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: j .LBB7_2 ; RV32I-NEXT: .LBB7_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 12(sp) +; RV32I-NEXT: sw a4, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 8(sp) -; RV32I-NEXT: lw a4, 12(sp) +; RV32I-NEXT: lw a4, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) ; RV32I-NEXT: bnez a0, .LBB7_7 ; RV32I-NEXT: .LBB7_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a4, s0, .LBB7_4 +; RV32I-NEXT: beq a1, s2, .LBB7_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a4 +; 
RV32I-NEXT: sltu a0, s2, a1 ; RV32I-NEXT: j .LBB7_5 ; RV32I-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a5 +; RV32I-NEXT: sltu a0, s0, a4 ; RV32I-NEXT: .LBB7_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32I-NEXT: or a1, a5, a4 -; RV32I-NEXT: seqz a1, a1 -; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: or a2, a4, a1 +; RV32I-NEXT: seqz a2, a2 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: bnez a0, .LBB7_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32I-NEXT: seqz a0, a5 -; RV32I-NEXT: sub a3, a4, a0 -; RV32I-NEXT: addi a2, a5, -1 +; RV32I-NEXT: seqz a0, a4 +; RV32I-NEXT: sub a3, a1, a0 +; RV32I-NEXT: addi a2, a4, -1 ; RV32I-NEXT: j .LBB7_1 ; RV32I-NEXT: .LBB7_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a5 -; RV32I-NEXT: mv a1, a4 +; RV32I-NEXT: mv a0, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1349,50 +1346,49 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: .cfi_offset s0, -8 ; RV32IA-NEXT: .cfi_offset s1, -12 ; RV32IA-NEXT: .cfi_offset s2, -16 -; RV32IA-NEXT: mv s0, a2 +; RV32IA-NEXT: mv s0, a1 ; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a5, 0(a0) -; RV32IA-NEXT: lw a4, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: lw a4, 0(a0) +; RV32IA-NEXT: lw a1, 4(a0) +; RV32IA-NEXT: mv s2, a2 ; RV32IA-NEXT: j .LBB7_2 ; RV32IA-NEXT: .LBB7_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32IA-NEXT: sw a5, 8(sp) -; RV32IA-NEXT: sw a4, 12(sp) +; RV32IA-NEXT: sw a4, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 8(sp) -; RV32IA-NEXT: lw a4, 12(sp) +; 
RV32IA-NEXT: lw a4, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) ; RV32IA-NEXT: bnez a0, .LBB7_7 ; RV32IA-NEXT: .LBB7_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a4, s0, .LBB7_4 +; RV32IA-NEXT: beq a1, s2, .LBB7_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a4 +; RV32IA-NEXT: sltu a0, s2, a1 ; RV32IA-NEXT: j .LBB7_5 ; RV32IA-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a5 +; RV32IA-NEXT: sltu a0, s0, a4 ; RV32IA-NEXT: .LBB7_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32IA-NEXT: or a1, a5, a4 -; RV32IA-NEXT: seqz a1, a1 -; RV32IA-NEXT: or a0, a1, a0 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: or a2, a4, a1 +; RV32IA-NEXT: seqz a2, a2 +; RV32IA-NEXT: or a0, a2, a0 +; RV32IA-NEXT: mv a2, s0 +; RV32IA-NEXT: mv a3, s2 ; RV32IA-NEXT: bnez a0, .LBB7_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32IA-NEXT: seqz a0, a5 -; RV32IA-NEXT: sub a3, a4, a0 -; RV32IA-NEXT: addi a2, a5, -1 +; RV32IA-NEXT: seqz a0, a4 +; RV32IA-NEXT: sub a3, a1, a0 +; RV32IA-NEXT: addi a2, a4, -1 ; RV32IA-NEXT: j .LBB7_1 ; RV32IA-NEXT: .LBB7_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a5 -; RV32IA-NEXT: mv a1, a4 +; RV32IA-NEXT: mv a0, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll index a5426e560bd65..581f299a467a4 100644 --- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll @@ -1080,33 +1080,33 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) { ; RV32-NEXT: .cfi_offset s5, -28 ; RV32-NEXT: .cfi_offset s6, -32 ; RV32-NEXT: mv s5, a5 -; RV32-NEXT: mv s3, a1 -; 
RV32-NEXT: andi a1, a5, 1 -; RV32-NEXT: beqz a1, .LBB32_8 +; RV32-NEXT: mv s2, a0 +; RV32-NEXT: andi a0, a5, 1 +; RV32-NEXT: beqz a0, .LBB32_8 ; RV32-NEXT: # %bb.1: # %t ; RV32-NEXT: mv s0, a4 -; RV32-NEXT: mv s2, a3 ; RV32-NEXT: mv s1, a2 -; RV32-NEXT: mv s4, a0 -; RV32-NEXT: beq s3, a3, .LBB32_3 +; RV32-NEXT: mv s3, a3 +; RV32-NEXT: mv s4, a1 +; RV32-NEXT: beq a1, a3, .LBB32_3 ; RV32-NEXT: # %bb.2: # %t -; RV32-NEXT: sltu s6, s3, s2 +; RV32-NEXT: sltu s6, s4, s3 ; RV32-NEXT: j .LBB32_4 ; RV32-NEXT: .LBB32_3: -; RV32-NEXT: sltu s6, s4, s1 +; RV32-NEXT: sltu s6, s2, s1 ; RV32-NEXT: .LBB32_4: # %t ; RV32-NEXT: mv a0, s6 ; RV32-NEXT: call call ; RV32-NEXT: beqz s6, .LBB32_8 ; RV32-NEXT: # %bb.5: # %end -; RV32-NEXT: sltu a1, s4, s1 +; RV32-NEXT: sltu a1, s2, s1 ; RV32-NEXT: mv a0, a1 -; RV32-NEXT: beq s3, s2, .LBB32_7 +; RV32-NEXT: beq s4, s3, .LBB32_7 ; RV32-NEXT: # %bb.6: # %end -; RV32-NEXT: sltu a0, s3, s2 +; RV32-NEXT: sltu a0, s4, s3 ; RV32-NEXT: .LBB32_7: # %end -; RV32-NEXT: sub a2, s3, s2 -; RV32-NEXT: sub a3, s4, s1 +; RV32-NEXT: sub a2, s4, s3 +; RV32-NEXT: sub a3, s2, s1 ; RV32-NEXT: sub a2, a2, a1 ; RV32-NEXT: sw a3, 0(s0) ; RV32-NEXT: sw a2, 4(s0) diff --git a/llvm/test/CodeGen/RISCV/rv64-double-convert.ll b/llvm/test/CodeGen/RISCV/rv64-double-convert.ll index dd49d9e3e2dce..f9fd528584169 100644 --- a/llvm/test/CodeGen/RISCV/rv64-double-convert.ll +++ b/llvm/test/CodeGen/RISCV/rv64-double-convert.ll @@ -69,14 +69,14 @@ define i128 @fptosi_sat_f64_to_i128(double %a) nounwind { ; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, -449 ; RV64I-NEXT: slli a1, a1, 53 ; RV64I-NEXT: call __gedf2 ; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __fixdfti -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv s3, a1 ; RV64I-NEXT: li s5, -1 ; 
RV64I-NEXT: bgez s2, .LBB4_2 @@ -86,15 +86,15 @@ define i128 @fptosi_sat_f64_to_i128(double %a) nounwind { ; RV64I-NEXT: li a0, 575 ; RV64I-NEXT: slli a0, a0, 53 ; RV64I-NEXT: addi a1, a0, -1 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __gtdf2 ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: blez a0, .LBB4_4 ; RV64I-NEXT: # %bb.3: ; RV64I-NEXT: srli s3, s5, 1 ; RV64I-NEXT: .LBB4_4: -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call __unorddf2 ; RV64I-NEXT: snez a0, a0 ; RV64I-NEXT: slti a1, s2, 0 @@ -102,7 +102,7 @@ define i128 @fptosi_sat_f64_to_i128(double %a) nounwind { ; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: addi a3, a1, -1 ; RV64I-NEXT: and a1, a0, s3 -; RV64I-NEXT: and a3, a3, s1 +; RV64I-NEXT: and a3, a3, s0 ; RV64I-NEXT: neg a2, a2 ; RV64I-NEXT: or a2, a2, a3 ; RV64I-NEXT: and a0, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/shrinkwrap.ll b/llvm/test/CodeGen/RISCV/shrinkwrap.ll index 90f9509c72373..235c714f7f33b 100644 --- a/llvm/test/CodeGen/RISCV/shrinkwrap.ll +++ b/llvm/test/CodeGen/RISCV/shrinkwrap.ll @@ -361,6 +361,9 @@ define void @li_straightline_b(i32 zeroext %a, i32 zeroext %b) { ; ; RV64I-SW-LABEL: li_straightline_b: ; RV64I-SW: # %bb.0: +; RV64I-SW-NEXT: li a2, 57 +; RV64I-SW-NEXT: beq a0, a2, .LBB3_4 +; RV64I-SW-NEXT: # %bb.1: # %do_call ; RV64I-SW-NEXT: addi sp, sp, -16 ; RV64I-SW-NEXT: .cfi_def_cfa_offset 16 ; RV64I-SW-NEXT: sd ra, 8(sp) # 8-byte Folded Spill @@ -368,21 +371,19 @@ define void @li_straightline_b(i32 zeroext %a, i32 zeroext %b) { ; RV64I-SW-NEXT: .cfi_offset ra, -8 ; RV64I-SW-NEXT: .cfi_offset s0, -16 ; RV64I-SW-NEXT: mv s0, a1 -; RV64I-SW-NEXT: li a1, 57 -; RV64I-SW-NEXT: beq a0, a1, .LBB3_3 -; RV64I-SW-NEXT: # %bb.1: # %do_call ; RV64I-SW-NEXT: call foo ; RV64I-SW-NEXT: li a0, 57 ; RV64I-SW-NEXT: beq s0, a0, .LBB3_3 ; RV64I-SW-NEXT: # %bb.2: # %do_call2 ; RV64I-SW-NEXT: call foo -; RV64I-SW-NEXT: .LBB3_3: # %exit +; RV64I-SW-NEXT: .LBB3_3: ; 
RV64I-SW-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-SW-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-SW-NEXT: .cfi_restore ra ; RV64I-SW-NEXT: .cfi_restore s0 ; RV64I-SW-NEXT: addi sp, sp, 16 ; RV64I-SW-NEXT: .cfi_def_cfa_offset 0 +; RV64I-SW-NEXT: .LBB3_4: # %exit ; RV64I-SW-NEXT: ret %cmp0 = icmp eq i32 %a, 57 br i1 %cmp0, label %exit, label %do_call