diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h index 3847f4e966afe..afc27082fc8e0 100644 --- a/llvm/lib/Target/ARM/ARM.h +++ b/llvm/lib/Target/ARM/ARM.h @@ -41,6 +41,7 @@ FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); FunctionPass *createARMBranchTargetsPass(); +FunctionPass *createARMDeadRegisterDefinitions(); FunctionPass *createARMConstantIslandPass(); FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); @@ -66,6 +67,7 @@ void initializeARMBlockPlacementPass(PassRegistry &); void initializeARMBranchTargetsPass(PassRegistry &); void initializeARMConstantIslandsPass(PassRegistry &); void initializeARMDAGToDAGISelLegacyPass(PassRegistry &); +void initializeARMDeadRegisterDefinitionsPass(PassRegistry &); void initializeARMExpandPseudoPass(PassRegistry &); void initializeARMFixCortexA57AES1742098Pass(PassRegistry &); void initializeARMLoadStoreOptPass(PassRegistry &); diff --git a/llvm/lib/Target/ARM/ARMDeadRegisterDefinitionsPass.cpp b/llvm/lib/Target/ARM/ARMDeadRegisterDefinitionsPass.cpp new file mode 100644 index 0000000000000..f92fcf3dfb743 --- /dev/null +++ b/llvm/lib/Target/ARM/ARMDeadRegisterDefinitionsPass.cpp @@ -0,0 +1,229 @@ +//==- ARMDeadRegisterDefinitionsPass.cpp - Convert dead dests to compares -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file When allowed by the instruction, replace dead definitions with compare +/// instructions. 
+//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <optional> +using namespace llvm; + +#define DEBUG_TYPE "arm-dead-defs-to-cmp" + +STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced"); + +#define ARM_DEAD_REG_DEF_NAME "ARM Convert dead defs to compares" + +namespace { +class ARMDeadRegisterDefinitions : public MachineFunctionPass { +private: + const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + bool Changed; + void processMachineBasicBlock(MachineBasicBlock &MBB); + +public: + static char ID; // Pass identification, replacement for typeid. 
+ ARMDeadRegisterDefinitions() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &F) override; + + StringRef getPassName() const override { return ARM_DEAD_REG_DEF_NAME; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +char ARMDeadRegisterDefinitions::ID = 0; +} // end anonymous namespace + +INITIALIZE_PASS(ARMDeadRegisterDefinitions, "arm-dead-defs-to-cmp", + ARM_DEAD_REG_DEF_NAME, false, false) + +static bool usesFrameIndex(const MachineInstr &MI) { + for (const MachineOperand &MO : MI.uses()) + if (MO.isFI()) + return true; + return false; +} + +static std::optional<unsigned> mapToCmpCmnTstTeqOpcode(unsigned Opc) { + switch (Opc) { + // ARM encodings + case ARM::SUBri: + return ARM::CMPri; + case ARM::SUBrr: + return ARM::CMPrr; + case ARM::SUBrsi: + return ARM::CMPrsi; + case ARM::SUBrsr: + return ARM::CMPrsr; + + case ARM::ADDri: + return ARM::CMNri; + case ARM::ADDrr: + return ARM::CMNzrr; + case ARM::ADDrsi: + return ARM::CMNzrsi; + case ARM::ADDrsr: + return ARM::CMNzrsr; + + case ARM::ANDri: + return ARM::TSTri; + case ARM::ANDrr: + return ARM::TSTrr; + case ARM::ANDrsi: + return ARM::TSTrsi; + case ARM::ANDrsr: + return ARM::TSTrsr; + + case ARM::EORri: + return ARM::TEQri; + case ARM::EORrr: + return ARM::TEQrr; + case ARM::EORrsi: + return ARM::TEQrsi; + case ARM::EORrsr: + return ARM::TEQrsr; + + // Thumb2 encodings + case ARM::t2SUBri: + return ARM::t2CMPri; + case ARM::t2SUBrr: + return ARM::t2CMPrr; + case ARM::t2SUBrs: + return ARM::t2CMPrs; + + case ARM::t2ADDri: + return ARM::t2CMNri; + case ARM::t2ADDrr: + return ARM::t2CMNzrr; + case ARM::t2ADDrs: + return ARM::t2CMNzrs; + + case ARM::t2ANDri: + return ARM::t2TSTri; + case ARM::t2ANDrr: + return ARM::t2TSTrr; + case ARM::t2ANDrs: + return ARM::t2TSTrs; + + case ARM::t2EORri: + return ARM::t2TEQri; + case ARM::t2EORrr: + return ARM::t2TEQrr; + case ARM::t2EORrs: + return ARM::t2TEQrs; + + 
// Thumb1 limited support + case ARM::tSUBSrr: + return ARM::tCMPr; + case ARM::tSUBSi3: + return ARM::tCMPi8; + case ARM::tSUBSi8: + return ARM::tCMPi8; + case ARM::tAND: + return ARM::tTST; + default: + return std::nullopt; + } +} + +static void copyNonDefNonPredOperands(MachineInstr &Dst, + const MachineInstr &Src) { + const MCInstrDesc &Desc = Src.getDesc(); + int PIdx = Src.findFirstPredOperandIdx(); + unsigned Start = Desc.getNumDefs(); + unsigned End = + (PIdx == -1) ? Src.getNumOperands() : static_cast<unsigned>(PIdx); + for (unsigned I = Start; I < End; ++I) + Dst.addOperand(Src.getOperand(I)); + if (PIdx != -1) { + Dst.addOperand(Src.getOperand(PIdx)); + Dst.addOperand(Src.getOperand(PIdx + 1)); + } +} + +void ARMDeadRegisterDefinitions::processMachineBasicBlock( + MachineBasicBlock &MBB) { + // Early-increment range: iterator is advanced before the loop body, so it's + // safe to erase the current instruction inside the loop. + for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { + if (usesFrameIndex(MI)) + continue; + + // Only consider instructions that set CPSR (flag-setting variants). 
+ if (!ARMBaseInstrInfo::isCPSRDefined(MI)) + continue; + + const MCInstrDesc &Desc = MI.getDesc(); + + for (int I = 0, EE = Desc.getNumDefs(); I != EE; ++I) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef()) + continue; + Register Reg = MO.getReg(); + if (!Reg.isVirtual() || (!MO.isDead() && !MRI->use_nodbg_empty(Reg))) + continue; + assert(!MO.isImplicit() && "Unexpected implicit def!"); + if (MI.isRegTiedToUseOperand(I)) + continue; + + if (std::optional<unsigned> NewOpc = + mapToCmpCmnTstTeqOpcode(MI.getOpcode())) { + MachineInstrBuilder MIB = + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(*NewOpc)); + copyNonDefNonPredOperands(*MIB, MI); + MIB.setMIFlags(MI.getFlags()); + for (MachineMemOperand *MMO : MI.memoperands()) + MIB.addMemOperand(MMO); + + MI.eraseFromParent(); + ++NumDeadDefsReplaced; + Changed = true; + break; + } + } + } +} + +// Scan the function for instructions that have a dead definition of a +// register. Replace that instruction with a compare instruction when possible. +bool ARMDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + MRI = &MF.getRegInfo(); + LLVM_DEBUG(dbgs() << "***** ARMDeadRegisterDefinitions *****\n"); + Changed = false; + for (auto &MBB : MF) + processMachineBasicBlock(MBB); + return Changed; +} + +FunctionPass *llvm::createARMDeadRegisterDefinitions() { + return new ARMDeadRegisterDefinitions(); +} diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index fedf9e2cf34b1..d4da3783adf6b 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -79,6 +79,14 @@ static cl::opt EnableGlobalMerge("arm-global-merge", cl::Hidden, cl::desc("Enable the global merge pass")); +static cl::opt<bool> EnableARMDeadRegisterElimination( + "arm-enable-dead-defs", cl::Hidden, + 
cl::desc("Enable the pass that replaces" + " dead-dest flag-setting ALU" + " instructions with compares/tests" + " pre-RA"), + cl::init(true)); + namespace llvm { void initializeARMExecutionDomainFixPass(PassRegistry&); } @@ -510,6 +518,10 @@ bool ARMPassConfig::addGlobalInstructionSelect() { void ARMPassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOptLevel::None) { + // Replace dead-dest flag-setting ALU with compares/tests pre-RA. + if (EnableARMDeadRegisterElimination) + addPass(createARMDeadRegisterDefinitions()); + if (getOptLevel() == CodeGenOptLevel::Aggressive) addPass(&MachinePipelinerID); diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt index a39629bd8aeb0..e770867b8ce25 100644 --- a/llvm/lib/Target/ARM/CMakeLists.txt +++ b/llvm/lib/Target/ARM/CMakeLists.txt @@ -30,6 +30,7 @@ add_llvm_target(ARMCodeGen ARMCallLowering.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp + ARMDeadRegisterDefinitionsPass.cpp ARMExpandPseudoInsts.cpp ARMFastISel.cpp ARMFixCortexA57AES1742098Pass.cpp diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index 960d7305e66f6..4e9b8db39f0d4 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -103,6 +103,7 @@ ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions +; CHECK-NEXT: ARM Convert dead defs to compares ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Slot index numbering ; CHECK-NEXT: Live Interval Analysis diff --git a/llvm/test/CodeGen/ARM/addsubcarry-promotion.ll b/llvm/test/CodeGen/ARM/addsubcarry-promotion.ll index 9d07ed655eb99..12c39cb00c5f9 100644 --- a/llvm/test/CodeGen/ARM/addsubcarry-promotion.ll +++ b/llvm/test/CodeGen/ARM/addsubcarry-promotion.ll @@ -11,7 +11,7 @@ define void @fn1(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 { ; ARM-LABEL: fn1: ; ARM: @ %bb.0: @ %entry ; ARM-NEXT: rsb r2, r2, #0 -; 
ARM-NEXT: adds r0, r1, r0 +; ARM-NEXT: cmn r1, r0 ; ARM-NEXT: movw r1, #65535 ; ARM-NEXT: sxth r2, r2 ; ARM-NEXT: adc r0, r2, #1 @@ -54,7 +54,7 @@ define void @fn1(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 { ; THUMB-LABEL: fn1: ; THUMB: @ %bb.0: @ %entry ; THUMB-NEXT: rsbs r2, r2, #0 -; THUMB-NEXT: adds r0, r0, r1 +; THUMB-NEXT: cmn r1, r0 ; THUMB-NEXT: sxth r2, r2 ; THUMB-NEXT: adc r0, r2, #1 ; THUMB-NEXT: lsls r0, r0, #16 diff --git a/llvm/test/CodeGen/ARM/addsubo-legalization.ll b/llvm/test/CodeGen/ARM/addsubo-legalization.ll index 5ebb115791c66..5fb5629f0eee7 100644 --- a/llvm/test/CodeGen/ARM/addsubo-legalization.ll +++ b/llvm/test/CodeGen/ARM/addsubo-legalization.ll @@ -18,7 +18,7 @@ define <2 x i1> @uaddo(ptr %ptr, ptr %ptr2) { ; CHECK-NEXT: vmov r6, r7, d19 ; CHECK-NEXT: vmov lr, r12, d16 ; CHECK-NEXT: vmov r4, r5, d17 -; CHECK-NEXT: subs.w r3, lr, r3 +; CHECK-NEXT: cmp lr, r3 ; CHECK-NEXT: sbcs.w r2, r12, r2 ; CHECK-NEXT: mov.w r2, #0 ; CHECK-NEXT: it lo @@ -26,7 +26,7 @@ define <2 x i1> @uaddo(ptr %ptr, ptr %ptr2) { ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r2, #-1 -; CHECK-NEXT: subs r3, r4, r6 +; CHECK-NEXT: cmp r4, r6 ; CHECK-NEXT: sbcs.w r3, r5, r7 ; CHECK-NEXT: it lo ; CHECK-NEXT: movlo r1, #1 @@ -57,7 +57,7 @@ define <2 x i1> @usubo(ptr %ptr, ptr %ptr2) { ; CHECK-NEXT: vmov r4, r5, d19 ; CHECK-NEXT: vmov r3, r2, d16 ; CHECK-NEXT: vmov r6, r7, d17 -; CHECK-NEXT: subs.w r3, lr, r3 +; CHECK-NEXT: cmp lr, r3 ; CHECK-NEXT: sbcs.w r2, r12, r2 ; CHECK-NEXT: mov.w r2, #0 ; CHECK-NEXT: it lo @@ -65,7 +65,7 @@ define <2 x i1> @usubo(ptr %ptr, ptr %ptr2) { ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r2, #-1 -; CHECK-NEXT: subs r3, r4, r6 +; CHECK-NEXT: cmp r4, r6 ; CHECK-NEXT: sbcs.w r3, r5, r7 ; CHECK-NEXT: it lo ; CHECK-NEXT: movlo r1, #1 diff --git a/llvm/test/CodeGen/ARM/atomic-64bit.ll b/llvm/test/CodeGen/ARM/atomic-64bit.ll index ca9939c0f8c55..919122cc7ef31 100644 --- 
a/llvm/test/CodeGen/ARM/atomic-64bit.ll +++ b/llvm/test/CodeGen/ARM/atomic-64bit.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE ; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-LE ; RUN: llc < %s -mtriple=armebv7 -target-abi apcs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE @@ -6,214 +7,529 @@ ; RUN: llc < %s -mtriple=armv8m--none-eabi | FileCheck %s --check-prefix=CHECK-M define i64 @test1(ptr %ptr, i64 %val) { -; CHECK-LABEL: test1: -; CHECK: dmb {{ish$}} -; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK-LE: adds [[REG3:(r[0-9]?[02468])]], [[REG1]] -; CHECK-LE: adc [[REG4:(r[0-9]?[13579])]], [[REG2]] -; CHECK-BE: adds [[REG4:(r[0-9]?[13579])]], [[REG2]] -; CHECK-BE: adc [[REG3:(r[0-9]?[02468])]], [[REG1]] -; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] -; CHECK: cmp -; CHECK: bne -; CHECK: dmb {{ish$}} - -; CHECK-THUMB-LABEL: test1: -; CHECK-THUMB: dmb {{ish$}} -; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB-LE: adds.w [[REG3:[a-z0-9]+]], [[REG1]] -; CHECK-THUMB-LE: adc.w [[REG4:[a-z0-9]+]], [[REG2]] -; CHECK-THUMB-BE: adds.w [[REG4:[a-z0-9]+]], [[REG2]] -; CHECK-THUMB-BE: adc.w [[REG3:[a-z0-9]+]], [[REG1]] -; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] -; CHECK-THUMB: cmp -; CHECK-THUMB: bne -; CHECK-THUMB: dmb {{ish$}} - -; CHECK-M: __atomic_fetch_add_8 +; CHECK-LE-LABEL: test1: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: LBB0_1: @ %atomicrmw.start +; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-LE-NEXT: ldrexd r4, r5, [r0] +; CHECK-LE-NEXT: adds r6, r4, r1 +; CHECK-LE-NEXT: adc r7, r5, r2 +; CHECK-LE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-LE-NEXT: cmp 
r3, #0 +; CHECK-LE-NEXT: bne LBB0_1 +; CHECK-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-LE-NEXT: mov r0, r4 +; CHECK-LE-NEXT: mov r1, r5 +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: pop {r4, r5, r6, r7, pc} +; +; CHECK-THUMB-LE-LABEL: test1: +; CHECK-THUMB-LE: @ %bb.0: +; CHECK-THUMB-LE-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: mov r12, r0 +; CHECK-THUMB-LE-NEXT: dmb ish +; CHECK-THUMB-LE-NEXT: .LBB0_1: @ %atomicrmw.start +; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-LE-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-LE-NEXT: adds.w lr, r0, r2 +; CHECK-THUMB-LE-NEXT: adc.w r4, r1, r3 +; CHECK-THUMB-LE-NEXT: strexd r5, lr, r4, [r12] +; CHECK-THUMB-LE-NEXT: cmp r5, #0 +; CHECK-THUMB-LE-NEXT: bne .LBB0_1 +; CHECK-THUMB-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-LE-NEXT: dmb ish +; CHECK-THUMB-LE-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-BE-LABEL: test1: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: .LBB0_1: @ %atomicrmw.start +; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: ldrexd r4, r5, [r0] +; CHECK-BE-NEXT: adds r7, r5, r2 +; CHECK-BE-NEXT: adc r6, r4, r1 +; CHECK-BE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-BE-NEXT: cmp r3, #0 +; CHECK-BE-NEXT: bne .LBB0_1 +; CHECK-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-BE-NEXT: mov r0, r4 +; CHECK-BE-NEXT: mov r1, r5 +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc} +; +; CHECK-THUMB-BE-LABEL: test1: +; CHECK-THUMB-BE: @ %bb.0: +; CHECK-THUMB-BE-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: mov r12, r0 +; CHECK-THUMB-BE-NEXT: dmb ish +; CHECK-THUMB-BE-NEXT: .LBB0_1: @ %atomicrmw.start +; CHECK-THUMB-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-BE-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-BE-NEXT: adds.w lr, r1, r3 +; CHECK-THUMB-BE-NEXT: adc.w r4, r0, r2 +; 
CHECK-THUMB-BE-NEXT: strexd r5, r4, lr, [r12] +; CHECK-THUMB-BE-NEXT: cmp r5, #0 +; CHECK-THUMB-BE-NEXT: bne .LBB0_1 +; CHECK-THUMB-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-BE-NEXT: dmb ish +; CHECK-THUMB-BE-NEXT: pop {r4, r5, r7, pc} + + %r = atomicrmw add ptr %ptr, i64 %val seq_cst ret i64 %r } define i64 @test2(ptr %ptr, i64 %val) { -; CHECK-LABEL: test2: -; CHECK: dmb {{ish$}} -; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK-LE: subs [[REG3:(r[0-9]?[02468])]], [[REG1]] -; CHECK-LE: sbc [[REG4:(r[0-9]?[13579])]], [[REG2]] -; CHECK-BE: subs [[REG4:(r[0-9]?[13579])]], [[REG2]] -; CHECK-BE: sbc [[REG3:(r[0-9]?[02468])]], [[REG1]] -; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] -; CHECK: cmp -; CHECK: bne -; CHECK: dmb {{ish$}} - -; CHECK-THUMB-LABEL: test2: -; CHECK-THUMB: dmb {{ish$}} -; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB-LE: subs.w [[REG3:[a-z0-9]+]], [[REG1]] -; CHECK-THUMB-LE: sbc.w [[REG4:[a-z0-9]+]], [[REG2]] -; CHECK-THUMB-BE: subs.w [[REG4:[a-z0-9]+]], [[REG2]] -; CHECK-THUMB-BE: sbc.w [[REG3:[a-z0-9]+]], [[REG1]] -; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] -; CHECK-THUMB: cmp -; CHECK-THUMB: bne -; CHECK-THUMB: dmb {{ish$}} - -; CHECK-M: __atomic_fetch_sub_8 +; CHECK-LE-LABEL: test2: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: LBB1_1: @ %atomicrmw.start +; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-LE-NEXT: ldrexd r4, r5, [r0] +; CHECK-LE-NEXT: subs r6, r4, r1 +; CHECK-LE-NEXT: sbc r7, r5, r2 +; CHECK-LE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-LE-NEXT: cmp r3, #0 +; CHECK-LE-NEXT: bne LBB1_1 +; CHECK-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-LE-NEXT: mov r0, r4 +; CHECK-LE-NEXT: mov r1, r5 +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: pop {r4, r5, r6, r7, pc} +; +; CHECK-THUMB-LE-LABEL: test2: +; CHECK-THUMB-LE: @ %bb.0: +; CHECK-THUMB-LE-NEXT: .save {r4, r5, r7, lr} +; 
CHECK-THUMB-LE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: mov r12, r0 +; CHECK-THUMB-LE-NEXT: dmb ish +; CHECK-THUMB-LE-NEXT: .LBB1_1: @ %atomicrmw.start +; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-LE-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-LE-NEXT: subs.w lr, r0, r2 +; CHECK-THUMB-LE-NEXT: sbc.w r4, r1, r3 +; CHECK-THUMB-LE-NEXT: strexd r5, lr, r4, [r12] +; CHECK-THUMB-LE-NEXT: cmp r5, #0 +; CHECK-THUMB-LE-NEXT: bne .LBB1_1 +; CHECK-THUMB-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-LE-NEXT: dmb ish +; CHECK-THUMB-LE-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-BE-LABEL: test2: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: .LBB1_1: @ %atomicrmw.start +; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: ldrexd r4, r5, [r0] +; CHECK-BE-NEXT: subs r7, r5, r2 +; CHECK-BE-NEXT: sbc r6, r4, r1 +; CHECK-BE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-BE-NEXT: cmp r3, #0 +; CHECK-BE-NEXT: bne .LBB1_1 +; CHECK-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-BE-NEXT: mov r0, r4 +; CHECK-BE-NEXT: mov r1, r5 +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc} +; +; CHECK-THUMB-BE-LABEL: test2: +; CHECK-THUMB-BE: @ %bb.0: +; CHECK-THUMB-BE-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: mov r12, r0 +; CHECK-THUMB-BE-NEXT: dmb ish +; CHECK-THUMB-BE-NEXT: .LBB1_1: @ %atomicrmw.start +; CHECK-THUMB-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-BE-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-BE-NEXT: subs.w lr, r1, r3 +; CHECK-THUMB-BE-NEXT: sbc.w r4, r0, r2 +; CHECK-THUMB-BE-NEXT: strexd r5, r4, lr, [r12] +; CHECK-THUMB-BE-NEXT: cmp r5, #0 +; CHECK-THUMB-BE-NEXT: bne .LBB1_1 +; CHECK-THUMB-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-BE-NEXT: dmb ish +; CHECK-THUMB-BE-NEXT: pop {r4, r5, r7, pc} + + %r = atomicrmw sub ptr %ptr, i64 %val seq_cst ret i64 %r } define i64 @test3(ptr %ptr, 
i64 %val) { -; CHECK-LABEL: test3: -; CHECK: dmb {{ish$}} -; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK-LE-DAG: and [[REG3:(r[0-9]?[02468])]], [[REG1]], -; CHECK-LE-DAG: and [[REG4:(r[0-9]?[13579])]], [[REG2]], -; CHECK-BE-DAG: and [[REG4:(r[0-9]?[13579])]], [[REG2]], -; CHECK-BE-DAG: and [[REG3:(r[0-9]?[02468])]], [[REG1]], -; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] -; CHECK: cmp -; CHECK: bne -; CHECK: dmb {{ish$}} - +; CHECK-LE-LABEL: test3: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: LBB2_1: @ %atomicrmw.start +; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-LE-NEXT: ldrexd r4, r5, [r0] +; CHECK-LE-NEXT: and r6, r4, r1 +; CHECK-LE-NEXT: and r7, r5, r2 +; CHECK-LE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-LE-NEXT: cmp r3, #0 +; CHECK-LE-NEXT: bne LBB2_1 +; CHECK-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-LE-NEXT: mov r0, r4 +; CHECK-LE-NEXT: mov r1, r5 +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: pop {r4, r5, r6, r7, pc} +; ; CHECK-THUMB-LABEL: test3: -; CHECK-THUMB: dmb {{ish$}} -; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB-LE-DAG: and.w [[REG3:[a-z0-9]+]], [[REG1]], -; CHECK-THUMB-LE-DAG: and.w [[REG4:[a-z0-9]+]], [[REG2]], -; CHECK-THUMB-BE-DAG: and.w [[REG4:[a-z0-9]+]], [[REG2]], -; CHECK-THUMB-BE-DAG: and.w [[REG3:[a-z0-9]+]], [[REG1]], -; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] -; CHECK-THUMB: cmp -; CHECK-THUMB: bne -; CHECK-THUMB: dmb {{ish$}} - -; CHECK-M: _atomic_fetch_and_8 +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: mov r12, r0 +; CHECK-THUMB-NEXT: dmb ish +; CHECK-THUMB-NEXT: .LBB2_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-NEXT: and.w lr, r0, r2 +; CHECK-THUMB-NEXT: and.w r4, r1, r3 +; CHECK-THUMB-NEXT: strexd 
r5, lr, r4, [r12] +; CHECK-THUMB-NEXT: cmp r5, #0 +; CHECK-THUMB-NEXT: bne .LBB2_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: dmb ish +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-BE-LABEL: test3: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: .LBB2_1: @ %atomicrmw.start +; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: ldrexd r4, r5, [r0] +; CHECK-BE-NEXT: and r6, r4, r1 +; CHECK-BE-NEXT: and r7, r5, r2 +; CHECK-BE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-BE-NEXT: cmp r3, #0 +; CHECK-BE-NEXT: bne .LBB2_1 +; CHECK-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-BE-NEXT: mov r0, r4 +; CHECK-BE-NEXT: mov r1, r5 +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc} + + %r = atomicrmw and ptr %ptr, i64 %val seq_cst ret i64 %r } define i64 @test4(ptr %ptr, i64 %val) { -; CHECK-LABEL: test4: -; CHECK: dmb {{ish$}} -; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK-LE-DAG: orr [[REG3:(r[0-9]?[02468])]], [[REG1]], -; CHECK-LE-DAG: orr [[REG4:(r[0-9]?[13579])]], [[REG2]], -; CHECK-BE-DAG: orr [[REG4:(r[0-9]?[13579])]], [[REG2]], -; CHECK-BE-DAG: orr [[REG3:(r[0-9]?[02468])]], [[REG1]], -; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] -; CHECK: cmp -; CHECK: bne -; CHECK: dmb {{ish$}} - +; CHECK-LE-LABEL: test4: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: LBB3_1: @ %atomicrmw.start +; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-LE-NEXT: ldrexd r4, r5, [r0] +; CHECK-LE-NEXT: orr r6, r4, r1 +; CHECK-LE-NEXT: orr r7, r5, r2 +; CHECK-LE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-LE-NEXT: cmp r3, #0 +; CHECK-LE-NEXT: bne LBB3_1 +; CHECK-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-LE-NEXT: mov r0, r4 +; CHECK-LE-NEXT: mov r1, r5 +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: pop {r4, r5, r6, r7, pc} +; ; CHECK-THUMB-LABEL: test4: -; CHECK-THUMB: dmb 
{{ish$}} -; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB-LE-DAG: orr.w [[REG3:[a-z0-9]+]], [[REG1]], -; CHECK-THUMB-LE-DAG: orr.w [[REG4:[a-z0-9]+]], [[REG2]], -; CHECK-THUMB-BE-DAG: orr.w [[REG4:[a-z0-9]+]], [[REG2]], -; CHECK-THUMB-BE-DAG: orr.w [[REG3:[a-z0-9]+]], [[REG1]], -; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] -; CHECK-THUMB: cmp -; CHECK-THUMB: bne -; CHECK-THUMB: dmb {{ish$}} - -; CHECK-M: __atomic_fetch_or_8 +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: mov r12, r0 +; CHECK-THUMB-NEXT: dmb ish +; CHECK-THUMB-NEXT: .LBB3_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-NEXT: orr.w lr, r0, r2 +; CHECK-THUMB-NEXT: orr.w r4, r1, r3 +; CHECK-THUMB-NEXT: strexd r5, lr, r4, [r12] +; CHECK-THUMB-NEXT: cmp r5, #0 +; CHECK-THUMB-NEXT: bne .LBB3_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: dmb ish +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-BE-LABEL: test4: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: .LBB3_1: @ %atomicrmw.start +; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: ldrexd r4, r5, [r0] +; CHECK-BE-NEXT: orr r6, r4, r1 +; CHECK-BE-NEXT: orr r7, r5, r2 +; CHECK-BE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-BE-NEXT: cmp r3, #0 +; CHECK-BE-NEXT: bne .LBB3_1 +; CHECK-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-BE-NEXT: mov r0, r4 +; CHECK-BE-NEXT: mov r1, r5 +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc} + + %r = atomicrmw or ptr %ptr, i64 %val seq_cst ret i64 %r } define i64 @test5(ptr %ptr, i64 %val) { -; CHECK-LABEL: test5: -; CHECK: dmb {{ish$}} -; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK-LE-DAG: eor [[REG3:(r[0-9]?[02468])]], [[REG1]], -; CHECK-LE-DAG: eor 
[[REG4:(r[0-9]?[13579])]], [[REG2]], -; CHECK-BE-DAG: eor [[REG4:(r[0-9]?[13579])]], [[REG2]], -; CHECK-BE-DAG: eor [[REG3:(r[0-9]?[02468])]], [[REG1]], -; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] -; CHECK: cmp -; CHECK: bne -; CHECK: dmb {{ish$}} - +; CHECK-LE-LABEL: test5: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: LBB4_1: @ %atomicrmw.start +; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-LE-NEXT: ldrexd r4, r5, [r0] +; CHECK-LE-NEXT: eor r6, r4, r1 +; CHECK-LE-NEXT: eor r7, r5, r2 +; CHECK-LE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-LE-NEXT: cmp r3, #0 +; CHECK-LE-NEXT: bne LBB4_1 +; CHECK-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-LE-NEXT: mov r0, r4 +; CHECK-LE-NEXT: mov r1, r5 +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: pop {r4, r5, r6, r7, pc} +; ; CHECK-THUMB-LABEL: test5: -; CHECK-THUMB: dmb {{ish$}} -; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB-LE-DAG: eor.w [[REG3:[a-z0-9]+]], [[REG1]], -; CHECK-THUMB-LE-DAG: eor.w [[REG4:[a-z0-9]+]], [[REG2]], -; CHECK-THUMB-BE-DAG: eor.w [[REG4:[a-z0-9]+]], [[REG2]], -; CHECK-THUMB-BE-DAG: eor.w [[REG3:[a-z0-9]+]], [[REG1]], -; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] -; CHECK-THUMB: cmp -; CHECK-THUMB: bne -; CHECK-THUMB: dmb {{ish$}} - -; CHECK-M: __atomic_fetch_xor_8 +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: mov r12, r0 +; CHECK-THUMB-NEXT: dmb ish +; CHECK-THUMB-NEXT: .LBB4_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-NEXT: eor.w lr, r0, r2 +; CHECK-THUMB-NEXT: eor.w r4, r1, r3 +; CHECK-THUMB-NEXT: strexd r5, lr, r4, [r12] +; CHECK-THUMB-NEXT: cmp r5, #0 +; CHECK-THUMB-NEXT: bne .LBB4_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: dmb ish +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; 
+; CHECK-BE-LABEL: test5: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: .LBB4_1: @ %atomicrmw.start +; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: ldrexd r4, r5, [r0] +; CHECK-BE-NEXT: eor r6, r4, r1 +; CHECK-BE-NEXT: eor r7, r5, r2 +; CHECK-BE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-BE-NEXT: cmp r3, #0 +; CHECK-BE-NEXT: bne .LBB4_1 +; CHECK-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-BE-NEXT: mov r0, r4 +; CHECK-BE-NEXT: mov r1, r5 +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc} + + %r = atomicrmw xor ptr %ptr, i64 %val seq_cst ret i64 %r } define i64 @test6(ptr %ptr, i64 %val) { -; CHECK-LABEL: test6: -; CHECK: dmb {{ish$}} -; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} -; CHECK: cmp -; CHECK: bne -; CHECK: dmb {{ish$}} - +; CHECK-LE-LABEL: test6: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: push {r4, r5, lr} +; CHECK-LE-NEXT: mov r5, r2 +; CHECK-LE-NEXT: mov r2, r0 +; CHECK-LE-NEXT: mov r4, r1 +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: LBB5_1: @ %atomicrmw.start +; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-LE-NEXT: ldrexd r0, r1, [r2] +; CHECK-LE-NEXT: strexd r3, r4, r5, [r2] +; CHECK-LE-NEXT: cmp r3, #0 +; CHECK-LE-NEXT: bne LBB5_1 +; CHECK-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: pop {r4, r5, pc} +; ; CHECK-THUMB-LABEL: test6: -; CHECK-THUMB: dmb {{ish$}} -; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} -; CHECK-THUMB: cmp -; CHECK-THUMB: bne -; CHECK-THUMB: dmb {{ish$}} +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r7, lr} +; CHECK-THUMB-NEXT: push {r7, lr} +; CHECK-THUMB-NEXT: dmb ish +; CHECK-THUMB-NEXT: .LBB5_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexd r12, 
r1, [r0] +; CHECK-THUMB-NEXT: strexd lr, r2, r3, [r0] +; CHECK-THUMB-NEXT: cmp.w lr, #0 +; CHECK-THUMB-NEXT: bne .LBB5_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r12 +; CHECK-THUMB-NEXT: dmb ish +; CHECK-THUMB-NEXT: pop {r7, pc} +; +; CHECK-BE-LABEL: test6: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: push {r4, r5, lr} +; CHECK-BE-NEXT: mov r5, r2 +; CHECK-BE-NEXT: mov r2, r0 +; CHECK-BE-NEXT: mov r4, r1 +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: .LBB5_1: @ %atomicrmw.start +; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: ldrexd r0, r1, [r2] +; CHECK-BE-NEXT: strexd r3, r4, r5, [r2] +; CHECK-BE-NEXT: cmp r3, #0 +; CHECK-BE-NEXT: bne .LBB5_1 +; CHECK-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: pop {r4, r5, pc} + -; CHECK-M: __atomic_exchange_8 %r = atomicrmw xchg ptr %ptr, i64 %val seq_cst ret i64 %r } define i64 @test7(ptr %ptr, i64 %val1, i64 %val2) { -; CHECK-LABEL: test7: -; CHECK-DAG: mov [[VAL1LO:r[0-9]+]], r1 -; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK-LE-DAG: eor [[MISMATCH_LO:.*]], [[REG1]], [[VAL1LO]] -; CHECK-LE-DAG: eor [[MISMATCH_HI:.*]], [[REG2]], r2 -; CHECK-BE-DAG: eor [[MISMATCH_LO:.*]], [[REG2]], r2 -; CHECK-BE-DAG: eor [[MISMATCH_HI:.*]], [[REG1]], r1 -; CHECK: orrs {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]] -; CHECK: bne -; CHECK-DAG: dmb {{ish$}} -; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} -; CHECK: cmp -; CHECK: beq -; CHECK: dmb {{ish$}} - -; CHECK-THUMB-LABEL: test7: -; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2 -; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3 -; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG1]], r2 -; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG2]], r3 -; CHECK-THUMB-LE: orrs.w {{.*}}, [[MISMATCH_LO]], [[MISMATCH_HI]] -; CHECK-THUMB: bne -; 
CHECK-THUMB: dmb {{ish$}} -; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} -; CHECK-THUMB: cmp -; CHECK-THUMB: beq -; CHECK-THUMB: dmb {{ish$}} - -; CHECK-M: __atomic_compare_exchange_8 +; CHECK-LE-LABEL: test7: +; CHECK-LE: @ %bb.0: @ %cmpxchg.start +; CHECK-LE-NEXT: push {r4, r6, r10, r11, lr} +; CHECK-LE-NEXT: mov r10, r3 +; CHECK-LE-NEXT: mov r9, r1 +; CHECK-LE-NEXT: mov r3, r0 +; CHECK-LE-NEXT: ldrexd r0, r1, [r0] +; CHECK-LE-NEXT: eor r12, r1, r2 +; CHECK-LE-NEXT: eor r4, r0, r9 +; CHECK-LE-NEXT: orrs r4, r4, r12 +; CHECK-LE-NEXT: bne LBB6_4 +; CHECK-LE-NEXT: @ %bb.1: @ %cmpxchg.fencedstore +; CHECK-LE-NEXT: ldr r11, [sp, #20] +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: LBB6_2: @ %cmpxchg.trystore +; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-LE-NEXT: strexd r4, r10, r11, [r3] +; CHECK-LE-NEXT: cmp r4, #0 +; CHECK-LE-NEXT: beq LBB6_5 +; CHECK-LE-NEXT: @ %bb.3: @ %cmpxchg.releasedload +; CHECK-LE-NEXT: @ in Loop: Header=BB6_2 Depth=1 +; CHECK-LE-NEXT: ldrexd r0, r1, [r3] +; CHECK-LE-NEXT: eor r4, r0, r9 +; CHECK-LE-NEXT: eor r6, r1, r2 +; CHECK-LE-NEXT: orrs r6, r4, r6 +; CHECK-LE-NEXT: beq LBB6_2 +; CHECK-LE-NEXT: LBB6_4: @ %cmpxchg.nostore +; CHECK-LE-NEXT: clrex +; CHECK-LE-NEXT: LBB6_5: @ %cmpxchg.end +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: pop {r4, r6, r10, r11, pc} +; +; CHECK-THUMB-LE-LABEL: test7: +; CHECK-THUMB-LE: @ %bb.0: @ %cmpxchg.start +; CHECK-THUMB-LE-NEXT: .save {r4, r5, r6, lr} +; CHECK-THUMB-LE-NEXT: push {r4, r5, r6, lr} +; CHECK-THUMB-LE-NEXT: mov r12, r0 +; CHECK-THUMB-LE-NEXT: ldrexd r0, r1, [r0] +; CHECK-THUMB-LE-NEXT: eor.w lr, r1, r3 +; CHECK-THUMB-LE-NEXT: eor.w r4, r0, r2 +; CHECK-THUMB-LE-NEXT: orrs.w r4, r4, lr +; CHECK-THUMB-LE-NEXT: bne .LBB6_4 +; CHECK-THUMB-LE-NEXT: @ %bb.1: @ %cmpxchg.fencedstore +; CHECK-THUMB-LE-NEXT: ldrd r4, lr, [sp, #16] +; CHECK-THUMB-LE-NEXT: dmb ish +; CHECK-THUMB-LE-NEXT: .LBB6_2: @ %cmpxchg.trystore +; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; 
CHECK-THUMB-LE-NEXT: strexd r5, r4, lr, [r12] +; CHECK-THUMB-LE-NEXT: cbz r5, .LBB6_5 +; CHECK-THUMB-LE-NEXT: @ %bb.3: @ %cmpxchg.releasedload +; CHECK-THUMB-LE-NEXT: @ in Loop: Header=BB6_2 Depth=1 +; CHECK-THUMB-LE-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-LE-NEXT: eor.w r5, r0, r2 +; CHECK-THUMB-LE-NEXT: eor.w r6, r1, r3 +; CHECK-THUMB-LE-NEXT: orrs r5, r6 +; CHECK-THUMB-LE-NEXT: beq .LBB6_2 +; CHECK-THUMB-LE-NEXT: .LBB6_4: @ %cmpxchg.nostore +; CHECK-THUMB-LE-NEXT: clrex +; CHECK-THUMB-LE-NEXT: .LBB6_5: @ %cmpxchg.end +; CHECK-THUMB-LE-NEXT: dmb ish +; CHECK-THUMB-LE-NEXT: pop {r4, r5, r6, pc} +; +; CHECK-BE-LABEL: test7: +; CHECK-BE: @ %bb.0: @ %cmpxchg.start +; CHECK-BE-NEXT: push {r4, r6, r8, r9, lr} +; CHECK-BE-NEXT: mov r8, r3 +; CHECK-BE-NEXT: mov r12, r1 +; CHECK-BE-NEXT: mov r3, r0 +; CHECK-BE-NEXT: ldrexd r0, r1, [r0] +; CHECK-BE-NEXT: eor lr, r0, r12 +; CHECK-BE-NEXT: eor r6, r1, r2 +; CHECK-BE-NEXT: orrs r6, r6, lr +; CHECK-BE-NEXT: bne .LBB6_4 +; CHECK-BE-NEXT: @ %bb.1: @ %cmpxchg.fencedstore +; CHECK-BE-NEXT: ldr r9, [sp, #20] +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: .LBB6_2: @ %cmpxchg.trystore +; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: strexd r6, r8, r9, [r3] +; CHECK-BE-NEXT: cmp r6, #0 +; CHECK-BE-NEXT: beq .LBB6_5 +; CHECK-BE-NEXT: @ %bb.3: @ %cmpxchg.releasedload +; CHECK-BE-NEXT: @ in Loop: Header=BB6_2 Depth=1 +; CHECK-BE-NEXT: ldrexd r0, r1, [r3] +; CHECK-BE-NEXT: eor r6, r0, r12 +; CHECK-BE-NEXT: eor r4, r1, r2 +; CHECK-BE-NEXT: orrs r6, r4, r6 +; CHECK-BE-NEXT: beq .LBB6_2 +; CHECK-BE-NEXT: .LBB6_4: @ %cmpxchg.nostore +; CHECK-BE-NEXT: clrex +; CHECK-BE-NEXT: .LBB6_5: @ %cmpxchg.end +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: pop {r4, r6, r8, r9, pc} +; +; CHECK-THUMB-BE-LABEL: test7: +; CHECK-THUMB-BE: @ %bb.0: @ %cmpxchg.start +; CHECK-THUMB-BE-NEXT: .save {r4, r5, r6, lr} +; CHECK-THUMB-BE-NEXT: push {r4, r5, r6, lr} +; CHECK-THUMB-BE-NEXT: mov r12, r0 +; CHECK-THUMB-BE-NEXT: ldrexd r0, r1, [r0] +; 
CHECK-THUMB-BE-NEXT: eor.w lr, r0, r2 +; CHECK-THUMB-BE-NEXT: eor.w r4, r1, r3 +; CHECK-THUMB-BE-NEXT: orrs.w r4, r4, lr +; CHECK-THUMB-BE-NEXT: bne .LBB6_4 +; CHECK-THUMB-BE-NEXT: @ %bb.1: @ %cmpxchg.fencedstore +; CHECK-THUMB-BE-NEXT: ldrd r4, lr, [sp, #16] +; CHECK-THUMB-BE-NEXT: dmb ish +; CHECK-THUMB-BE-NEXT: .LBB6_2: @ %cmpxchg.trystore +; CHECK-THUMB-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-BE-NEXT: strexd r5, r4, lr, [r12] +; CHECK-THUMB-BE-NEXT: cbz r5, .LBB6_5 +; CHECK-THUMB-BE-NEXT: @ %bb.3: @ %cmpxchg.releasedload +; CHECK-THUMB-BE-NEXT: @ in Loop: Header=BB6_2 Depth=1 +; CHECK-THUMB-BE-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-BE-NEXT: eor.w r5, r0, r2 +; CHECK-THUMB-BE-NEXT: eor.w r6, r1, r3 +; CHECK-THUMB-BE-NEXT: orrs r5, r6 +; CHECK-THUMB-BE-NEXT: beq .LBB6_2 +; CHECK-THUMB-BE-NEXT: .LBB6_4: @ %cmpxchg.nostore +; CHECK-THUMB-BE-NEXT: clrex +; CHECK-THUMB-BE-NEXT: .LBB6_5: @ %cmpxchg.end +; CHECK-THUMB-BE-NEXT: dmb ish +; CHECK-THUMB-BE-NEXT: pop {r4, r5, r6, pc} + + %pair = cmpxchg ptr %ptr, i64 %val1, i64 %val2 seq_cst seq_cst %r = extractvalue { i64, i1 } %pair, 0 @@ -224,20 +540,20 @@ define i64 @test7(ptr %ptr, i64 %val1, i64 %val2) { ; isn't supported. 
define i64 @test8(ptr %ptr) { ; CHECK-LABEL: test8: -; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK-NOT: strexd -; CHECK: clrex -; CHECK-NOT: strexd -; CHECK: dmb {{ish$}} - +; CHECK: @ %bb.0: +; CHECK-NEXT: ldrexd r0, r1, [r0] +; CHECK-NEXT: clrex +; CHECK-NEXT: dmb ish +; CHECK-NEXT: bx lr +; ; CHECK-THUMB-LABEL: test8: -; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB-NOT: strexd -; CHECK-THUMB: clrex -; CHECK-THUMB-NOT: strexd -; CHECK-THUMB: dmb {{ish$}} +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: ldrexd r0, r1, [r0] +; CHECK-THUMB-NEXT: clrex +; CHECK-THUMB-NEXT: dmb ish +; CHECK-THUMB-NEXT: bx lr + -; CHECK-M: __atomic_load_8 %r = load atomic i64, ptr %ptr seq_cst, align 8 ret i64 %r @@ -247,185 +563,444 @@ define i64 @test8(ptr %ptr) { ; way to write it. Except on M class devices, where ldrexd/strexd aren't ; supported. define void @test9(ptr %ptr, i64 %val) { -; CHECK-LABEL: test9: -; CHECK: dmb {{ish$}} -; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} -; CHECK: cmp -; CHECK: bne -; CHECK: dmb {{ish$}} - +; CHECK-LE-LABEL: test9: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: push {r4, r5, lr} +; CHECK-LE-NEXT: mov r3, r2 +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: mov r2, r1 +; CHECK-LE-NEXT: LBB8_1: @ %atomicrmw.start +; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-LE-NEXT: ldrexd r4, r5, [r0] +; CHECK-LE-NEXT: strexd r1, r2, r3, [r0] +; CHECK-LE-NEXT: cmp r1, #0 +; CHECK-LE-NEXT: bne LBB8_1 +; CHECK-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: pop {r4, r5, pc} +; ; CHECK-THUMB-LABEL: test9: -; CHECK-THUMB: dmb {{ish$}} -; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} -; CHECK-THUMB: cmp -; CHECK-THUMB: bne -; CHECK-THUMB: dmb {{ish$}} +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: dmb 
ish +; CHECK-THUMB-NEXT: .LBB8_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexd r1, r12, [r0] +; CHECK-THUMB-NEXT: strexd r1, r2, r3, [r0] +; CHECK-THUMB-NEXT: cmp r1, #0 +; CHECK-THUMB-NEXT: bne .LBB8_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: dmb ish +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-BE-LABEL: test9: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: push {r4, r5, lr} +; CHECK-BE-NEXT: mov r3, r2 +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: mov r2, r1 +; CHECK-BE-NEXT: .LBB8_1: @ %atomicrmw.start +; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: ldrexd r4, r5, [r0] +; CHECK-BE-NEXT: strexd r1, r2, r3, [r0] +; CHECK-BE-NEXT: cmp r1, #0 +; CHECK-BE-NEXT: bne .LBB8_1 +; CHECK-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: pop {r4, r5, pc} + -; CHECK-M: __atomic_store_8 store atomic i64 %val, ptr %ptr seq_cst, align 8 ret void } define i64 @test10(ptr %ptr, i64 %val) { -; CHECK-LABEL: test10: -; CHECK: dmb {{ish$}} -; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2 -; CHECK-LE: subs {{[^,]+}}, r1, [[REG1]] -; CHECK-BE: subs {{[^,]+}}, r2, [[REG2]] -; CHECK-LE: sbcs {{[^,]+}}, r2, [[REG2]] -; CHECK-BE: sbcs {{[^,]+}}, r1, [[REG1]] -; CHECK: movge [[OUT_HI]], [[REG2]] -; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1 -; CHECK: movge [[OUT_LO]], [[REG1]] -; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] -; CHECK: cmp -; CHECK: bne -; CHECK: dmb {{ish$}} - -; CHECK-THUMB-LABEL: test10: -; CHECK-THUMB: dmb {{ish$}} -; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2 -; CHECK-THUMB-LE: subs.w {{[^,]+}}, r2, [[REG1]] -; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]] -; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]] -; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]] -; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3 -; CHECK-THUMB: itt ge -; 
CHECK-THUMB: movge [[OUT_HI]], [[REG2]] -; CHECK-THUMB: movge [[OUT_LO]], [[REG1]] -; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] -; CHECK-THUMB: cmp -; CHECK-THUMB: bne -; CHECK-THUMB: dmb {{ish$}} - -; CHECK-M: __atomic_compare_exchange_8 +; CHECK-LE-LABEL: test10: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: LBB9_1: @ %atomicrmw.start +; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-LE-NEXT: ldrexd r4, r5, [r0] +; CHECK-LE-NEXT: mov r7, r2 +; CHECK-LE-NEXT: cmp r1, r4 +; CHECK-LE-NEXT: sbcs r3, r2, r5 +; CHECK-LE-NEXT: movge r7, r5 +; CHECK-LE-NEXT: mov r6, r1 +; CHECK-LE-NEXT: movge r6, r4 +; CHECK-LE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-LE-NEXT: cmp r3, #0 +; CHECK-LE-NEXT: bne LBB9_1 +; CHECK-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-LE-NEXT: mov r0, r4 +; CHECK-LE-NEXT: mov r1, r5 +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: pop {r4, r5, r6, r7, pc} +; +; CHECK-THUMB-LE-LABEL: test10: +; CHECK-THUMB-LE: @ %bb.0: +; CHECK-THUMB-LE-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: mov r12, r0 +; CHECK-THUMB-LE-NEXT: dmb ish +; CHECK-THUMB-LE-NEXT: .LBB9_1: @ %atomicrmw.start +; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-LE-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-LE-NEXT: mov r4, r2 +; CHECK-THUMB-LE-NEXT: cmp r2, r0 +; CHECK-THUMB-LE-NEXT: sbcs.w lr, r3, r1 +; CHECK-THUMB-LE-NEXT: mov lr, r3 +; CHECK-THUMB-LE-NEXT: itt ge +; CHECK-THUMB-LE-NEXT: movge lr, r1 +; CHECK-THUMB-LE-NEXT: movge r4, r0 +; CHECK-THUMB-LE-NEXT: strexd r5, r4, lr, [r12] +; CHECK-THUMB-LE-NEXT: cmp r5, #0 +; CHECK-THUMB-LE-NEXT: bne .LBB9_1 +; CHECK-THUMB-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-LE-NEXT: dmb ish +; CHECK-THUMB-LE-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-BE-LABEL: test10: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: .LBB9_1: @ 
%atomicrmw.start +; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: ldrexd r4, r5, [r0] +; CHECK-BE-NEXT: mov r7, r2 +; CHECK-BE-NEXT: cmp r2, r5 +; CHECK-BE-NEXT: sbcs r3, r1, r4 +; CHECK-BE-NEXT: movge r7, r5 +; CHECK-BE-NEXT: mov r6, r1 +; CHECK-BE-NEXT: movge r6, r4 +; CHECK-BE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-BE-NEXT: cmp r3, #0 +; CHECK-BE-NEXT: bne .LBB9_1 +; CHECK-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-BE-NEXT: mov r0, r4 +; CHECK-BE-NEXT: mov r1, r5 +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc} +; +; CHECK-THUMB-BE-LABEL: test10: +; CHECK-THUMB-BE: @ %bb.0: +; CHECK-THUMB-BE-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: mov r12, r0 +; CHECK-THUMB-BE-NEXT: dmb ish +; CHECK-THUMB-BE-NEXT: .LBB9_1: @ %atomicrmw.start +; CHECK-THUMB-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-BE-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-BE-NEXT: mov r4, r2 +; CHECK-THUMB-BE-NEXT: cmp r3, r1 +; CHECK-THUMB-BE-NEXT: sbcs.w lr, r2, r0 +; CHECK-THUMB-BE-NEXT: mov lr, r3 +; CHECK-THUMB-BE-NEXT: itt ge +; CHECK-THUMB-BE-NEXT: movge lr, r1 +; CHECK-THUMB-BE-NEXT: movge r4, r0 +; CHECK-THUMB-BE-NEXT: strexd r5, r4, lr, [r12] +; CHECK-THUMB-BE-NEXT: cmp r5, #0 +; CHECK-THUMB-BE-NEXT: bne .LBB9_1 +; CHECK-THUMB-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-BE-NEXT: dmb ish +; CHECK-THUMB-BE-NEXT: pop {r4, r5, r7, pc} + + %r = atomicrmw min ptr %ptr, i64 %val seq_cst ret i64 %r } define i64 @test11(ptr %ptr, i64 %val) { -; CHECK-LABEL: test11: -; CHECK: dmb {{ish$}} -; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2 -; CHECK-LE: subs {{[^,]+}}, r1, [[REG1]] -; CHECK-BE: subs {{[^,]+}}, r2, [[REG2]] -; CHECK-LE: sbcs {{[^,]+}}, r2, [[REG2]] -; CHECK-BE: sbcs {{[^,]+}}, r1, [[REG1]] -; CHECK: movhs [[OUT_HI]], [[REG2]] -; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1 -; CHECK: movhs [[OUT_LO]], [[REG1]] -; CHECK: 
strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] -; CHECK: cmp -; CHECK: bne -; CHECK: dmb {{ish$}} - -; CHECK-THUMB-LABEL: test11: -; CHECK-THUMB: dmb {{ish$}} -; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2 -; CHECK-THUMB-LE: subs.w {{[^,]+}}, r2, [[REG1]] -; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]] -; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]] -; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]] -; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3 -; CHECK-THUMB: itt hs -; CHECK-THUMB: movhs [[OUT_HI]], [[REG2]] -; CHECK-THUMB: movhs [[OUT_LO]], [[REG1]] -; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] -; CHECK-THUMB: cmp -; CHECK-THUMB: bne -; CHECK-THUMB: dmb {{ish$}} - -; CHECK-M: __atomic_compare_exchange_8 +; CHECK-LE-LABEL: test11: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: LBB10_1: @ %atomicrmw.start +; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-LE-NEXT: ldrexd r4, r5, [r0] +; CHECK-LE-NEXT: mov r7, r2 +; CHECK-LE-NEXT: cmp r1, r4 +; CHECK-LE-NEXT: sbcs r3, r2, r5 +; CHECK-LE-NEXT: movhs r7, r5 +; CHECK-LE-NEXT: mov r6, r1 +; CHECK-LE-NEXT: movhs r6, r4 +; CHECK-LE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-LE-NEXT: cmp r3, #0 +; CHECK-LE-NEXT: bne LBB10_1 +; CHECK-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-LE-NEXT: mov r0, r4 +; CHECK-LE-NEXT: mov r1, r5 +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: pop {r4, r5, r6, r7, pc} +; +; CHECK-THUMB-LE-LABEL: test11: +; CHECK-THUMB-LE: @ %bb.0: +; CHECK-THUMB-LE-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: mov r12, r0 +; CHECK-THUMB-LE-NEXT: dmb ish +; CHECK-THUMB-LE-NEXT: .LBB10_1: @ %atomicrmw.start +; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-LE-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-LE-NEXT: mov r4, r2 +; CHECK-THUMB-LE-NEXT: cmp r2, r0 +; CHECK-THUMB-LE-NEXT: sbcs.w lr, r3, r1 +; 
CHECK-THUMB-LE-NEXT: mov lr, r3 +; CHECK-THUMB-LE-NEXT: itt hs +; CHECK-THUMB-LE-NEXT: movhs lr, r1 +; CHECK-THUMB-LE-NEXT: movhs r4, r0 +; CHECK-THUMB-LE-NEXT: strexd r5, r4, lr, [r12] +; CHECK-THUMB-LE-NEXT: cmp r5, #0 +; CHECK-THUMB-LE-NEXT: bne .LBB10_1 +; CHECK-THUMB-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-LE-NEXT: dmb ish +; CHECK-THUMB-LE-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-BE-LABEL: test11: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: .LBB10_1: @ %atomicrmw.start +; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: ldrexd r4, r5, [r0] +; CHECK-BE-NEXT: mov r7, r2 +; CHECK-BE-NEXT: cmp r2, r5 +; CHECK-BE-NEXT: sbcs r3, r1, r4 +; CHECK-BE-NEXT: movhs r7, r5 +; CHECK-BE-NEXT: mov r6, r1 +; CHECK-BE-NEXT: movhs r6, r4 +; CHECK-BE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-BE-NEXT: cmp r3, #0 +; CHECK-BE-NEXT: bne .LBB10_1 +; CHECK-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-BE-NEXT: mov r0, r4 +; CHECK-BE-NEXT: mov r1, r5 +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc} +; +; CHECK-THUMB-BE-LABEL: test11: +; CHECK-THUMB-BE: @ %bb.0: +; CHECK-THUMB-BE-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: mov r12, r0 +; CHECK-THUMB-BE-NEXT: dmb ish +; CHECK-THUMB-BE-NEXT: .LBB10_1: @ %atomicrmw.start +; CHECK-THUMB-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-BE-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-BE-NEXT: mov r4, r2 +; CHECK-THUMB-BE-NEXT: cmp r3, r1 +; CHECK-THUMB-BE-NEXT: sbcs.w lr, r2, r0 +; CHECK-THUMB-BE-NEXT: mov lr, r3 +; CHECK-THUMB-BE-NEXT: itt hs +; CHECK-THUMB-BE-NEXT: movhs lr, r1 +; CHECK-THUMB-BE-NEXT: movhs r4, r0 +; CHECK-THUMB-BE-NEXT: strexd r5, r4, lr, [r12] +; CHECK-THUMB-BE-NEXT: cmp r5, #0 +; CHECK-THUMB-BE-NEXT: bne .LBB10_1 +; CHECK-THUMB-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-BE-NEXT: dmb ish +; CHECK-THUMB-BE-NEXT: pop {r4, r5, r7, pc} + + %r = 
atomicrmw umin ptr %ptr, i64 %val seq_cst ret i64 %r } define i64 @test12(ptr %ptr, i64 %val) { -; CHECK-LABEL: test12: -; CHECK: dmb {{ish$}} -; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2 -; CHECK-LE: subs {{[^,]+}}, r1, [[REG1]] -; CHECK-BE: subs {{[^,]+}}, r2, [[REG2]] -; CHECK-LE: sbcs {{[^,]+}}, r2, [[REG2]] -; CHECK-BE: sbcs {{[^,]+}}, r1, [[REG1]] -; CHECK: movlt [[OUT_HI]], [[REG2]] -; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1 -; CHECK: movlt [[OUT_LO]], [[REG1]] -; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] -; CHECK: cmp -; CHECK: bne -; CHECK: dmb {{ish$}} - -; CHECK-THUMB-LABEL: test12: -; CHECK-THUMB: dmb {{ish$}} -; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2 -; CHECK-THUMB-LE: subs.w {{[^,]+}}, r2, [[REG1]] -; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]] -; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]] -; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]] -; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3 -; CHECK-THUMB: itt lt -; CHECK-THUMB: movlt [[OUT_HI]], [[REG2]] -; CHECK-THUMB: movlt [[OUT_LO]], [[REG1]] -; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] -; CHECK-THUMB: cmp -; CHECK-THUMB: bne -; CHECK-THUMB: dmb {{ish$}} - -; CHECK-M: __atomic_compare_exchange_8 +; CHECK-LE-LABEL: test12: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: LBB11_1: @ %atomicrmw.start +; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-LE-NEXT: ldrexd r4, r5, [r0] +; CHECK-LE-NEXT: mov r7, r2 +; CHECK-LE-NEXT: cmp r1, r4 +; CHECK-LE-NEXT: sbcs r3, r2, r5 +; CHECK-LE-NEXT: movlt r7, r5 +; CHECK-LE-NEXT: mov r6, r1 +; CHECK-LE-NEXT: movlt r6, r4 +; CHECK-LE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-LE-NEXT: cmp r3, #0 +; CHECK-LE-NEXT: bne LBB11_1 +; CHECK-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-LE-NEXT: mov r0, r4 +; CHECK-LE-NEXT: mov r1, r5 +; CHECK-LE-NEXT: dmb 
ish +; CHECK-LE-NEXT: pop {r4, r5, r6, r7, pc} +; +; CHECK-THUMB-LE-LABEL: test12: +; CHECK-THUMB-LE: @ %bb.0: +; CHECK-THUMB-LE-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: mov r12, r0 +; CHECK-THUMB-LE-NEXT: dmb ish +; CHECK-THUMB-LE-NEXT: .LBB11_1: @ %atomicrmw.start +; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-LE-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-LE-NEXT: mov r4, r2 +; CHECK-THUMB-LE-NEXT: cmp r2, r0 +; CHECK-THUMB-LE-NEXT: sbcs.w lr, r3, r1 +; CHECK-THUMB-LE-NEXT: mov lr, r3 +; CHECK-THUMB-LE-NEXT: itt lt +; CHECK-THUMB-LE-NEXT: movlt lr, r1 +; CHECK-THUMB-LE-NEXT: movlt r4, r0 +; CHECK-THUMB-LE-NEXT: strexd r5, r4, lr, [r12] +; CHECK-THUMB-LE-NEXT: cmp r5, #0 +; CHECK-THUMB-LE-NEXT: bne .LBB11_1 +; CHECK-THUMB-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-LE-NEXT: dmb ish +; CHECK-THUMB-LE-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-BE-LABEL: test12: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: .LBB11_1: @ %atomicrmw.start +; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: ldrexd r4, r5, [r0] +; CHECK-BE-NEXT: mov r7, r2 +; CHECK-BE-NEXT: cmp r2, r5 +; CHECK-BE-NEXT: sbcs r3, r1, r4 +; CHECK-BE-NEXT: movlt r7, r5 +; CHECK-BE-NEXT: mov r6, r1 +; CHECK-BE-NEXT: movlt r6, r4 +; CHECK-BE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-BE-NEXT: cmp r3, #0 +; CHECK-BE-NEXT: bne .LBB11_1 +; CHECK-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-BE-NEXT: mov r0, r4 +; CHECK-BE-NEXT: mov r1, r5 +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc} +; +; CHECK-THUMB-BE-LABEL: test12: +; CHECK-THUMB-BE: @ %bb.0: +; CHECK-THUMB-BE-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: mov r12, r0 +; CHECK-THUMB-BE-NEXT: dmb ish +; CHECK-THUMB-BE-NEXT: .LBB11_1: @ %atomicrmw.start +; CHECK-THUMB-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; 
CHECK-THUMB-BE-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-BE-NEXT: mov r4, r2 +; CHECK-THUMB-BE-NEXT: cmp r3, r1 +; CHECK-THUMB-BE-NEXT: sbcs.w lr, r2, r0 +; CHECK-THUMB-BE-NEXT: mov lr, r3 +; CHECK-THUMB-BE-NEXT: itt lt +; CHECK-THUMB-BE-NEXT: movlt lr, r1 +; CHECK-THUMB-BE-NEXT: movlt r4, r0 +; CHECK-THUMB-BE-NEXT: strexd r5, r4, lr, [r12] +; CHECK-THUMB-BE-NEXT: cmp r5, #0 +; CHECK-THUMB-BE-NEXT: bne .LBB11_1 +; CHECK-THUMB-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-BE-NEXT: dmb ish +; CHECK-THUMB-BE-NEXT: pop {r4, r5, r7, pc} + + %r = atomicrmw max ptr %ptr, i64 %val seq_cst ret i64 %r } define i64 @test13(ptr %ptr, i64 %val) { -; CHECK-LABEL: test13: -; CHECK: dmb {{ish$}} -; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2 -; CHECK-LE: subs {{[^,]+}}, r1, [[REG1]] -; CHECK-BE: subs {{[^,]+}}, r2, [[REG2]] -; CHECK-LE: sbcs {{[^,]+}}, r2, [[REG2]] -; CHECK-BE: sbcs {{[^,]+}}, r1, [[REG1]] -; CHECK: movlo [[OUT_HI]], [[REG2]] -; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1 -; CHECK: movlo [[OUT_LO]], [[REG1]] -; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] -; CHECK: cmp -; CHECK: bne -; CHECK: dmb {{ish$}} - -; CHECK-THUMB-LABEL: test13: -; CHECK-THUMB: dmb {{ish$}} -; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2 -; CHECK-THUMB-LE: subs.w {{[^,]+}}, r2, [[REG1]] -; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]] -; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]] -; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]] -; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3 -; CHECK-THUMB: itt lo -; CHECK-THUMB: movlo [[OUT_HI]], [[REG2]] -; CHECK-THUMB: movlo [[OUT_LO]], [[REG1]] -; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] -; CHECK-THUMB: cmp -; CHECK-THUMB: bne -; CHECK-THUMB: dmb {{ish$}} - -; CHECK-M: __atomic_compare_exchange_8 +; CHECK-LE-LABEL: test13: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-LE-NEXT: dmb 
ish +; CHECK-LE-NEXT: LBB12_1: @ %atomicrmw.start +; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-LE-NEXT: ldrexd r4, r5, [r0] +; CHECK-LE-NEXT: mov r7, r2 +; CHECK-LE-NEXT: cmp r1, r4 +; CHECK-LE-NEXT: sbcs r3, r2, r5 +; CHECK-LE-NEXT: movlo r7, r5 +; CHECK-LE-NEXT: mov r6, r1 +; CHECK-LE-NEXT: movlo r6, r4 +; CHECK-LE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-LE-NEXT: cmp r3, #0 +; CHECK-LE-NEXT: bne LBB12_1 +; CHECK-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-LE-NEXT: mov r0, r4 +; CHECK-LE-NEXT: mov r1, r5 +; CHECK-LE-NEXT: dmb ish +; CHECK-LE-NEXT: pop {r4, r5, r6, r7, pc} +; +; CHECK-THUMB-LE-LABEL: test13: +; CHECK-THUMB-LE: @ %bb.0: +; CHECK-THUMB-LE-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: mov r12, r0 +; CHECK-THUMB-LE-NEXT: dmb ish +; CHECK-THUMB-LE-NEXT: .LBB12_1: @ %atomicrmw.start +; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-LE-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-LE-NEXT: mov r4, r2 +; CHECK-THUMB-LE-NEXT: cmp r2, r0 +; CHECK-THUMB-LE-NEXT: sbcs.w lr, r3, r1 +; CHECK-THUMB-LE-NEXT: mov lr, r3 +; CHECK-THUMB-LE-NEXT: itt lo +; CHECK-THUMB-LE-NEXT: movlo lr, r1 +; CHECK-THUMB-LE-NEXT: movlo r4, r0 +; CHECK-THUMB-LE-NEXT: strexd r5, r4, lr, [r12] +; CHECK-THUMB-LE-NEXT: cmp r5, #0 +; CHECK-THUMB-LE-NEXT: bne .LBB12_1 +; CHECK-THUMB-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-LE-NEXT: dmb ish +; CHECK-THUMB-LE-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-BE-LABEL: test13: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: .LBB12_1: @ %atomicrmw.start +; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: ldrexd r4, r5, [r0] +; CHECK-BE-NEXT: mov r7, r2 +; CHECK-BE-NEXT: cmp r2, r5 +; CHECK-BE-NEXT: sbcs r3, r1, r4 +; CHECK-BE-NEXT: movlo r7, r5 +; CHECK-BE-NEXT: mov r6, r1 +; CHECK-BE-NEXT: movlo r6, r4 +; CHECK-BE-NEXT: strexd r3, r6, r7, [r0] +; CHECK-BE-NEXT: cmp r3, #0 +; 
CHECK-BE-NEXT: bne .LBB12_1 +; CHECK-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-BE-NEXT: mov r0, r4 +; CHECK-BE-NEXT: mov r1, r5 +; CHECK-BE-NEXT: dmb ish +; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc} +; +; CHECK-THUMB-BE-LABEL: test13: +; CHECK-THUMB-BE: @ %bb.0: +; CHECK-THUMB-BE-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: mov r12, r0 +; CHECK-THUMB-BE-NEXT: dmb ish +; CHECK-THUMB-BE-NEXT: .LBB12_1: @ %atomicrmw.start +; CHECK-THUMB-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-BE-NEXT: ldrexd r0, r1, [r12] +; CHECK-THUMB-BE-NEXT: mov r4, r2 +; CHECK-THUMB-BE-NEXT: cmp r3, r1 +; CHECK-THUMB-BE-NEXT: sbcs.w lr, r2, r0 +; CHECK-THUMB-BE-NEXT: mov lr, r3 +; CHECK-THUMB-BE-NEXT: itt lo +; CHECK-THUMB-BE-NEXT: movlo lr, r1 +; CHECK-THUMB-BE-NEXT: movlo r4, r0 +; CHECK-THUMB-BE-NEXT: strexd r5, r4, lr, [r12] +; CHECK-THUMB-BE-NEXT: cmp r5, #0 +; CHECK-THUMB-BE-NEXT: bne .LBB12_1 +; CHECK-THUMB-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-BE-NEXT: dmb ish +; CHECK-THUMB-BE-NEXT: pop {r4, r5, r7, pc} + + %r = atomicrmw umax ptr %ptr, i64 %val seq_cst ret i64 %r } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-M: {{.*}} diff --git a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll index d48b070aa862e..9b26bc06f3990 100644 --- a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll +++ b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE --check-prefix=CHECK-ARM --check-prefix=CHECK-ARM-LE ; RUN: llc -mtriple=armebv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE --check-prefix=CHECK-ARM --check-prefix=CHECK-ARM-BE ; RUN: llc -mtriple=thumbv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-LE @@ -9,1387 +10,2387 @@ @var64 = global i64 0 define i8 @test_atomic_load_add_i8(i8 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_add_i8: +; CHECK-ARM-LABEL: test_atomic_load_add_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var8 +; CHECK-ARM-NEXT: movt r12, :upper16:var8 +; CHECK-ARM-NEXT: .LBB0_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaexb r1, [r12] +; CHECK-ARM-NEXT: add r3, r1, r0 +; CHECK-ARM-NEXT: stlexb r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB0_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_add_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var8 +; CHECK-THUMB-NEXT: movt r12, :upper16:var8 +; CHECK-THUMB-NEXT: .LBB0_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaexb r1, [r12] +; CHECK-THUMB-NEXT: adds r3, r1, r0 +; CHECK-THUMB-NEXT: stlexb r2, r3, 
[r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB0_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw add ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK: movt r[[ADDR]], :upper16:var8 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 -; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i8 %old } define i16 @test_atomic_load_add_i16(i16 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_add_i16: +; CHECK-ARM-LABEL: test_atomic_load_add_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var16 +; CHECK-ARM-NEXT: movt r12, :upper16:var16 +; CHECK-ARM-NEXT: .LBB1_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaexh r1, [r12] +; CHECK-ARM-NEXT: add r3, r1, r0 +; CHECK-ARM-NEXT: strexh r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB1_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_add_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var16 +; CHECK-THUMB-NEXT: movt r12, :upper16:var16 +; CHECK-THUMB-NEXT: .LBB1_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaexh r1, [r12] +; CHECK-THUMB-NEXT: adds r3, r1, r0 +; CHECK-THUMB-NEXT: strexh r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB1_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = 
atomicrmw add ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 -; CHECK: movt r[[ADDR]], :upper16:var16 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 -; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i16 %old } define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_add_i32: +; CHECK-ARM-LABEL: test_atomic_load_add_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var32 +; CHECK-ARM-NEXT: movt r12, :upper16:var32 +; CHECK-ARM-NEXT: .LBB2_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrex r1, [r12] +; CHECK-ARM-NEXT: add r3, r1, r0 +; CHECK-ARM-NEXT: stlex r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB2_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_add_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var32 +; CHECK-THUMB-NEXT: movt r12, :upper16:var32 +; CHECK-THUMB-NEXT: .LBB2_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrex r1, [r12] +; CHECK-THUMB-NEXT: adds r3, r1, r0 +; CHECK-THUMB-NEXT: stlex r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB2_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw add ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 -; CHECK: movt r[[ADDR]], :upper16:var32 -; CHECK: 
.LBB{{[0-9]+}}_1: -; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 -; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i32 %old } define void @test_atomic_load_add_i64(i64 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_add_i64: +; CHECK-ARM-LE-LABEL: test_atomic_load_add_i64: +; CHECK-ARM-LE: @ %bb.0: +; CHECK-ARM-LE-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARM-LE-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-LE-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-LE-NEXT: .LBB3_1: @ %atomicrmw.start +; CHECK-ARM-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-LE-NEXT: ldrexd r4, r5, [r2] +; CHECK-ARM-LE-NEXT: adds r6, r4, r0 +; CHECK-ARM-LE-NEXT: adc r7, r5, r1 +; CHECK-ARM-LE-NEXT: strexd r3, r6, r7, [r2] +; CHECK-ARM-LE-NEXT: cmp r3, #0 +; CHECK-ARM-LE-NEXT: bne .LBB3_1 +; CHECK-ARM-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-LE-NEXT: strd r4, r5, [r2] +; CHECK-ARM-LE-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-ARM-BE-LABEL: test_atomic_load_add_i64: +; CHECK-ARM-BE: @ %bb.0: +; CHECK-ARM-BE-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARM-BE-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-BE-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-BE-NEXT: .LBB3_1: @ %atomicrmw.start +; CHECK-ARM-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-BE-NEXT: ldrexd r4, r5, [r2] +; CHECK-ARM-BE-NEXT: adds r7, r5, r1 +; CHECK-ARM-BE-NEXT: adc r6, r4, r0 +; CHECK-ARM-BE-NEXT: strexd r3, r6, r7, [r2] +; CHECK-ARM-BE-NEXT: cmp r3, #0 +; CHECK-ARM-BE-NEXT: bne .LBB3_1 +; CHECK-ARM-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-BE-NEXT: strd r4, r5, [r2] +; CHECK-ARM-BE-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-THUMB-LE-LABEL: test_atomic_load_add_i64: +; 
CHECK-THUMB-LE: @ %bb.0: +; CHECK-THUMB-LE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-LE-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-LE-NEXT: .LBB3_1: @ %atomicrmw.start +; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-LE-NEXT: ldrexd r3, r2, [r12] +; CHECK-THUMB-LE-NEXT: adds.w lr, r3, r0 +; CHECK-THUMB-LE-NEXT: adc.w r4, r2, r1 +; CHECK-THUMB-LE-NEXT: strexd r5, lr, r4, [r12] +; CHECK-THUMB-LE-NEXT: cmp r5, #0 +; CHECK-THUMB-LE-NEXT: bne .LBB3_1 +; CHECK-THUMB-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-LE-NEXT: strd r3, r2, [r12] +; CHECK-THUMB-LE-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-THUMB-BE-LABEL: test_atomic_load_add_i64: +; CHECK-THUMB-BE: @ %bb.0: +; CHECK-THUMB-BE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-BE-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-BE-NEXT: .LBB3_1: @ %atomicrmw.start +; CHECK-THUMB-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-BE-NEXT: ldrexd r3, r2, [r12] +; CHECK-THUMB-BE-NEXT: adds.w lr, r2, r1 +; CHECK-THUMB-BE-NEXT: adc.w r4, r3, r0 +; CHECK-THUMB-BE-NEXT: strexd r5, r4, lr, [r12] +; CHECK-THUMB-BE-NEXT: cmp r5, #0 +; CHECK-THUMB-BE-NEXT: bne .LBB3_1 +; CHECK-THUMB-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-BE-NEXT: strd r3, r2, [r12] +; CHECK-THUMB-BE-NEXT: pop {r4, r5, r7, pc} %old = atomicrmw add ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 -; CHECK: movt r[[ADDR]], :upper16:var64 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-LE-NEXT: adds{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0 -; CHECK-LE-NEXT: adc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 -; CHECK-BE-NEXT: adds{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1 -; CHECK-BE-NEXT: adc{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0 -; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]] + store i64 %old, ptr @var64 ret void } define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_sub_i8: +; CHECK-ARM-LABEL: test_atomic_load_sub_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var8 +; CHECK-ARM-NEXT: movt r12, :upper16:var8 +; CHECK-ARM-NEXT: .LBB4_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrexb r1, [r12] +; CHECK-ARM-NEXT: sub r3, r1, r0 +; CHECK-ARM-NEXT: strexb r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB4_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_sub_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var8 +; CHECK-THUMB-NEXT: movt r12, :upper16:var8 +; CHECK-THUMB-NEXT: .LBB4_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexb r1, [r12] +; CHECK-THUMB-NEXT: subs r3, r1, r0 +; CHECK-THUMB-NEXT: strexb r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB4_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw sub ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK: movt r[[ADDR]], :upper16:var8 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable 
guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 -; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i8 %old } define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_sub_i16: +; CHECK-ARM-LABEL: test_atomic_load_sub_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var16 +; CHECK-ARM-NEXT: movt r12, :upper16:var16 +; CHECK-ARM-NEXT: .LBB5_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrexh r1, [r12] +; CHECK-ARM-NEXT: sub r3, r1, r0 +; CHECK-ARM-NEXT: stlexh r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB5_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_sub_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var16 +; CHECK-THUMB-NEXT: movt r12, :upper16:var16 +; CHECK-THUMB-NEXT: .LBB5_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexh r1, [r12] +; CHECK-THUMB-NEXT: subs r3, r1, r0 +; CHECK-THUMB-NEXT: stlexh r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB5_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw sub ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 -; CHECK: movt r[[ADDR]], :upper16:var16 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 -; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i16 %old } define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_sub_i32: +; CHECK-ARM-LABEL: test_atomic_load_sub_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var32 +; CHECK-ARM-NEXT: movt r12, :upper16:var32 +; CHECK-ARM-NEXT: .LBB6_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaex r1, [r12] +; CHECK-ARM-NEXT: sub r3, r1, r0 +; CHECK-ARM-NEXT: strex r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB6_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_sub_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var32 +; CHECK-THUMB-NEXT: movt r12, :upper16:var32 +; CHECK-THUMB-NEXT: .LBB6_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaex r1, [r12] +; CHECK-THUMB-NEXT: subs r3, r1, r0 +; CHECK-THUMB-NEXT: strex r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB6_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw sub ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 -; CHECK: movt r[[ADDR]], :upper16:var32 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 -; CHECK-NEXT: strex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i32 %old } define void @test_atomic_load_sub_i64(i64 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_sub_i64: +; CHECK-ARM-LE-LABEL: test_atomic_load_sub_i64: +; CHECK-ARM-LE: @ %bb.0: +; CHECK-ARM-LE-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARM-LE-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-LE-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-LE-NEXT: .LBB7_1: @ %atomicrmw.start +; CHECK-ARM-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-LE-NEXT: ldaexd r4, r5, [r2] +; CHECK-ARM-LE-NEXT: subs r6, r4, r0 +; CHECK-ARM-LE-NEXT: sbc r7, r5, r1 +; CHECK-ARM-LE-NEXT: stlexd r3, r6, r7, [r2] +; CHECK-ARM-LE-NEXT: cmp r3, #0 +; CHECK-ARM-LE-NEXT: bne .LBB7_1 +; CHECK-ARM-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-LE-NEXT: strd r4, r5, [r2] +; CHECK-ARM-LE-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-ARM-BE-LABEL: test_atomic_load_sub_i64: +; CHECK-ARM-BE: @ %bb.0: +; CHECK-ARM-BE-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARM-BE-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-BE-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-BE-NEXT: .LBB7_1: @ %atomicrmw.start +; CHECK-ARM-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-BE-NEXT: ldaexd r4, r5, [r2] +; CHECK-ARM-BE-NEXT: subs r7, r5, r1 +; CHECK-ARM-BE-NEXT: sbc r6, r4, r0 +; CHECK-ARM-BE-NEXT: stlexd r3, r6, r7, [r2] +; CHECK-ARM-BE-NEXT: cmp r3, #0 +; CHECK-ARM-BE-NEXT: bne .LBB7_1 +; CHECK-ARM-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-BE-NEXT: strd r4, r5, [r2] +; CHECK-ARM-BE-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-THUMB-LE-LABEL: test_atomic_load_sub_i64: +; CHECK-THUMB-LE: @ %bb.0: +; CHECK-THUMB-LE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-LE-NEXT: movt r12, :upper16:var64 
+; CHECK-THUMB-LE-NEXT: .LBB7_1: @ %atomicrmw.start +; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-LE-NEXT: ldaexd r3, r2, [r12] +; CHECK-THUMB-LE-NEXT: subs.w lr, r3, r0 +; CHECK-THUMB-LE-NEXT: sbc.w r4, r2, r1 +; CHECK-THUMB-LE-NEXT: stlexd r5, lr, r4, [r12] +; CHECK-THUMB-LE-NEXT: cmp r5, #0 +; CHECK-THUMB-LE-NEXT: bne .LBB7_1 +; CHECK-THUMB-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-LE-NEXT: strd r3, r2, [r12] +; CHECK-THUMB-LE-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-THUMB-BE-LABEL: test_atomic_load_sub_i64: +; CHECK-THUMB-BE: @ %bb.0: +; CHECK-THUMB-BE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-BE-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-BE-NEXT: .LBB7_1: @ %atomicrmw.start +; CHECK-THUMB-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-BE-NEXT: ldaexd r3, r2, [r12] +; CHECK-THUMB-BE-NEXT: subs.w lr, r2, r1 +; CHECK-THUMB-BE-NEXT: sbc.w r4, r3, r0 +; CHECK-THUMB-BE-NEXT: stlexd r5, r4, lr, [r12] +; CHECK-THUMB-BE-NEXT: cmp r5, #0 +; CHECK-THUMB-BE-NEXT: bne .LBB7_1 +; CHECK-THUMB-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-BE-NEXT: strd r3, r2, [r12] +; CHECK-THUMB-BE-NEXT: pop {r4, r5, r7, pc} %old = atomicrmw sub ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 -; CHECK: movt r[[ADDR]], :upper16:var64 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-LE-NEXT: subs{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0 -; CHECK-LE-NEXT: sbc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 -; CHECK-BE-NEXT: subs{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1 -; CHECK-BE-NEXT: sbc{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0 -; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]] + store i64 %old, ptr @var64 ret void } define i8 @test_atomic_load_and_i8(i8 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_and_i8: +; CHECK-ARM-LABEL: test_atomic_load_and_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var8 +; CHECK-ARM-NEXT: movt r12, :upper16:var8 +; CHECK-ARM-NEXT: .LBB8_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrexb r1, [r12] +; CHECK-ARM-NEXT: and r3, r1, r0 +; CHECK-ARM-NEXT: stlexb r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB8_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_and_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var8 +; CHECK-THUMB-NEXT: movt r12, :upper16:var8 +; CHECK-THUMB-NEXT: .LBB8_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexb r1, [r12] +; CHECK-THUMB-NEXT: and.w r3, r1, r0 +; CHECK-THUMB-NEXT: stlexb r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB8_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw and ptr @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK: movt r[[ADDR]], :upper16:var8 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable 
guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 -; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i8 %old } define i16 @test_atomic_load_and_i16(i16 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_and_i16: +; CHECK-ARM-LABEL: test_atomic_load_and_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var16 +; CHECK-ARM-NEXT: movt r12, :upper16:var16 +; CHECK-ARM-NEXT: .LBB9_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrexh r1, [r12] +; CHECK-ARM-NEXT: and r3, r1, r0 +; CHECK-ARM-NEXT: strexh r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB9_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_and_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var16 +; CHECK-THUMB-NEXT: movt r12, :upper16:var16 +; CHECK-THUMB-NEXT: .LBB9_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexh r1, [r12] +; CHECK-THUMB-NEXT: and.w r3, r1, r0 +; CHECK-THUMB-NEXT: strexh r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB9_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw and ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 -; CHECK: movt r[[ADDR]], :upper16:var16 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 -; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i16 %old } define i32 @test_atomic_load_and_i32(i32 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_and_i32: +; CHECK-ARM-LABEL: test_atomic_load_and_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var32 +; CHECK-ARM-NEXT: movt r12, :upper16:var32 +; CHECK-ARM-NEXT: .LBB10_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaex r1, [r12] +; CHECK-ARM-NEXT: and r3, r1, r0 +; CHECK-ARM-NEXT: stlex r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB10_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_and_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var32 +; CHECK-THUMB-NEXT: movt r12, :upper16:var32 +; CHECK-THUMB-NEXT: .LBB10_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaex r1, [r12] +; CHECK-THUMB-NEXT: and.w r3, r1, r0 +; CHECK-THUMB-NEXT: stlex r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB10_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw and ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 -; CHECK: movt r[[ADDR]], :upper16:var32 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 -; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i32 %old } define void @test_atomic_load_and_i64(i64 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_and_i64: +; CHECK-ARM-LABEL: test_atomic_load_and_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARM-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-NEXT: .LBB11_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaexd r4, r5, [r2] +; CHECK-ARM-NEXT: and r7, r5, r1 +; CHECK-ARM-NEXT: and r6, r4, r0 +; CHECK-ARM-NEXT: strexd r3, r6, r7, [r2] +; CHECK-ARM-NEXT: cmp r3, #0 +; CHECK-ARM-NEXT: bne .LBB11_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: strd r4, r5, [r2] +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-THUMB-LABEL: test_atomic_load_and_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-NEXT: .LBB11_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaexd r3, r2, [r12] +; CHECK-THUMB-NEXT: and.w lr, r2, r1 +; CHECK-THUMB-NEXT: and.w r4, r3, r0 +; CHECK-THUMB-NEXT: strexd r5, r4, lr, [r12] +; CHECK-THUMB-NEXT: cmp r5, #0 +; CHECK-THUMB-NEXT: bne .LBB11_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: strd r3, r2, [r12] +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} %old = atomicrmw and ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 -; CHECK: movt r[[ADDR]], :upper16:var64 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] ; r0, r1 below is a 
reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-LE-DAG: and{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0 -; CHECK-LE-DAG: and{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1 -; CHECK-BE-DAG: and{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1 -; CHECK-BE-DAG: and{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0 -; CHECK: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]] + store i64 %old, ptr @var64 ret void } define i8 @test_atomic_load_or_i8(i8 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_or_i8: +; CHECK-ARM-LABEL: test_atomic_load_or_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var8 +; CHECK-ARM-NEXT: movt r12, :upper16:var8 +; CHECK-ARM-NEXT: .LBB12_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaexb r1, [r12] +; CHECK-ARM-NEXT: orr r3, r1, r0 +; CHECK-ARM-NEXT: stlexb r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB12_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_or_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var8 +; CHECK-THUMB-NEXT: movt r12, :upper16:var8 +; CHECK-THUMB-NEXT: .LBB12_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaexb r1, [r12] +; CHECK-THUMB-NEXT: orr.w r3, r1, r0 +; CHECK-THUMB-NEXT: stlexb r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB12_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw or ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK: movt r[[ADDR]], :upper16:var8 -; CHECK: .LBB{{[0-9]+}}_1: 
-; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 -; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i8 %old } define i16 @test_atomic_load_or_i16(i16 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_or_i16: +; CHECK-ARM-LABEL: test_atomic_load_or_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var16 +; CHECK-ARM-NEXT: movt r12, :upper16:var16 +; CHECK-ARM-NEXT: .LBB13_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrexh r1, [r12] +; CHECK-ARM-NEXT: orr r3, r1, r0 +; CHECK-ARM-NEXT: strexh r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB13_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_or_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var16 +; CHECK-THUMB-NEXT: movt r12, :upper16:var16 +; CHECK-THUMB-NEXT: .LBB13_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexh r1, [r12] +; CHECK-THUMB-NEXT: orr.w r3, r1, r0 +; CHECK-THUMB-NEXT: strexh r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB13_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw or ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 -; CHECK: movt r[[ADDR]], :upper16:var16 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 -; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i16 %old } define i32 @test_atomic_load_or_i32(i32 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_or_i32: +; CHECK-ARM-LABEL: test_atomic_load_or_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var32 +; CHECK-ARM-NEXT: movt r12, :upper16:var32 +; CHECK-ARM-NEXT: .LBB14_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaex r1, [r12] +; CHECK-ARM-NEXT: orr r3, r1, r0 +; CHECK-ARM-NEXT: strex r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB14_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_or_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var32 +; CHECK-THUMB-NEXT: movt r12, :upper16:var32 +; CHECK-THUMB-NEXT: .LBB14_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaex r1, [r12] +; CHECK-THUMB-NEXT: orr.w r3, r1, r0 +; CHECK-THUMB-NEXT: strex r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB14_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw or ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 -; CHECK: movt r[[ADDR]], :upper16:var32 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 -; CHECK-NEXT: strex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i32 %old } define void @test_atomic_load_or_i64(i64 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_or_i64: +; CHECK-ARM-LABEL: test_atomic_load_or_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARM-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-NEXT: .LBB15_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrexd r4, r5, [r2] +; CHECK-ARM-NEXT: orr r7, r5, r1 +; CHECK-ARM-NEXT: orr r6, r4, r0 +; CHECK-ARM-NEXT: stlexd r3, r6, r7, [r2] +; CHECK-ARM-NEXT: cmp r3, #0 +; CHECK-ARM-NEXT: bne .LBB15_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: strd r4, r5, [r2] +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-THUMB-LABEL: test_atomic_load_or_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-NEXT: .LBB15_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexd r3, r2, [r12] +; CHECK-THUMB-NEXT: orr.w lr, r2, r1 +; CHECK-THUMB-NEXT: orr.w r4, r3, r0 +; CHECK-THUMB-NEXT: stlexd r5, r4, lr, [r12] +; CHECK-THUMB-NEXT: cmp r5, #0 +; CHECK-THUMB-NEXT: bne .LBB15_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: strd r3, r2, [r12] +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} %old = atomicrmw or ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 -; CHECK: movt r[[ADDR]], :upper16:var64 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] ; r0, r1 below is a 
reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-LE-DAG: orr{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0 -; CHECK-LE-DAG: orr{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1 -; CHECK-BE-DAG: orr{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1 -; CHECK-BE-DAG: orr{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0 -; CHECK: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]] + store i64 %old, ptr @var64 ret void } define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_xor_i8: +; CHECK-ARM-LABEL: test_atomic_load_xor_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var8 +; CHECK-ARM-NEXT: movt r12, :upper16:var8 +; CHECK-ARM-NEXT: .LBB16_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaexb r1, [r12] +; CHECK-ARM-NEXT: eor r3, r1, r0 +; CHECK-ARM-NEXT: strexb r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB16_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_xor_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var8 +; CHECK-THUMB-NEXT: movt r12, :upper16:var8 +; CHECK-THUMB-NEXT: .LBB16_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaexb r1, [r12] +; CHECK-THUMB-NEXT: eor.w r3, r1, r0 +; CHECK-THUMB-NEXT: strexb r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB16_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw xor ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK: movt r[[ADDR]], :upper16:var8 -; CHECK: 
.LBB{{[0-9]+}}_1: -; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 -; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i8 %old } define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_xor_i16: +; CHECK-ARM-LABEL: test_atomic_load_xor_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var16 +; CHECK-ARM-NEXT: movt r12, :upper16:var16 +; CHECK-ARM-NEXT: .LBB17_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrexh r1, [r12] +; CHECK-ARM-NEXT: eor r3, r1, r0 +; CHECK-ARM-NEXT: stlexh r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB17_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_xor_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var16 +; CHECK-THUMB-NEXT: movt r12, :upper16:var16 +; CHECK-THUMB-NEXT: .LBB17_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexh r1, [r12] +; CHECK-THUMB-NEXT: eor.w r3, r1, r0 +; CHECK-THUMB-NEXT: stlexh r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB17_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw xor ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 -; CHECK: movt r[[ADDR]], :upper16:var16 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 -; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i16 %old } define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_xor_i32: +; CHECK-ARM-LABEL: test_atomic_load_xor_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var32 +; CHECK-ARM-NEXT: movt r12, :upper16:var32 +; CHECK-ARM-NEXT: .LBB18_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaex r1, [r12] +; CHECK-ARM-NEXT: eor r3, r1, r0 +; CHECK-ARM-NEXT: stlex r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB18_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_xor_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var32 +; CHECK-THUMB-NEXT: movt r12, :upper16:var32 +; CHECK-THUMB-NEXT: .LBB18_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaex r1, [r12] +; CHECK-THUMB-NEXT: eor.w r3, r1, r0 +; CHECK-THUMB-NEXT: stlex r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB18_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw xor ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 -; CHECK: movt r[[ADDR]], :upper16:var32 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 -; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i32 %old } define void @test_atomic_load_xor_i64(i64 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_xor_i64: +; CHECK-ARM-LABEL: test_atomic_load_xor_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARM-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-NEXT: .LBB19_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrexd r4, r5, [r2] +; CHECK-ARM-NEXT: eor r7, r5, r1 +; CHECK-ARM-NEXT: eor r6, r4, r0 +; CHECK-ARM-NEXT: strexd r3, r6, r7, [r2] +; CHECK-ARM-NEXT: cmp r3, #0 +; CHECK-ARM-NEXT: bne .LBB19_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: strd r4, r5, [r2] +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-THUMB-LABEL: test_atomic_load_xor_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-NEXT: .LBB19_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexd r3, r2, [r12] +; CHECK-THUMB-NEXT: eor.w lr, r2, r1 +; CHECK-THUMB-NEXT: eor.w r4, r3, r0 +; CHECK-THUMB-NEXT: strexd r5, r4, lr, [r12] +; CHECK-THUMB-NEXT: cmp r5, #0 +; CHECK-THUMB-NEXT: bne .LBB19_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: strd r3, r2, [r12] +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} %old = atomicrmw xor ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 -; CHECK: movt r[[ADDR]], :upper16:var64 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] ; r0, r1 below is a 
reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-LE-DAG: eor{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0 -; CHECK-LE-DAG: eor{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1 -; CHECK-BE-DAG: eor{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1 -; CHECK-BE-DAG: eor{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0 -; CHECK: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]] + store i64 %old, ptr @var64 ret void } define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r2, :lower16:var8 +; CHECK-NEXT: movt r2, :upper16:var8 +; CHECK-NEXT: .LBB20_1: @ %atomicrmw.start +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexb r1, [r2] +; CHECK-NEXT: strexb r3, r0, [r2] +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: bne .LBB20_1 +; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bx lr %old = atomicrmw xchg ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK: movt r[[ADDR]], :upper16:var8 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r0, [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: mov r0, r[[OLD]] ret i8 %old } define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r2, :lower16:var16 +; CHECK-NEXT: movt r2, :upper16:var16 +; CHECK-NEXT: .LBB21_1: @ %atomicrmw.start +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldaexh r1, [r2] +; CHECK-NEXT: stlexh r3, r0, [r2] +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: bne .LBB21_1 +; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bx lr %old = atomicrmw xchg ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 -; CHECK: movt r[[ADDR]], :upper16:var16 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r0, [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: mov r0, r[[OLD]] ret i16 %old } define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r2, :lower16:var32 +; CHECK-NEXT: movt r2, :upper16:var32 +; CHECK-NEXT: .LBB22_1: @ %atomicrmw.start +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r1, [r2] +; CHECK-NEXT: stlex r3, r0, [r2] +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: bne .LBB22_1 +; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bx lr %old = atomicrmw xchg ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 -; CHECK: movt r[[ADDR]], :upper16:var32 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r0, [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: mov r0, r[[OLD]] ret i32 %old } define void @test_atomic_load_xchg_i64(i64 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_xchg_i64: +; CHECK-ARM-LABEL: test_atomic_load_xchg_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: push {r4, r5, r11, lr} +; CHECK-ARM-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-NEXT: @ kill: def $r1 killed $r1 killed $r0_r1 def $r0_r1 +; CHECK-ARM-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-NEXT: @ kill: def $r0 killed $r0 killed $r0_r1 def $r0_r1 +; CHECK-ARM-NEXT: .LBB23_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaexd r4, r5, [r2] +; CHECK-ARM-NEXT: strexd r3, r0, r1, [r2] +; CHECK-ARM-NEXT: cmp r3, #0 +; CHECK-ARM-NEXT: bne .LBB23_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: strd r4, r5, [r2] +; CHECK-ARM-NEXT: pop {r4, r5, r11, pc} +; +; CHECK-THUMB-LABEL: test_atomic_load_xchg_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: push {r7, lr} +; CHECK-THUMB-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-NEXT: .LBB23_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaexd r3, lr, [r12] +; CHECK-THUMB-NEXT: strexd r2, r0, r1, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB23_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: strd r3, lr, [r12] +; CHECK-THUMB-NEXT: pop {r7, pc} %old = atomicrmw xchg ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 -; CHECK: movt r[[ADDR]], :upper16:var64 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] store i64 %old, ptr @var64 ret void } define i8 @test_atomic_load_min_i8(i8 signext %offset) nounwind { -; CHECK-LABEL: test_atomic_load_min_i8: +; CHECK-ARM-LABEL: test_atomic_load_min_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var8 +; CHECK-ARM-NEXT: movt r12, :upper16:var8 +; CHECK-ARM-NEXT: .LBB24_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaexb r1, [r12] +; CHECK-ARM-NEXT: sxtb r3, r1 +; CHECK-ARM-NEXT: cmp r3, r0 +; CHECK-ARM-NEXT: mov r3, r0 +; CHECK-ARM-NEXT: movle r3, r1 +; CHECK-ARM-NEXT: strexb r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB24_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_min_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var8 +; CHECK-THUMB-NEXT: movt r12, :upper16:var8 +; CHECK-THUMB-NEXT: .LBB24_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaexb r1, [r12] +; CHECK-THUMB-NEXT: sxtb r3, r1 +; CHECK-THUMB-NEXT: cmp r3, r0 +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: it le +; CHECK-THUMB-NEXT: movle r3, r1 +; CHECK-THUMB-NEXT: strexb r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB24_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw min ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK-DAG: movw [[ADDR:r[0-9]+|lr]], :lower16:var8 -; CHECK-DAG: movt [[ADDR]], :upper16:var8 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]] -; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]] + ; r0 below is a reasonable 
guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: cmp r[[OLDX]], r0 ; Thumb mode: it le -; CHECK: movle r[[OLDX]], r[[OLD]] -; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[OLDX]], {{.*}}[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i8 %old } define i16 @test_atomic_load_min_i16(i16 signext %offset) nounwind { -; CHECK-LABEL: test_atomic_load_min_i16: +; CHECK-ARM-LABEL: test_atomic_load_min_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var16 +; CHECK-ARM-NEXT: movt r12, :upper16:var16 +; CHECK-ARM-NEXT: .LBB25_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrexh r1, [r12] +; CHECK-ARM-NEXT: sxth r3, r1 +; CHECK-ARM-NEXT: cmp r3, r0 +; CHECK-ARM-NEXT: mov r3, r0 +; CHECK-ARM-NEXT: movle r3, r1 +; CHECK-ARM-NEXT: stlexh r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB25_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_min_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var16 +; CHECK-THUMB-NEXT: movt r12, :upper16:var16 +; CHECK-THUMB-NEXT: .LBB25_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexh r1, [r12] +; CHECK-THUMB-NEXT: sxth r3, r1 +; CHECK-THUMB-NEXT: cmp r3, r0 +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: it le +; CHECK-THUMB-NEXT: movle r3, r1 +; CHECK-THUMB-NEXT: stlexh r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB25_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw min ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var16 -; CHECK: movt [[ADDR]], :upper16:var16 - -; CHECK: 
.LBB{{[0-9]+}}_1: -; CHECK: ldrexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]] -; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]] + ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: cmp r[[OLDX]], r0 ; Thumb mode: it le -; CHECK: movle r[[OLDX]], r[[OLD]] -; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r[[OLDX]], {{.*}}[[ADDR]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i16 %old } define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_min_i32: +; CHECK-ARM-LABEL: test_atomic_load_min_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var32 +; CHECK-ARM-NEXT: movt r12, :upper16:var32 +; CHECK-ARM-NEXT: .LBB26_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrex r1, [r12] +; CHECK-ARM-NEXT: mov r3, r0 +; CHECK-ARM-NEXT: cmp r1, r0 +; CHECK-ARM-NEXT: movle r3, r1 +; CHECK-ARM-NEXT: strex r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB26_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_min_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var32 +; CHECK-THUMB-NEXT: movt r12, :upper16:var32 +; CHECK-THUMB-NEXT: .LBB26_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrex r1, [r12] +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: cmp r1, r0 +; CHECK-THUMB-NEXT: it le +; CHECK-THUMB-NEXT: movle r3, r1 +; CHECK-THUMB-NEXT: strex r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB26_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw min ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], 
:lower16:var32 -; CHECK: movt r[[ADDR]], :upper16:var32 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 -; CHECK-NEXT: cmp r[[OLD]], r0 ; Thumb mode: it le -; CHECK: movle r[[NEW]], r[[OLD]] -; CHECK-NEXT: strex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i32 %old } define void @test_atomic_load_min_i64(i64 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_min_i64: +; CHECK-ARM-LE-LABEL: test_atomic_load_min_i64: +; CHECK-ARM-LE: @ %bb.0: +; CHECK-ARM-LE-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARM-LE-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-LE-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-LE-NEXT: .LBB27_1: @ %atomicrmw.start +; CHECK-ARM-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-LE-NEXT: ldaexd r4, r5, [r2] +; CHECK-ARM-LE-NEXT: mov r7, r1 +; CHECK-ARM-LE-NEXT: cmp r0, r4 +; CHECK-ARM-LE-NEXT: sbcs r3, r1, r5 +; CHECK-ARM-LE-NEXT: movge r7, r5 +; CHECK-ARM-LE-NEXT: mov r6, r0 +; CHECK-ARM-LE-NEXT: movge r6, r4 +; CHECK-ARM-LE-NEXT: stlexd r3, r6, r7, [r2] +; CHECK-ARM-LE-NEXT: cmp r3, #0 +; CHECK-ARM-LE-NEXT: bne .LBB27_1 +; CHECK-ARM-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-LE-NEXT: strd r4, r5, [r2] +; CHECK-ARM-LE-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-ARM-BE-LABEL: test_atomic_load_min_i64: +; CHECK-ARM-BE: @ %bb.0: +; CHECK-ARM-BE-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARM-BE-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-BE-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-BE-NEXT: .LBB27_1: @ %atomicrmw.start +; CHECK-ARM-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-BE-NEXT: ldaexd r4, r5, [r2] +; CHECK-ARM-BE-NEXT: mov r7, r1 +; CHECK-ARM-BE-NEXT: cmp r1, r5 +; CHECK-ARM-BE-NEXT: sbcs r3, r0, r4 +; 
CHECK-ARM-BE-NEXT: movge r7, r5 +; CHECK-ARM-BE-NEXT: mov r6, r0 +; CHECK-ARM-BE-NEXT: movge r6, r4 +; CHECK-ARM-BE-NEXT: stlexd r3, r6, r7, [r2] +; CHECK-ARM-BE-NEXT: cmp r3, #0 +; CHECK-ARM-BE-NEXT: bne .LBB27_1 +; CHECK-ARM-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-BE-NEXT: strd r4, r5, [r2] +; CHECK-ARM-BE-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-THUMB-LE-LABEL: test_atomic_load_min_i64: +; CHECK-THUMB-LE: @ %bb.0: +; CHECK-THUMB-LE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-LE-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-LE-NEXT: .LBB27_1: @ %atomicrmw.start +; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-LE-NEXT: ldaexd r3, lr, [r12] +; CHECK-THUMB-LE-NEXT: mov r4, r0 +; CHECK-THUMB-LE-NEXT: cmp r0, r3 +; CHECK-THUMB-LE-NEXT: sbcs.w r2, r1, lr +; CHECK-THUMB-LE-NEXT: mov r2, r1 +; CHECK-THUMB-LE-NEXT: itt ge +; CHECK-THUMB-LE-NEXT: movge r2, lr +; CHECK-THUMB-LE-NEXT: movge r4, r3 +; CHECK-THUMB-LE-NEXT: stlexd r5, r4, r2, [r12] +; CHECK-THUMB-LE-NEXT: cmp r5, #0 +; CHECK-THUMB-LE-NEXT: bne .LBB27_1 +; CHECK-THUMB-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-LE-NEXT: strd r3, lr, [r12] +; CHECK-THUMB-LE-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-THUMB-BE-LABEL: test_atomic_load_min_i64: +; CHECK-THUMB-BE: @ %bb.0: +; CHECK-THUMB-BE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-BE-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-BE-NEXT: .LBB27_1: @ %atomicrmw.start +; CHECK-THUMB-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-BE-NEXT: ldaexd r3, lr, [r12] +; CHECK-THUMB-BE-NEXT: mov r4, r0 +; CHECK-THUMB-BE-NEXT: cmp r1, lr +; CHECK-THUMB-BE-NEXT: sbcs.w r2, r0, r3 +; CHECK-THUMB-BE-NEXT: mov r2, r1 +; CHECK-THUMB-BE-NEXT: itt ge +; CHECK-THUMB-BE-NEXT: movge r2, lr +; CHECK-THUMB-BE-NEXT: movge r4, r3 +; CHECK-THUMB-BE-NEXT: stlexd r5, r4, r2, [r12] +; CHECK-THUMB-BE-NEXT: cmp r5, #0 +; CHECK-THUMB-BE-NEXT: bne 
.LBB27_1 +; CHECK-THUMB-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-BE-NEXT: strd r3, lr, [r12] +; CHECK-THUMB-BE-NEXT: pop {r4, r5, r7, pc} %old = atomicrmw min ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 -; CHECK: movt r[[ADDR]], :upper16:var64 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1 -; CHECK-ARM-LE: subs {{[^,]+}}, r0, [[OLD1]] -; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]] -; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]] -; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]] -; CHECK-ARM: movge [[MINHI]], [[OLD2]] -; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0 -; CHECK-ARM: movge [[MINLO]], [[OLD1]] -; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]] -; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] + store i64 %old, ptr @var64 ret void } define i8 @test_atomic_load_max_i8(i8 signext %offset) nounwind { -; CHECK-LABEL: test_atomic_load_max_i8: +; CHECK-ARM-LABEL: test_atomic_load_max_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var8 +; CHECK-ARM-NEXT: movt r12, :upper16:var8 +; CHECK-ARM-NEXT: .LBB28_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaexb r1, [r12] +; CHECK-ARM-NEXT: sxtb r3, r1 +; CHECK-ARM-NEXT: cmp r3, r0 +; CHECK-ARM-NEXT: mov r3, r0 +; CHECK-ARM-NEXT: movgt r3, r1 +; CHECK-ARM-NEXT: stlexb r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB28_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_max_i8: 
+; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var8 +; CHECK-THUMB-NEXT: movt r12, :upper16:var8 +; CHECK-THUMB-NEXT: .LBB28_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaexb r1, [r12] +; CHECK-THUMB-NEXT: sxtb r3, r1 +; CHECK-THUMB-NEXT: cmp r3, r0 +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: it gt +; CHECK-THUMB-NEXT: movgt r3, r1 +; CHECK-THUMB-NEXT: stlexb r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB28_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw max ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var8 -; CHECK: movt [[ADDR]], :upper16:var8 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]] -; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]] + ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: cmp r[[OLDX]], r0 ; Thumb mode: it gt -; CHECK: movgt r[[OLDX]], r[[OLD]] -; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[OLDX]], {{.*}}[[ADDR]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i8 %old } define i16 @test_atomic_load_max_i16(i16 signext %offset) nounwind { -; CHECK-LABEL: test_atomic_load_max_i16: +; CHECK-ARM-LABEL: test_atomic_load_max_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var16 +; CHECK-ARM-NEXT: movt r12, :upper16:var16 +; CHECK-ARM-NEXT: .LBB29_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaexh r1, [r12] +; CHECK-ARM-NEXT: sxth r3, r1 +; CHECK-ARM-NEXT: cmp r3, r0 +; CHECK-ARM-NEXT: mov r3, r0 +; CHECK-ARM-NEXT: movgt r3, r1 +; CHECK-ARM-NEXT: strexh r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB29_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_max_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var16 +; CHECK-THUMB-NEXT: movt r12, :upper16:var16 +; CHECK-THUMB-NEXT: .LBB29_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaexh r1, [r12] +; CHECK-THUMB-NEXT: sxth r3, r1 +; CHECK-THUMB-NEXT: cmp r3, r0 +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: it gt +; CHECK-THUMB-NEXT: movgt r3, r1 +; CHECK-THUMB-NEXT: strexh r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB29_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw max ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 -; CHECK: movt r[[ADDR]], :upper16:var16 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] -; CHECK-NEXT: sxth 
r[[OLDX:[0-9]+]], r[[OLD]] + ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: cmp r[[OLDX]], r0 ; Thumb mode: it gt -; CHECK: movgt r[[OLDX]], r[[OLD]] -; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i16 %old } define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_max_i32: +; CHECK-ARM-LABEL: test_atomic_load_max_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var32 +; CHECK-ARM-NEXT: movt r12, :upper16:var32 +; CHECK-ARM-NEXT: .LBB30_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrex r1, [r12] +; CHECK-ARM-NEXT: mov r3, r0 +; CHECK-ARM-NEXT: cmp r1, r0 +; CHECK-ARM-NEXT: movgt r3, r1 +; CHECK-ARM-NEXT: stlex r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB30_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_max_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var32 +; CHECK-THUMB-NEXT: movt r12, :upper16:var32 +; CHECK-THUMB-NEXT: .LBB30_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrex r1, [r12] +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: cmp r1, r0 +; CHECK-THUMB-NEXT: it gt +; CHECK-THUMB-NEXT: movgt r3, r1 +; CHECK-THUMB-NEXT: stlex r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB30_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw max ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 -; CHECK: movt r[[ADDR]], :upper16:var32 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrex 
r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 -; CHECK-NEXT: cmp r[[OLD]], r0 ; Thumb mode: it gt -; CHECK: movgt r[[NEW]], r[[OLD]] -; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i32 %old } define void @test_atomic_load_max_i64(i64 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_max_i64: +; CHECK-ARM-LE-LABEL: test_atomic_load_max_i64: +; CHECK-ARM-LE: @ %bb.0: +; CHECK-ARM-LE-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARM-LE-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-LE-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-LE-NEXT: .LBB31_1: @ %atomicrmw.start +; CHECK-ARM-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-LE-NEXT: ldrexd r4, r5, [r2] +; CHECK-ARM-LE-NEXT: mov r7, r1 +; CHECK-ARM-LE-NEXT: cmp r0, r4 +; CHECK-ARM-LE-NEXT: sbcs r3, r1, r5 +; CHECK-ARM-LE-NEXT: movlt r7, r5 +; CHECK-ARM-LE-NEXT: mov r6, r0 +; CHECK-ARM-LE-NEXT: movlt r6, r4 +; CHECK-ARM-LE-NEXT: strexd r3, r6, r7, [r2] +; CHECK-ARM-LE-NEXT: cmp r3, #0 +; CHECK-ARM-LE-NEXT: bne .LBB31_1 +; CHECK-ARM-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-LE-NEXT: strd r4, r5, [r2] +; CHECK-ARM-LE-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-ARM-BE-LABEL: test_atomic_load_max_i64: +; CHECK-ARM-BE: @ %bb.0: +; CHECK-ARM-BE-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARM-BE-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-BE-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-BE-NEXT: .LBB31_1: @ %atomicrmw.start +; CHECK-ARM-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-BE-NEXT: ldrexd r4, r5, [r2] +; CHECK-ARM-BE-NEXT: mov r7, r1 +; CHECK-ARM-BE-NEXT: cmp r1, r5 +; CHECK-ARM-BE-NEXT: sbcs r3, r0, r4 +; CHECK-ARM-BE-NEXT: movlt r7, r5 +; CHECK-ARM-BE-NEXT: mov r6, r0 +; CHECK-ARM-BE-NEXT: movlt r6, r4 +; 
CHECK-ARM-BE-NEXT: strexd r3, r6, r7, [r2] +; CHECK-ARM-BE-NEXT: cmp r3, #0 +; CHECK-ARM-BE-NEXT: bne .LBB31_1 +; CHECK-ARM-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-BE-NEXT: strd r4, r5, [r2] +; CHECK-ARM-BE-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-THUMB-LE-LABEL: test_atomic_load_max_i64: +; CHECK-THUMB-LE: @ %bb.0: +; CHECK-THUMB-LE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-LE-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-LE-NEXT: .LBB31_1: @ %atomicrmw.start +; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-LE-NEXT: ldrexd r3, lr, [r12] +; CHECK-THUMB-LE-NEXT: mov r4, r0 +; CHECK-THUMB-LE-NEXT: cmp r0, r3 +; CHECK-THUMB-LE-NEXT: sbcs.w r2, r1, lr +; CHECK-THUMB-LE-NEXT: mov r2, r1 +; CHECK-THUMB-LE-NEXT: itt lt +; CHECK-THUMB-LE-NEXT: movlt r2, lr +; CHECK-THUMB-LE-NEXT: movlt r4, r3 +; CHECK-THUMB-LE-NEXT: strexd r5, r4, r2, [r12] +; CHECK-THUMB-LE-NEXT: cmp r5, #0 +; CHECK-THUMB-LE-NEXT: bne .LBB31_1 +; CHECK-THUMB-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-LE-NEXT: strd r3, lr, [r12] +; CHECK-THUMB-LE-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-THUMB-BE-LABEL: test_atomic_load_max_i64: +; CHECK-THUMB-BE: @ %bb.0: +; CHECK-THUMB-BE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-BE-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-BE-NEXT: .LBB31_1: @ %atomicrmw.start +; CHECK-THUMB-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-BE-NEXT: ldrexd r3, lr, [r12] +; CHECK-THUMB-BE-NEXT: mov r4, r0 +; CHECK-THUMB-BE-NEXT: cmp r1, lr +; CHECK-THUMB-BE-NEXT: sbcs.w r2, r0, r3 +; CHECK-THUMB-BE-NEXT: mov r2, r1 +; CHECK-THUMB-BE-NEXT: itt lt +; CHECK-THUMB-BE-NEXT: movlt r2, lr +; CHECK-THUMB-BE-NEXT: movlt r4, r3 +; CHECK-THUMB-BE-NEXT: strexd r5, r4, r2, [r12] +; CHECK-THUMB-BE-NEXT: cmp r5, #0 +; CHECK-THUMB-BE-NEXT: bne .LBB31_1 +; CHECK-THUMB-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-BE-NEXT: strd r3, lr, [r12] +; 
CHECK-THUMB-BE-NEXT: pop {r4, r5, r7, pc} %old = atomicrmw max ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 -; CHECK: movt r[[ADDR]], :upper16:var64 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1 -; CHECK-ARM-LE: subs {{[^,]+}}, r0, [[OLD1]] -; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]] -; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]] -; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]] -; CHECK-ARM: movlt [[MINHI]], [[OLD2]] -; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0 -; CHECK-ARM: movlt [[MINLO]], [[OLD1]] -; CHECK-ARM: strexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]] -; CHECK-THUMB: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] + store i64 %old, ptr @var64 ret void } define i8 @test_atomic_load_umin_i8(i8 zeroext %offset) nounwind { -; CHECK-LABEL: test_atomic_load_umin_i8: +; CHECK-ARM-LABEL: test_atomic_load_umin_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var8 +; CHECK-ARM-NEXT: movt r12, :upper16:var8 +; CHECK-ARM-NEXT: .LBB32_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrexb r1, [r12] +; CHECK-ARM-NEXT: mov r3, r0 +; CHECK-ARM-NEXT: cmp r1, r0 +; CHECK-ARM-NEXT: movls r3, r1 +; CHECK-ARM-NEXT: strexb r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB32_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_umin_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var8 +; CHECK-THUMB-NEXT: movt r12, :upper16:var8 +; 
CHECK-THUMB-NEXT: .LBB32_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexb r1, [r12] +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: cmp r1, r0 +; CHECK-THUMB-NEXT: it ls +; CHECK-THUMB-NEXT: movls r3, r1 +; CHECK-THUMB-NEXT: strexb r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB32_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw umin ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var8 -; CHECK: movt [[ADDR]], :upper16:var8 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 -; CHECK-NEXT: cmp r[[OLD]], r0 ; Thumb mode: it ls -; CHECK: movls r[[NEW]], r[[OLD]] -; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i8 %old } define i16 @test_atomic_load_umin_i16(i16 zeroext %offset) nounwind { -; CHECK-LABEL: test_atomic_load_umin_i16: +; CHECK-ARM-LABEL: test_atomic_load_umin_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var16 +; CHECK-ARM-NEXT: movt r12, :upper16:var16 +; CHECK-ARM-NEXT: .LBB33_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaexh r1, [r12] +; CHECK-ARM-NEXT: mov r3, r0 +; CHECK-ARM-NEXT: cmp r1, r0 +; CHECK-ARM-NEXT: movls r3, r1 +; CHECK-ARM-NEXT: strexh r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB33_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_umin_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, 
:lower16:var16 +; CHECK-THUMB-NEXT: movt r12, :upper16:var16 +; CHECK-THUMB-NEXT: .LBB33_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaexh r1, [r12] +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: cmp r1, r0 +; CHECK-THUMB-NEXT: it ls +; CHECK-THUMB-NEXT: movls r3, r1 +; CHECK-THUMB-NEXT: strexh r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB33_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw umin ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var16 -; CHECK: movt [[ADDR]], :upper16:var16 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 -; CHECK-NEXT: cmp r[[OLD]], r0 ; Thumb mode: it ls -; CHECK: movls r[[NEW]], r[[OLD]] -; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i16 %old } define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_umin_i32: +; CHECK-ARM-LABEL: test_atomic_load_umin_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var32 +; CHECK-ARM-NEXT: movt r12, :upper16:var32 +; CHECK-ARM-NEXT: .LBB34_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaex r1, [r12] +; CHECK-ARM-NEXT: mov r3, r0 +; CHECK-ARM-NEXT: cmp r1, r0 +; CHECK-ARM-NEXT: movls r3, r1 +; CHECK-ARM-NEXT: stlex r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB34_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_umin_i32: +; 
CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var32 +; CHECK-THUMB-NEXT: movt r12, :upper16:var32 +; CHECK-THUMB-NEXT: .LBB34_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaex r1, [r12] +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: cmp r1, r0 +; CHECK-THUMB-NEXT: it ls +; CHECK-THUMB-NEXT: movls r3, r1 +; CHECK-THUMB-NEXT: stlex r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB34_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw umin ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 -; CHECK: movt r[[ADDR]], :upper16:var32 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 -; CHECK-NEXT: cmp r[[OLD]], r0 ; Thumb mode: it ls -; CHECK: movls r[[NEW]], r[[OLD]] -; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i32 %old } define void @test_atomic_load_umin_i64(i64 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_umin_i64: +; CHECK-ARM-LE-LABEL: test_atomic_load_umin_i64: +; CHECK-ARM-LE: @ %bb.0: +; CHECK-ARM-LE-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARM-LE-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-LE-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-LE-NEXT: .LBB35_1: @ %atomicrmw.start +; CHECK-ARM-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-LE-NEXT: ldaexd r4, r5, [r2] +; CHECK-ARM-LE-NEXT: mov r7, r1 +; CHECK-ARM-LE-NEXT: cmp r0, r4 +; CHECK-ARM-LE-NEXT: sbcs r3, r1, r5 +; CHECK-ARM-LE-NEXT: movhs r7, r5 +; CHECK-ARM-LE-NEXT: mov r6, r0 +; CHECK-ARM-LE-NEXT: movhs r6, r4 +; CHECK-ARM-LE-NEXT: stlexd 
r3, r6, r7, [r2] +; CHECK-ARM-LE-NEXT: cmp r3, #0 +; CHECK-ARM-LE-NEXT: bne .LBB35_1 +; CHECK-ARM-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-LE-NEXT: strd r4, r5, [r2] +; CHECK-ARM-LE-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-ARM-BE-LABEL: test_atomic_load_umin_i64: +; CHECK-ARM-BE: @ %bb.0: +; CHECK-ARM-BE-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARM-BE-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-BE-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-BE-NEXT: .LBB35_1: @ %atomicrmw.start +; CHECK-ARM-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-BE-NEXT: ldaexd r4, r5, [r2] +; CHECK-ARM-BE-NEXT: mov r7, r1 +; CHECK-ARM-BE-NEXT: cmp r1, r5 +; CHECK-ARM-BE-NEXT: sbcs r3, r0, r4 +; CHECK-ARM-BE-NEXT: movhs r7, r5 +; CHECK-ARM-BE-NEXT: mov r6, r0 +; CHECK-ARM-BE-NEXT: movhs r6, r4 +; CHECK-ARM-BE-NEXT: stlexd r3, r6, r7, [r2] +; CHECK-ARM-BE-NEXT: cmp r3, #0 +; CHECK-ARM-BE-NEXT: bne .LBB35_1 +; CHECK-ARM-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-BE-NEXT: strd r4, r5, [r2] +; CHECK-ARM-BE-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-THUMB-LE-LABEL: test_atomic_load_umin_i64: +; CHECK-THUMB-LE: @ %bb.0: +; CHECK-THUMB-LE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-LE-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-LE-NEXT: .LBB35_1: @ %atomicrmw.start +; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-LE-NEXT: ldaexd r3, lr, [r12] +; CHECK-THUMB-LE-NEXT: mov r4, r0 +; CHECK-THUMB-LE-NEXT: cmp r0, r3 +; CHECK-THUMB-LE-NEXT: sbcs.w r2, r1, lr +; CHECK-THUMB-LE-NEXT: mov r2, r1 +; CHECK-THUMB-LE-NEXT: itt hs +; CHECK-THUMB-LE-NEXT: movhs r2, lr +; CHECK-THUMB-LE-NEXT: movhs r4, r3 +; CHECK-THUMB-LE-NEXT: stlexd r5, r4, r2, [r12] +; CHECK-THUMB-LE-NEXT: cmp r5, #0 +; CHECK-THUMB-LE-NEXT: bne .LBB35_1 +; CHECK-THUMB-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-LE-NEXT: strd r3, lr, [r12] +; CHECK-THUMB-LE-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-THUMB-BE-LABEL: 
test_atomic_load_umin_i64: +; CHECK-THUMB-BE: @ %bb.0: +; CHECK-THUMB-BE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-BE-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-BE-NEXT: .LBB35_1: @ %atomicrmw.start +; CHECK-THUMB-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-BE-NEXT: ldaexd r3, lr, [r12] +; CHECK-THUMB-BE-NEXT: mov r4, r0 +; CHECK-THUMB-BE-NEXT: cmp r1, lr +; CHECK-THUMB-BE-NEXT: sbcs.w r2, r0, r3 +; CHECK-THUMB-BE-NEXT: mov r2, r1 +; CHECK-THUMB-BE-NEXT: itt hs +; CHECK-THUMB-BE-NEXT: movhs r2, lr +; CHECK-THUMB-BE-NEXT: movhs r4, r3 +; CHECK-THUMB-BE-NEXT: stlexd r5, r4, r2, [r12] +; CHECK-THUMB-BE-NEXT: cmp r5, #0 +; CHECK-THUMB-BE-NEXT: bne .LBB35_1 +; CHECK-THUMB-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-BE-NEXT: strd r3, lr, [r12] +; CHECK-THUMB-BE-NEXT: pop {r4, r5, r7, pc} %old = atomicrmw umin ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 -; CHECK: movt r[[ADDR]], :upper16:var64 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1 -; CHECK-ARM-LE: subs {{[^,]+}}, r0, [[OLD1]] -; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]] -; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]] -; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]] -; CHECK-ARM: movhs [[MINHI]], [[OLD2]] -; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0 -; CHECK-ARM: movhs [[MINLO]], [[OLD1]] -; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]] -; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] + store i64 %old, ptr @var64 ret void } define i8 @test_atomic_load_umax_i8(i8 zeroext %offset) nounwind { -; CHECK-LABEL: test_atomic_load_umax_i8: +; CHECK-ARM-LABEL: test_atomic_load_umax_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var8 +; CHECK-ARM-NEXT: movt r12, :upper16:var8 +; CHECK-ARM-NEXT: .LBB36_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaexb r1, [r12] +; CHECK-ARM-NEXT: mov r3, r0 +; CHECK-ARM-NEXT: cmp r1, r0 +; CHECK-ARM-NEXT: movhi r3, r1 +; CHECK-ARM-NEXT: stlexb r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB36_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_umax_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var8 +; CHECK-THUMB-NEXT: movt r12, :upper16:var8 +; CHECK-THUMB-NEXT: .LBB36_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaexb r1, [r12] +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: cmp r1, r0 +; CHECK-THUMB-NEXT: it hi +; CHECK-THUMB-NEXT: movhi r3, r1 +; CHECK-THUMB-NEXT: stlexb r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB36_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; 
CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw umax ptr @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var8 -; CHECK: movt [[ADDR]], :upper16:var8 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 -; CHECK-NEXT: cmp r[[OLD]], r0 ; Thumb mode: it hi -; CHECK: movhi r[[NEW]], r[[OLD]] -; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i8 %old } define i16 @test_atomic_load_umax_i16(i16 zeroext %offset) nounwind { -; CHECK-LABEL: test_atomic_load_umax_i16: +; CHECK-ARM-LABEL: test_atomic_load_umax_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var16 +; CHECK-ARM-NEXT: movt r12, :upper16:var16 +; CHECK-ARM-NEXT: .LBB37_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrexh r1, [r12] +; CHECK-ARM-NEXT: mov r3, r0 +; CHECK-ARM-NEXT: cmp r1, r0 +; CHECK-ARM-NEXT: movhi r3, r1 +; CHECK-ARM-NEXT: strexh r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB37_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_umax_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var16 +; CHECK-THUMB-NEXT: movt r12, :upper16:var16 +; CHECK-THUMB-NEXT: .LBB37_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexh r1, [r12] +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: cmp r1, r0 +; CHECK-THUMB-NEXT: it hi +; CHECK-THUMB-NEXT: movhi r3, r1 +; CHECK-THUMB-NEXT: strexh r2, r3, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne 
.LBB37_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw umax ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var16 -; CHECK: movt [[ADDR]], :upper16:var16 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 -; CHECK-NEXT: cmp r[[OLD]], r0 ; Thumb mode: it hi -; CHECK: movhi r[[NEW]], r[[OLD]] -; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i16 %old } define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_umax_i32: +; CHECK-ARM-LABEL: test_atomic_load_umax_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var32 +; CHECK-ARM-NEXT: movt r12, :upper16:var32 +; CHECK-ARM-NEXT: .LBB38_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaex r1, [r12] +; CHECK-ARM-NEXT: mov r3, r0 +; CHECK-ARM-NEXT: cmp r1, r0 +; CHECK-ARM-NEXT: movhi r3, r1 +; CHECK-ARM-NEXT: stlex r2, r3, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB38_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_load_umax_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var32 +; CHECK-THUMB-NEXT: movt r12, :upper16:var32 +; CHECK-THUMB-NEXT: .LBB38_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaex r1, [r12] +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: cmp r1, r0 +; CHECK-THUMB-NEXT: it hi +; CHECK-THUMB-NEXT: movhi r3, r1 +; CHECK-THUMB-NEXT: stlex r2, r3, [r12] +; 
CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB38_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr %old = atomicrmw umax ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 -; CHECK: movt r[[ADDR]], :upper16:var32 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 -; CHECK-NEXT: cmp r[[OLD]], r0 ; Thumb mode: it hi -; CHECK: movhi r[[NEW]], r[[OLD]] -; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: mov r0, r[[OLD]] + ret i32 %old } define void @test_atomic_load_umax_i64(i64 %offset) nounwind { -; CHECK-LABEL: test_atomic_load_umax_i64: +; CHECK-ARM-LE-LABEL: test_atomic_load_umax_i64: +; CHECK-ARM-LE: @ %bb.0: +; CHECK-ARM-LE-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARM-LE-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-LE-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-LE-NEXT: .LBB39_1: @ %atomicrmw.start +; CHECK-ARM-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-LE-NEXT: ldaexd r4, r5, [r2] +; CHECK-ARM-LE-NEXT: mov r7, r1 +; CHECK-ARM-LE-NEXT: cmp r0, r4 +; CHECK-ARM-LE-NEXT: sbcs r3, r1, r5 +; CHECK-ARM-LE-NEXT: movlo r7, r5 +; CHECK-ARM-LE-NEXT: mov r6, r0 +; CHECK-ARM-LE-NEXT: movlo r6, r4 +; CHECK-ARM-LE-NEXT: stlexd r3, r6, r7, [r2] +; CHECK-ARM-LE-NEXT: cmp r3, #0 +; CHECK-ARM-LE-NEXT: bne .LBB39_1 +; CHECK-ARM-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-LE-NEXT: strd r4, r5, [r2] +; CHECK-ARM-LE-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-ARM-BE-LABEL: test_atomic_load_umax_i64: +; CHECK-ARM-BE: @ %bb.0: +; CHECK-ARM-BE-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-ARM-BE-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-BE-NEXT: 
movt r2, :upper16:var64 +; CHECK-ARM-BE-NEXT: .LBB39_1: @ %atomicrmw.start +; CHECK-ARM-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-BE-NEXT: ldaexd r4, r5, [r2] +; CHECK-ARM-BE-NEXT: mov r7, r1 +; CHECK-ARM-BE-NEXT: cmp r1, r5 +; CHECK-ARM-BE-NEXT: sbcs r3, r0, r4 +; CHECK-ARM-BE-NEXT: movlo r7, r5 +; CHECK-ARM-BE-NEXT: mov r6, r0 +; CHECK-ARM-BE-NEXT: movlo r6, r4 +; CHECK-ARM-BE-NEXT: stlexd r3, r6, r7, [r2] +; CHECK-ARM-BE-NEXT: cmp r3, #0 +; CHECK-ARM-BE-NEXT: bne .LBB39_1 +; CHECK-ARM-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-BE-NEXT: strd r4, r5, [r2] +; CHECK-ARM-BE-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; CHECK-THUMB-LE-LABEL: test_atomic_load_umax_i64: +; CHECK-THUMB-LE: @ %bb.0: +; CHECK-THUMB-LE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-LE-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-LE-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-LE-NEXT: .LBB39_1: @ %atomicrmw.start +; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-LE-NEXT: ldaexd r3, lr, [r12] +; CHECK-THUMB-LE-NEXT: mov r4, r0 +; CHECK-THUMB-LE-NEXT: cmp r0, r3 +; CHECK-THUMB-LE-NEXT: sbcs.w r2, r1, lr +; CHECK-THUMB-LE-NEXT: mov r2, r1 +; CHECK-THUMB-LE-NEXT: itt lo +; CHECK-THUMB-LE-NEXT: movlo r2, lr +; CHECK-THUMB-LE-NEXT: movlo r4, r3 +; CHECK-THUMB-LE-NEXT: stlexd r5, r4, r2, [r12] +; CHECK-THUMB-LE-NEXT: cmp r5, #0 +; CHECK-THUMB-LE-NEXT: bne .LBB39_1 +; CHECK-THUMB-LE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-LE-NEXT: strd r3, lr, [r12] +; CHECK-THUMB-LE-NEXT: pop {r4, r5, r7, pc} +; +; CHECK-THUMB-BE-LABEL: test_atomic_load_umax_i64: +; CHECK-THUMB-BE: @ %bb.0: +; CHECK-THUMB-BE-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-BE-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-BE-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-BE-NEXT: .LBB39_1: @ %atomicrmw.start +; CHECK-THUMB-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-BE-NEXT: ldaexd r3, lr, [r12] +; CHECK-THUMB-BE-NEXT: mov r4, r0 +; CHECK-THUMB-BE-NEXT: cmp r1, lr +; 
CHECK-THUMB-BE-NEXT: sbcs.w r2, r0, r3 +; CHECK-THUMB-BE-NEXT: mov r2, r1 +; CHECK-THUMB-BE-NEXT: itt lo +; CHECK-THUMB-BE-NEXT: movlo r2, lr +; CHECK-THUMB-BE-NEXT: movlo r4, r3 +; CHECK-THUMB-BE-NEXT: stlexd r5, r4, r2, [r12] +; CHECK-THUMB-BE-NEXT: cmp r5, #0 +; CHECK-THUMB-BE-NEXT: bne .LBB39_1 +; CHECK-THUMB-BE-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-BE-NEXT: strd r3, lr, [r12] +; CHECK-THUMB-BE-NEXT: pop {r4, r5, r7, pc} %old = atomicrmw umax ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 -; CHECK: movt r[[ADDR]], :upper16:var64 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1 -; CHECK-ARM-LE: subs {{[^,]+}}, r0, [[OLD1]] -; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]] -; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]] -; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]] -; CHECK-ARM: movlo [[MINHI]], [[OLD2]] -; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0 -; CHECK-ARM: movlo [[MINLO]], [[OLD1]] -; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]] -; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] + store i64 %old, ptr @var64 ret void } define i8 @test_atomic_cmpxchg_i8(i8 zeroext %wanted, i8 zeroext %new) nounwind { -; CHECK-LABEL: test_atomic_cmpxchg_i8: +; CHECK-ARM-LABEL: test_atomic_cmpxchg_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var8 +; CHECK-ARM-NEXT: movt r12, :upper16:var8 +; CHECK-ARM-NEXT: .LBB40_1: @ %cmpxchg.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaexb r2, [r12] +; CHECK-ARM-NEXT: cmp r2, r0 +; CHECK-ARM-NEXT: bne .LBB40_4 +; 
CHECK-ARM-NEXT: @ %bb.2: @ %cmpxchg.trystore +; CHECK-ARM-NEXT: @ in Loop: Header=BB40_1 Depth=1 +; CHECK-ARM-NEXT: strexb r3, r1, [r12] +; CHECK-ARM-NEXT: cmp r3, #0 +; CHECK-ARM-NEXT: bne .LBB40_1 +; CHECK-ARM-NEXT: @ %bb.3: @ %cmpxchg.end +; CHECK-ARM-NEXT: mov r0, r2 +; CHECK-ARM-NEXT: bx lr +; CHECK-ARM-NEXT: .LBB40_4: @ %cmpxchg.nostore +; CHECK-ARM-NEXT: clrex +; CHECK-ARM-NEXT: mov r0, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_cmpxchg_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var8 +; CHECK-THUMB-NEXT: mov r2, r0 +; CHECK-THUMB-NEXT: movt r12, :upper16:var8 +; CHECK-THUMB-NEXT: .LBB40_1: @ %cmpxchg.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaexb r0, [r12] +; CHECK-THUMB-NEXT: cmp r0, r2 +; CHECK-THUMB-NEXT: bne .LBB40_3 +; CHECK-THUMB-NEXT: @ %bb.2: @ %cmpxchg.trystore +; CHECK-THUMB-NEXT: @ in Loop: Header=BB40_1 Depth=1 +; CHECK-THUMB-NEXT: strexb r3, r1, [r12] +; CHECK-THUMB-NEXT: cmp r3, #0 +; CHECK-THUMB-NEXT: it eq +; CHECK-THUMB-NEXT: bxeq lr +; CHECK-THUMB-NEXT: b .LBB40_1 +; CHECK-THUMB-NEXT: .LBB40_3: @ %cmpxchg.nostore +; CHECK-THUMB-NEXT: clrex +; CHECK-THUMB-NEXT: bx lr %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new acquire acquire %old = extractvalue { i8, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK-DAG: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK-DAG: movt r[[ADDR]], :upper16:var8 -; CHECK-THUMB-DAG: mov r[[WANTED:[0-9]+]], r0 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM-NEXT: cmp r[[OLD]], r0 -; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]] -; CHECK-NEXT: bne .LBB{{[0-9]+}}_{{[0-9]}} -; CHECK-NEXT: %bb.2: ; As above, r1 is a reasonable guess. 
-; CHECK: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-ARM-NEXT: bne .LBB{{[0-9]+}}_{{[0-9]}} -; CHECK-THUMB-NEXT: it eq -; CHECK-THUMB-NEXT: bxeq lr -; CHECK-ARM: mov r0, r[[OLD]] -; CHECK-ARM: clrex -; CHECK: bx lr -; CHECK-NOT: dmb -; CHECK-NOT: mcr ret i8 %old } define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounwind { -; CHECK-LABEL: test_atomic_cmpxchg_i16: +; CHECK-ARM-LABEL: test_atomic_cmpxchg_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var16 +; CHECK-ARM-NEXT: movt r12, :upper16:var16 +; CHECK-ARM-NEXT: .LBB41_1: @ %cmpxchg.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldaexh r2, [r12] +; CHECK-ARM-NEXT: cmp r2, r0 +; CHECK-ARM-NEXT: bne .LBB41_4 +; CHECK-ARM-NEXT: @ %bb.2: @ %cmpxchg.trystore +; CHECK-ARM-NEXT: @ in Loop: Header=BB41_1 Depth=1 +; CHECK-ARM-NEXT: stlexh r3, r1, [r12] +; CHECK-ARM-NEXT: cmp r3, #0 +; CHECK-ARM-NEXT: bne .LBB41_1 +; CHECK-ARM-NEXT: @ %bb.3: @ %cmpxchg.end +; CHECK-ARM-NEXT: mov r0, r2 +; CHECK-ARM-NEXT: bx lr +; CHECK-ARM-NEXT: .LBB41_4: @ %cmpxchg.nostore +; CHECK-ARM-NEXT: clrex +; CHECK-ARM-NEXT: mov r0, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_atomic_cmpxchg_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var16 +; CHECK-THUMB-NEXT: mov r2, r0 +; CHECK-THUMB-NEXT: movt r12, :upper16:var16 +; CHECK-THUMB-NEXT: .LBB41_1: @ %cmpxchg.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldaexh r0, [r12] +; CHECK-THUMB-NEXT: cmp r0, r2 +; CHECK-THUMB-NEXT: bne .LBB41_3 +; CHECK-THUMB-NEXT: @ %bb.2: @ %cmpxchg.trystore +; CHECK-THUMB-NEXT: @ in Loop: Header=BB41_1 Depth=1 +; CHECK-THUMB-NEXT: stlexh r3, r1, [r12] +; CHECK-THUMB-NEXT: cmp r3, #0 +; CHECK-THUMB-NEXT: it eq +; CHECK-THUMB-NEXT: bxeq lr +; CHECK-THUMB-NEXT: b .LBB41_1 +; CHECK-THUMB-NEXT: .LBB41_3: @ %cmpxchg.nostore +; CHECK-THUMB-NEXT: clrex +; CHECK-THUMB-NEXT: bx lr 
%pair = cmpxchg ptr @var16, i16 %wanted, i16 %new seq_cst seq_cst %old = extractvalue { i16, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK-DAG: movw r[[ADDR:[0-9]+]], :lower16:var16 -; CHECK-DAG: movt r[[ADDR]], :upper16:var16 -; CHECK-THUMB-DAG: mov r[[WANTED:[0-9]+]], r0 - -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-ARM-NEXT: cmp r[[OLD]], r0 -; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]] -; CHECK-NEXT: bne .LBB{{[0-9]+}}_{{[0-9]}} -; CHECK-NEXT: %bb.2: ; As above, r1 is a reasonable guess. -; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-ARM-NEXT: bne .LBB{{[0-9]+}}_{{[0-9]}} -; CHECK-THUMB-NEXT: it eq -; CHECK-THUMB-NEXT: bxeq lr -; CHECK-ARM: mov r0, r[[OLD]] -; CHECK: bx lr -; CHECK-ARM-NEXT: .LBB{{[0-9]+}}_{{[0-9]}} -; CHECK-ARM-NEXT: clrex -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK-ARM: mov r0, r[[OLD]] -; CHECK-ARM-NEXT: bx lr + ret i16 %old } define void @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { -; CHECK-LABEL: test_atomic_cmpxchg_i32: +; CHECK-ARM-LABEL: test_atomic_cmpxchg_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: movw r12, :lower16:var32 +; CHECK-ARM-NEXT: movt r12, :upper16:var32 +; CHECK-ARM-NEXT: .LBB42_1: @ %cmpxchg.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrex r3, [r12] +; CHECK-ARM-NEXT: cmp r3, r0 +; CHECK-ARM-NEXT: bne .LBB42_4 +; CHECK-ARM-NEXT: @ %bb.2: @ %cmpxchg.trystore +; CHECK-ARM-NEXT: @ in Loop: Header=BB42_1 Depth=1 +; CHECK-ARM-NEXT: stlex r2, r1, [r12] +; CHECK-ARM-NEXT: cmp r2, #0 +; CHECK-ARM-NEXT: bne .LBB42_1 +; CHECK-ARM-NEXT: @ %bb.3: @ %cmpxchg.end +; CHECK-ARM-NEXT: str r3, [r12] +; CHECK-ARM-NEXT: bx lr +; CHECK-ARM-NEXT: .LBB42_4: @ %cmpxchg.nostore +; CHECK-ARM-NEXT: clrex +; CHECK-ARM-NEXT: str r3, [r12] +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: 
test_atomic_cmpxchg_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var32 +; CHECK-THUMB-NEXT: movt r12, :upper16:var32 +; CHECK-THUMB-NEXT: .LBB42_1: @ %cmpxchg.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrex r3, [r12] +; CHECK-THUMB-NEXT: cmp r3, r0 +; CHECK-THUMB-NEXT: bne .LBB42_4 +; CHECK-THUMB-NEXT: @ %bb.2: @ %cmpxchg.trystore +; CHECK-THUMB-NEXT: @ in Loop: Header=BB42_1 Depth=1 +; CHECK-THUMB-NEXT: stlex r2, r1, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB42_1 +; CHECK-THUMB-NEXT: @ %bb.3: @ %cmpxchg.end +; CHECK-THUMB-NEXT: str.w r3, [r12] +; CHECK-THUMB-NEXT: bx lr +; CHECK-THUMB-NEXT: .LBB42_4: @ %cmpxchg.nostore +; CHECK-THUMB-NEXT: clrex +; CHECK-THUMB-NEXT: str.w r3, [r12] +; CHECK-THUMB-NEXT: bx lr %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new release monotonic %old = extractvalue { i32, i1 } %pair, 0 store i32 %old, ptr @var32 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 -; CHECK: movt r[[ADDR]], :upper16:var32 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: cmp r[[OLD]], r0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 -; CHECK-NEXT: %bb.2: ; As above, r1 is a reasonable guess. 
-; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK: str{{(.w)?}} r[[OLD]], -; CHECK-NEXT: bx lr -; CHECK-NEXT: .LBB{{[0-9]+}}_4: -; CHECK-NEXT: clrex -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK: str{{(.w)?}} r[[OLD]], -; CHECK-ARM-NEXT: bx lr + ret void } define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { -; CHECK-LABEL: test_atomic_cmpxchg_i64: +; CHECK-ARM-LE-LABEL: test_atomic_cmpxchg_i64: +; CHECK-ARM-LE: @ %bb.0: +; CHECK-ARM-LE-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-LE-NEXT: movw r12, :lower16:var64 +; CHECK-ARM-LE-NEXT: @ kill: def $r3 killed $r3 killed $r2_r3 def $r2_r3 +; CHECK-ARM-LE-NEXT: movt r12, :upper16:var64 +; CHECK-ARM-LE-NEXT: @ kill: def $r2 killed $r2 killed $r2_r3 def $r2_r3 +; CHECK-ARM-LE-NEXT: .LBB43_1: @ %cmpxchg.start +; CHECK-ARM-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-LE-NEXT: ldrexd r4, r5, [r12] +; CHECK-ARM-LE-NEXT: eor lr, r5, r1 +; CHECK-ARM-LE-NEXT: eor r6, r4, r0 +; CHECK-ARM-LE-NEXT: orrs r6, r6, lr +; CHECK-ARM-LE-NEXT: bne .LBB43_4 +; CHECK-ARM-LE-NEXT: @ %bb.2: @ %cmpxchg.trystore +; CHECK-ARM-LE-NEXT: @ in Loop: Header=BB43_1 Depth=1 +; CHECK-ARM-LE-NEXT: strexd r6, r2, r3, [r12] +; CHECK-ARM-LE-NEXT: cmp r6, #0 +; CHECK-ARM-LE-NEXT: bne .LBB43_1 +; CHECK-ARM-LE-NEXT: @ %bb.3: @ %cmpxchg.end +; CHECK-ARM-LE-NEXT: strd r4, r5, [r12] +; CHECK-ARM-LE-NEXT: pop {r4, r5, r6, pc} +; CHECK-ARM-LE-NEXT: .LBB43_4: @ %cmpxchg.nostore +; CHECK-ARM-LE-NEXT: clrex +; CHECK-ARM-LE-NEXT: strd r4, r5, [r12] +; CHECK-ARM-LE-NEXT: pop {r4, r5, r6, pc} +; +; CHECK-ARM-BE-LABEL: test_atomic_cmpxchg_i64: +; CHECK-ARM-BE: @ %bb.0: +; CHECK-ARM-BE-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-BE-NEXT: movw r12, :lower16:var64 +; CHECK-ARM-BE-NEXT: @ kill: def $r3 killed $r3 killed $r2_r3 def $r2_r3 +; CHECK-ARM-BE-NEXT: movt r12, :upper16:var64 +; CHECK-ARM-BE-NEXT: @ kill: def $r2 killed $r2 killed $r2_r3 def $r2_r3 +; 
CHECK-ARM-BE-NEXT: .LBB43_1: @ %cmpxchg.start +; CHECK-ARM-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-BE-NEXT: ldrexd r4, r5, [r12] +; CHECK-ARM-BE-NEXT: eor lr, r4, r0 +; CHECK-ARM-BE-NEXT: eor r6, r5, r1 +; CHECK-ARM-BE-NEXT: orrs r6, r6, lr +; CHECK-ARM-BE-NEXT: bne .LBB43_4 +; CHECK-ARM-BE-NEXT: @ %bb.2: @ %cmpxchg.trystore +; CHECK-ARM-BE-NEXT: @ in Loop: Header=BB43_1 Depth=1 +; CHECK-ARM-BE-NEXT: strexd r6, r2, r3, [r12] +; CHECK-ARM-BE-NEXT: cmp r6, #0 +; CHECK-ARM-BE-NEXT: bne .LBB43_1 +; CHECK-ARM-BE-NEXT: @ %bb.3: @ %cmpxchg.end +; CHECK-ARM-BE-NEXT: strd r4, r5, [r12] +; CHECK-ARM-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-ARM-BE-NEXT: .LBB43_4: @ %cmpxchg.nostore +; CHECK-ARM-BE-NEXT: clrex +; CHECK-ARM-BE-NEXT: strd r4, r5, [r12] +; CHECK-ARM-BE-NEXT: pop {r4, r5, r6, pc} +; +; CHECK-THUMB-LE-LABEL: test_atomic_cmpxchg_i64: +; CHECK-THUMB-LE: @ %bb.0: +; CHECK-THUMB-LE-NEXT: push {r4, r5, r6, lr} +; CHECK-THUMB-LE-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-LE-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-LE-NEXT: .LBB43_1: @ %cmpxchg.start +; CHECK-THUMB-LE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-LE-NEXT: ldrexd lr, r4, [r12] +; CHECK-THUMB-LE-NEXT: eor.w r5, r4, r1 +; CHECK-THUMB-LE-NEXT: eor.w r6, lr, r0 +; CHECK-THUMB-LE-NEXT: orrs r5, r6 +; CHECK-THUMB-LE-NEXT: bne .LBB43_4 +; CHECK-THUMB-LE-NEXT: @ %bb.2: @ %cmpxchg.trystore +; CHECK-THUMB-LE-NEXT: @ in Loop: Header=BB43_1 Depth=1 +; CHECK-THUMB-LE-NEXT: strexd r5, r2, r3, [r12] +; CHECK-THUMB-LE-NEXT: cmp r5, #0 +; CHECK-THUMB-LE-NEXT: bne .LBB43_1 +; CHECK-THUMB-LE-NEXT: @ %bb.3: @ %cmpxchg.end +; CHECK-THUMB-LE-NEXT: strd lr, r4, [r12] +; CHECK-THUMB-LE-NEXT: pop {r4, r5, r6, pc} +; CHECK-THUMB-LE-NEXT: .LBB43_4: @ %cmpxchg.nostore +; CHECK-THUMB-LE-NEXT: clrex +; CHECK-THUMB-LE-NEXT: strd lr, r4, [r12] +; CHECK-THUMB-LE-NEXT: pop {r4, r5, r6, pc} +; +; CHECK-THUMB-BE-LABEL: test_atomic_cmpxchg_i64: +; CHECK-THUMB-BE: @ %bb.0: +; CHECK-THUMB-BE-NEXT: push {r4, 
r5, r6, lr} +; CHECK-THUMB-BE-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-BE-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-BE-NEXT: .LBB43_1: @ %cmpxchg.start +; CHECK-THUMB-BE-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-BE-NEXT: ldrexd lr, r4, [r12] +; CHECK-THUMB-BE-NEXT: eor.w r5, lr, r0 +; CHECK-THUMB-BE-NEXT: eor.w r6, r4, r1 +; CHECK-THUMB-BE-NEXT: orrs r5, r6 +; CHECK-THUMB-BE-NEXT: bne .LBB43_4 +; CHECK-THUMB-BE-NEXT: @ %bb.2: @ %cmpxchg.trystore +; CHECK-THUMB-BE-NEXT: @ in Loop: Header=BB43_1 Depth=1 +; CHECK-THUMB-BE-NEXT: strexd r5, r2, r3, [r12] +; CHECK-THUMB-BE-NEXT: cmp r5, #0 +; CHECK-THUMB-BE-NEXT: bne .LBB43_1 +; CHECK-THUMB-BE-NEXT: @ %bb.3: @ %cmpxchg.end +; CHECK-THUMB-BE-NEXT: strd lr, r4, [r12] +; CHECK-THUMB-BE-NEXT: pop {r4, r5, r6, pc} +; CHECK-THUMB-BE-NEXT: .LBB43_4: @ %cmpxchg.nostore +; CHECK-THUMB-BE-NEXT: clrex +; CHECK-THUMB-BE-NEXT: strd lr, r4, [r12] +; CHECK-THUMB-BE-NEXT: pop {r4, r5, r6, pc} %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new monotonic monotonic %old = extractvalue { i64, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 -; CHECK: movt r[[ADDR]], :upper16:var64 -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]] ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0 -; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1 -; CHECK-ARM-LE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]] -; CHECK-THUMB-LE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_HI]], [[MISMATCH_LO]] -; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1 -; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0 -; CHECK-ARM-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]] -; CHECK-THUMB-BE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_LO]], [[MISMATCH_HI]] -; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 -; CHECK-NEXT: %bb.2: ; As above, r2, r3 is a reasonable guess. -; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] -; CHECK-NEXT: pop -; CHECK-NEXT: .LBB{{[0-9]+}}_4: -; CHECK-NEXT: clrex -; CHECK-NOT: dmb -; CHECK-NOT: mcr - -; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]] + store i64 %old, ptr @var64 ret void } define i8 @test_atomic_load_monotonic_i8() nounwind { ; CHECK-LABEL: test_atomic_load_monotonic_i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r0, :lower16:var8 +; CHECK-NEXT: movt r0, :upper16:var8 +; CHECK-NEXT: ldrb r0, [r0] +; CHECK-NEXT: bx lr %val = load atomic i8, ptr @var8 monotonic, align 1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK: movt r[[ADDR]], :upper16:var8 -; CHECK: ldrb r0, [r[[ADDR]]] -; CHECK-NOT: dmb -; CHECK-NOT: mcr ret i8 %val } define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind { -; CHECK-LABEL: test_atomic_load_monotonic_regoff_i8: +; CHECK-LE-LABEL: test_atomic_load_monotonic_regoff_i8: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldrb r0, [r0, r2] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: test_atomic_load_monotonic_regoff_i8: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldrb r0, [r1, r3] +; CHECK-BE-NEXT: bx 
lr %addr_int = add i64 %base, %off %addr = inttoptr i64 %addr_int to ptr %val = load atomic i8, ptr %addr monotonic, align 1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK-LE: ldrb r0, [r0, r2] -; CHECK-BE: ldrb r0, [r1, r3] -; CHECK-NOT: dmb -; CHECK-NOT: mcr ret i8 %val } define i8 @test_atomic_load_acquire_i8() nounwind { ; CHECK-LABEL: test_atomic_load_acquire_i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r0, :lower16:var8 +; CHECK-NEXT: movt r0, :upper16:var8 +; CHECK-NEXT: ldab r0, [r0] +; CHECK-NEXT: bx lr %val = load atomic i8, ptr @var8 acquire, align 1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movt r[[ADDR]], :upper16:var8 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: ldab r0, [r[[ADDR]]] -; CHECK-NOT: dmb -; CHECK-NOT: mcr ret i8 %val } define i8 @test_atomic_load_seq_cst_i8() nounwind { ; CHECK-LABEL: test_atomic_load_seq_cst_i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r0, :lower16:var8 +; CHECK-NEXT: movt r0, :upper16:var8 +; CHECK-NEXT: ldab r0, [r0] +; CHECK-NEXT: bx lr %val = load atomic i8, ptr @var8 seq_cst, align 1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movt r[[ADDR]], :upper16:var8 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: ldab r0, [r[[ADDR]]] -; CHECK-NOT: dmb -; CHECK-NOT: mcr ret i8 %val } define i16 @test_atomic_load_monotonic_i16() nounwind { ; CHECK-LABEL: test_atomic_load_monotonic_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r0, :lower16:var16 +; CHECK-NEXT: movt r0, :upper16:var16 +; CHECK-NEXT: ldrh r0, [r0] +; CHECK-NEXT: bx lr %val = load atomic i16, ptr @var16 monotonic, align 2 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movt r[[ADDR]], :upper16:var16 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: ldrh r0, [r[[ADDR]]] -; CHECK-NOT: dmb -; CHECK-NOT: mcr ret i16 %val } define 
i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind { -; CHECK-LABEL: test_atomic_load_monotonic_regoff_i32: +; CHECK-LE-LABEL: test_atomic_load_monotonic_regoff_i32: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r0, [r0, r2] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: test_atomic_load_monotonic_regoff_i32: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldr r0, [r1, r3] +; CHECK-BE-NEXT: bx lr %addr_int = add i64 %base, %off %addr = inttoptr i64 %addr_int to ptr %val = load atomic i32, ptr %addr monotonic, align 4 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK-LE: ldr r0, [r0, r2] -; CHECK-BE: ldr r0, [r1, r3] -; CHECK-NOT: dmb -; CHECK-NOT: mcr ret i32 %val } define i64 @test_atomic_load_seq_cst_i64() nounwind { ; CHECK-LABEL: test_atomic_load_seq_cst_i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r0, :lower16:var64 +; CHECK-NEXT: movt r0, :upper16:var64 +; CHECK-NEXT: ldaexd r0, r1, [r0] +; CHECK-NEXT: clrex +; CHECK-NEXT: bx lr %val = load atomic i64, ptr @var64 seq_cst, align 8 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movt r[[ADDR]], :upper16:var64 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: ldaexd r0, r1, [r[[ADDR]]] -; CHECK-NOT: dmb -; CHECK-NOT: mcr ret i64 %val } define void @test_atomic_store_monotonic_i8(i8 %val) nounwind { ; CHECK-LABEL: test_atomic_store_monotonic_i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, :lower16:var8 +; CHECK-NEXT: movt r1, :upper16:var8 +; CHECK-NEXT: strb r0, [r1] +; CHECK-NEXT: bx lr store atomic i8 %val, ptr @var8 monotonic, align 1 -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK: movt r[[ADDR]], :upper16:var8 -; CHECK: strb r0, [r[[ADDR]]] ret void } define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val) nounwind { -; CHECK-LABEL: test_atomic_store_monotonic_regoff_i8: +; CHECK-ARM-LE-LABEL: test_atomic_store_monotonic_regoff_i8: +; CHECK-ARM-LE: @ %bb.0: +; CHECK-ARM-LE-NEXT: ldrb r1, [sp] 
+; CHECK-ARM-LE-NEXT: strb r1, [r0, r2] +; CHECK-ARM-LE-NEXT: bx lr +; +; CHECK-ARM-BE-LABEL: test_atomic_store_monotonic_regoff_i8: +; CHECK-ARM-BE: @ %bb.0: +; CHECK-ARM-BE-NEXT: ldrb r0, [sp, #3] +; CHECK-ARM-BE-NEXT: strb r0, [r1, r3] +; CHECK-ARM-BE-NEXT: bx lr +; +; CHECK-THUMB-LE-LABEL: test_atomic_store_monotonic_regoff_i8: +; CHECK-THUMB-LE: @ %bb.0: +; CHECK-THUMB-LE-NEXT: ldrb.w r1, [sp] +; CHECK-THUMB-LE-NEXT: strb r1, [r0, r2] +; CHECK-THUMB-LE-NEXT: bx lr +; +; CHECK-THUMB-BE-LABEL: test_atomic_store_monotonic_regoff_i8: +; CHECK-THUMB-BE: @ %bb.0: +; CHECK-THUMB-BE-NEXT: ldrb.w r0, [sp, #3] +; CHECK-THUMB-BE-NEXT: strb r0, [r1, r3] +; CHECK-THUMB-BE-NEXT: bx lr %addr_int = add i64 %base, %off %addr = inttoptr i64 %addr_int to ptr store atomic i8 %val, ptr %addr monotonic, align 1 -; CHECK-LE: ldr{{b?(\.w)?}} [[VAL:r[0-9]+]], [sp] -; CHECK-LE: strb [[VAL]], [r0, r2] -; CHECK-BE: ldrb{{(\.w)?}} [[VAL:r[0-9]+]], [sp, #3] -; CHECK-BE: strb [[VAL]], [r1, r3] ret void } define void @test_atomic_store_release_i8(i8 %val) nounwind { ; CHECK-LABEL: test_atomic_store_release_i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, :lower16:var8 +; CHECK-NEXT: movt r1, :upper16:var8 +; CHECK-NEXT: stlb r0, [r1] +; CHECK-NEXT: bx lr store atomic i8 %val, ptr @var8 release, align 1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movt r[[ADDR]], :upper16:var8 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: stlb r0, [r[[ADDR]]] -; CHECK-NOT: dmb -; CHECK-NOT: mcr ret void } define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind { ; CHECK-LABEL: test_atomic_store_seq_cst_i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, :lower16:var8 +; CHECK-NEXT: movt r1, :upper16:var8 +; CHECK-NEXT: stlb r0, [r1] +; CHECK-NEXT: bx lr store atomic i8 %val, ptr @var8 seq_cst, align 1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: 
movt r[[ADDR]], :upper16:var8 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: stlb r0, [r[[ADDR]]] -; CHECK-NOT: dmb -; CHECK-NOT: mcr ret void } define void @test_atomic_store_monotonic_i16(i16 %val) nounwind { ; CHECK-LABEL: test_atomic_store_monotonic_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, :lower16:var16 +; CHECK-NEXT: movt r1, :upper16:var16 +; CHECK-NEXT: strh r0, [r1] +; CHECK-NEXT: bx lr store atomic i16 %val, ptr @var16 monotonic, align 2 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movt r[[ADDR]], :upper16:var16 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: strh r0, [r[[ADDR]]] -; CHECK-NOT: dmb -; CHECK-NOT: mcr ret void } define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %val) nounwind { -; CHECK-LABEL: test_atomic_store_monotonic_regoff_i32: +; CHECK-LE-LABEL: test_atomic_store_monotonic_regoff_i32: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r1, [sp] +; CHECK-LE-NEXT: str r1, [r0, r2] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: test_atomic_store_monotonic_regoff_i32: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldr r0, [sp] +; CHECK-BE-NEXT: str r0, [r1, r3] +; CHECK-BE-NEXT: bx lr %addr_int = add i64 %base, %off %addr = inttoptr i64 %addr_int to ptr store atomic i32 %val, ptr %addr monotonic, align 4 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: ldr [[VAL:r[0-9]+]], [sp] -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK-LE: str [[VAL]], [r0, r2] -; CHECK-BE: str [[VAL]], [r1, r3] -; CHECK-NOT: dmb -; CHECK-NOT: mcr ret void } define void @test_atomic_store_release_i64(i64 %val) nounwind { -; CHECK-LABEL: test_atomic_store_release_i64: +; CHECK-ARM-LABEL: test_atomic_store_release_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: push {r4, r5, r11, lr} +; CHECK-ARM-NEXT: movw r2, :lower16:var64 +; CHECK-ARM-NEXT: @ kill: def $r1 killed $r1 killed $r0_r1 def $r0_r1 +; CHECK-ARM-NEXT: movt r2, :upper16:var64 +; CHECK-ARM-NEXT: @ kill: def $r0 
killed $r0 killed $r0_r1 def $r0_r1 +; CHECK-ARM-NEXT: .LBB57_1: @ %atomicrmw.start +; CHECK-ARM-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-ARM-NEXT: ldrexd r4, r5, [r2] +; CHECK-ARM-NEXT: stlexd r3, r0, r1, [r2] +; CHECK-ARM-NEXT: cmp r3, #0 +; CHECK-ARM-NEXT: bne .LBB57_1 +; CHECK-ARM-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-ARM-NEXT: pop {r4, r5, r11, pc} +; +; CHECK-THUMB-LABEL: test_atomic_store_release_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: movw r12, :lower16:var64 +; CHECK-THUMB-NEXT: movt r12, :upper16:var64 +; CHECK-THUMB-NEXT: .LBB57_1: @ %atomicrmw.start +; CHECK-THUMB-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-THUMB-NEXT: ldrexd r3, r2, [r12] +; CHECK-THUMB-NEXT: stlexd r2, r0, r1, [r12] +; CHECK-THUMB-NEXT: cmp r2, #0 +; CHECK-THUMB-NEXT: bne .LBB57_1 +; CHECK-THUMB-NEXT: @ %bb.2: @ %atomicrmw.end +; CHECK-THUMB-NEXT: bx lr store atomic i64 %val, ptr @var64 release, align 8 -; CHECK-NOT: dmb -; CHECK-NOT: mcr -; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var64 -; CHECK: movt [[ADDR]], :upper16:var64 -; CHECK: .LBB{{[0-9]+}}_1: ; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK: stlexd [[STATUS:r[0-9]+]], r0, r1, {{.*}}[[ADDR]] -; CHECK-NEXT: cmp [[STATUS]], #0 -; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 -; CHECK-NOT: dmb -; CHECK-NOT: mcr ret void } define i32 @not.barriers(ptr %var, i1 %cond) { -; CHECK-LABEL: not.barriers: br i1 %cond, label %atomic_ver, label %simple_ver simple_ver: %oldval = load i32, ptr %var @@ -1401,13 +2402,9 @@ atomic_ver: %val = atomicrmw add ptr %var, i32 -1 monotonic fence seq_cst br label %somewhere -; CHECK: dmb -; CHECK: ldrex -; CHECK: dmb ; The key point here is that the second dmb isn't immediately followed by the ; simple_ver basic block, which LLVM attempted to do when DMB had been marked ; with isBarrier. For now, look for something that looks like "somewhere". 
-; CHECK-NEXT: {{mov|bx}} somewhere: %combined = phi i32 [ %val, %atomic_ver ], [ %newval, %simple_ver] ret i32 %combined diff --git a/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll index 433fb325a7349..4de689170cffe 100644 --- a/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll @@ -78,7 +78,7 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; CHECK-NEXT: ldrexd r0, r1, [r12] ; CHECK-NEXT: adds r6, r0, #1 ; CHECK-NEXT: adc r7, r1, #0 -; CHECK-NEXT: subs r4, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: sbcs r4, r1, r3 ; CHECK-NEXT: movwhs r7, #0 ; CHECK-NEXT: movwhs r6, #0 @@ -173,7 +173,7 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrexd r4, r5, [r0] ; CHECK-NEXT: mov r12, #0 -; CHECK-NEXT: subs r1, r2, r4 +; CHECK-NEXT: cmp r2, r4 ; CHECK-NEXT: sbcs r1, r3, r5 ; CHECK-NEXT: orr r1, r4, r5 ; CHECK-NEXT: clz r1, r1 diff --git a/llvm/test/CodeGen/ARM/carry.ll b/llvm/test/CodeGen/ARM/carry.ll index 558e2b0e43f7d..a336dabf601e1 100644 --- a/llvm/test/CodeGen/ARM/carry.ll +++ b/llvm/test/CodeGen/ARM/carry.ll @@ -1,9 +1,12 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=armv6t2-eabi %s -o - | FileCheck %s define i64 @f1(i64 %a, i64 %b) { ; CHECK-LABEL: f1: -; CHECK: subs r -; CHECK: sbc r +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, r0, r2 +; CHECK-NEXT: sbc r1, r1, r3 +; CHECK-NEXT: bx lr entry: %tmp = sub i64 %a, %b ret i64 %tmp @@ -11,10 +14,12 @@ entry: define i64 @f2(i64 %a, i64 %b) { ; CHECK-LABEL: f2: -; CHECK: lsl r -; CHECK: orr r -; CHECK: rsbs r -; CHECK: sbc r +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: lsl r1, r1, #1 +; CHECK-NEXT: orr r1, r1, r0, lsr #31 +; CHECK-NEXT: rsbs r0, r2, r0, lsl #1 +; CHECK-NEXT: sbc r1, r1, r3 +; CHECK-NEXT: bx lr entry: %tmp1 = shl i64 %a, 1 
%tmp2 = sub i64 %tmp1, %b @@ -24,8 +29,12 @@ entry: ; add with live carry define i64 @f3(i32 %al, i32 %bl) { ; CHECK-LABEL: f3: -; CHECK: adds r -; CHECK: adc r +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: cmn r0, r1 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: adcs r0, r1, #0 +; CHECK-NEXT: adc r1, r2, #0 +; CHECK-NEXT: bx lr entry: ; unsigned wide add %aw = zext i32 %al to i64 @@ -39,20 +48,34 @@ entry: ; rdar://10073745 define i64 @f4(i64 %x) nounwind readnone { -entry: ; CHECK-LABEL: f4: -; CHECK: rsbs r -; CHECK: rsc r +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: rsc r1, r1, #0 +; CHECK-NEXT: bx lr +entry: %0 = sub nsw i64 0, %x ret i64 %0 } ; rdar://12559385 define i64 @f5(i32 %vi) { -entry: ; CHECK-LABEL: f5: -; CHECK: movw [[REG:r[0-9]+]], #36102 -; CHECK: sbc r{{[0-9]+}}, r{{[0-9]+}}, [[REG]] +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r1, #19493 +; CHECK-NEXT: movw r2, #29433 +; CHECK-NEXT: movt r1, #57191 +; CHECK-NEXT: eor r0, r0, r1 +; CHECK-NEXT: movw r3, #46043 +; CHECK-NEXT: movt r2, #65535 +; CHECK-NEXT: adds r0, r0, r0 +; CHECK-NEXT: movw r1, #36102 +; CHECK-NEXT: sbc r2, r2, r1 +; CHECK-NEXT: movt r3, #8344 +; CHECK-NEXT: adds r0, r0, r3 +; CHECK-NEXT: adc r1, r2, r1 +; CHECK-NEXT: bx lr +entry: %v0 = zext i32 %vi to i64 %v1 = xor i64 %v0, -155057456198619 %v4 = add i64 %v1, 155057456198619 diff --git a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll index 16b7403bdb932..bc9457f2c0892 100644 --- a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll +++ b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll @@ -519,19 +519,19 @@ define i32 @icmp64_ule_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV7M-NEXT: ldrd lr, r0, [sp, #8] ; CHECKV7M-NEXT: beq .LBB5_2 ; CHECKV7M-NEXT: @ %bb.1: @ %then -; CHECKV7M-NEXT: subs r2, r2, r4 +; CHECKV7M-NEXT: cmp r2, r4 ; CHECKV7M-NEXT: sbcs r2, r3, #0 ; CHECKV7M-NEXT: mov r2, r0 ; CHECKV7M-NEXT: it lo ; CHECKV7M-NEXT: movlo r2, lr -; CHECKV7M-NEXT: subs.w r3, 
r12, r4 +; CHECKV7M-NEXT: cmp r12, r4 ; CHECKV7M-NEXT: sbcs r1, r1, #0 ; CHECKV7M-NEXT: it lo ; CHECKV7M-NEXT: movlo r0, lr ; CHECKV7M-NEXT: add r0, r2 ; CHECKV7M-NEXT: pop {r4, pc} ; CHECKV7M-NEXT: .LBB5_2: @ %else -; CHECKV7M-NEXT: subs r1, r2, r4 +; CHECKV7M-NEXT: cmp r2, r4 ; CHECKV7M-NEXT: sbcs r1, r3, #0 ; CHECKV7M-NEXT: it lo ; CHECKV7M-NEXT: movlo r0, lr @@ -549,19 +549,19 @@ define i32 @icmp64_ule_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV7A-NEXT: movt r4, #2 ; CHECKV7A-NEXT: beq .LBB5_2 ; CHECKV7A-NEXT: @ %bb.1: @ %then -; CHECKV7A-NEXT: subs r2, r2, r4 +; CHECKV7A-NEXT: cmp r2, r4 ; CHECKV7A-NEXT: sbcs r2, r3, #0 ; CHECKV7A-NEXT: mov r2, r0 ; CHECKV7A-NEXT: it lo ; CHECKV7A-NEXT: movlo r2, lr -; CHECKV7A-NEXT: subs.w r3, r12, r4 +; CHECKV7A-NEXT: cmp r12, r4 ; CHECKV7A-NEXT: sbcs r1, r1, #0 ; CHECKV7A-NEXT: it lo ; CHECKV7A-NEXT: movlo r0, lr ; CHECKV7A-NEXT: add r0, r2 ; CHECKV7A-NEXT: pop {r4, pc} ; CHECKV7A-NEXT: .LBB5_2: @ %else -; CHECKV7A-NEXT: subs r1, r2, r4 +; CHECKV7A-NEXT: cmp r2, r4 ; CHECKV7A-NEXT: sbcs r1, r3, #0 ; CHECKV7A-NEXT: it lo ; CHECKV7A-NEXT: movlo r0, lr @@ -739,13 +739,13 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV7M-NEXT: ldrd lr, r0, [sp, #16] ; CHECKV7M-NEXT: beq .LBB7_2 ; CHECKV7M-NEXT: @ %bb.1: @ %then -; CHECKV7M-NEXT: subs r2, r4, r2 +; CHECKV7M-NEXT: cmp r4, r2 ; CHECKV7M-NEXT: mov.w r5, #0 ; CHECKV7M-NEXT: sbcs.w r2, r5, r3 ; CHECKV7M-NEXT: mov r2, r0 ; CHECKV7M-NEXT: it lo ; CHECKV7M-NEXT: movlo r2, lr -; CHECKV7M-NEXT: subs.w r3, r4, r12 +; CHECKV7M-NEXT: cmp r4, r12 ; CHECKV7M-NEXT: sbcs.w r1, r5, r1 ; CHECKV7M-NEXT: it lo ; CHECKV7M-NEXT: movlo r0, lr @@ -753,7 +753,7 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV7M-NEXT: pop {r4, r5, r7, pc} ; CHECKV7M-NEXT: .LBB7_2: @ %else ; CHECKV7M-NEXT: movs r1, #0 -; CHECKV7M-NEXT: subs r2, r4, r2 +; CHECKV7M-NEXT: cmp r4, r2 ; CHECKV7M-NEXT: sbcs r1, r3 ; CHECKV7M-NEXT: it lo ; CHECKV7M-NEXT: movlo 
r0, lr @@ -771,13 +771,13 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV7A-NEXT: movt r4, #2 ; CHECKV7A-NEXT: beq .LBB7_2 ; CHECKV7A-NEXT: @ %bb.1: @ %then -; CHECKV7A-NEXT: subs r2, r4, r2 +; CHECKV7A-NEXT: cmp r4, r2 ; CHECKV7A-NEXT: mov.w r5, #0 ; CHECKV7A-NEXT: sbcs.w r2, r5, r3 ; CHECKV7A-NEXT: mov r2, r0 ; CHECKV7A-NEXT: it lo ; CHECKV7A-NEXT: movlo r2, lr -; CHECKV7A-NEXT: subs.w r3, r4, r12 +; CHECKV7A-NEXT: cmp r4, r12 ; CHECKV7A-NEXT: sbcs.w r1, r5, r1 ; CHECKV7A-NEXT: it lo ; CHECKV7A-NEXT: movlo r0, lr @@ -785,7 +785,7 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV7A-NEXT: pop {r4, r5, r7, pc} ; CHECKV7A-NEXT: .LBB7_2: @ %else ; CHECKV7A-NEXT: movs r1, #0 -; CHECKV7A-NEXT: subs r2, r4, r2 +; CHECKV7A-NEXT: cmp r4, r2 ; CHECKV7A-NEXT: sbcs r1, r3 ; CHECKV7A-NEXT: it lo ; CHECKV7A-NEXT: movlo r0, lr diff --git a/llvm/test/CodeGen/ARM/dagcombine-drop-flags-freeze.ll b/llvm/test/CodeGen/ARM/dagcombine-drop-flags-freeze.ll index 63e16b03caee6..36e4080778361 100644 --- a/llvm/test/CodeGen/ARM/dagcombine-drop-flags-freeze.ll +++ b/llvm/test/CodeGen/ARM/dagcombine-drop-flags-freeze.ll @@ -7,7 +7,7 @@ define i1 @drop_flags(i32 noundef %numentries, i64 %cond, i64 %arg) { ; CHECK-LABEL: drop_flags: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: ldm sp, {r1, r12} -; CHECK-NEXT: subs r1, r2, r1 +; CHECK-NEXT: cmp r2, r1 ; CHECK-NEXT: sbcs r1, r3, r12 ; CHECK-NEXT: movlo r0, r2 ; CHECK-NEXT: rsbs r1, r0, #0 diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll index 8ab56b228d2a7..39da4a32a1c5c 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -46,7 +46,7 @@ define i32 @stest_f64i32(double %x) { ; VFP2-NEXT: vmov r0, r1, d0 ; VFP2-NEXT: bl __aeabi_d2lz ; VFP2-NEXT: mvn r12, #-2147483648 -; VFP2-NEXT: subs.w r3, r0, r12 +; VFP2-NEXT: cmp r0, r12 ; VFP2-NEXT: mov.w r2, #0 ; VFP2-NEXT: sbcs r3, r1, #0 ; VFP2-NEXT: it lt @@ -98,7 
+98,7 @@ define i32 @utest_f64i32(double %x) { ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, r1, d0 ; VFP2-NEXT: bl __aeabi_d2ulz -; VFP2-NEXT: subs.w r2, r0, #-1 +; VFP2-NEXT: cmp.w r0, #-1 ; VFP2-NEXT: sbcs r1, r1, #0 ; VFP2-NEXT: it hs ; VFP2-NEXT: movhs.w r0, #-1 @@ -159,7 +159,7 @@ define i32 @ustest_f64i32(double %x) { ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, r1, d0 ; VFP2-NEXT: bl __aeabi_d2lz -; VFP2-NEXT: subs.w r3, r0, #-1 +; VFP2-NEXT: cmp.w r0, #-1 ; VFP2-NEXT: mov.w r2, #0 ; VFP2-NEXT: sbcs r3, r1, #0 ; VFP2-NEXT: mov.w r3, #0 @@ -1010,7 +1010,7 @@ define i64 @stest_f64i64(double %x) { ; VFP2-NEXT: .save {r4, r5, r7, lr} ; VFP2-NEXT: push {r4, r5, r7, lr} ; VFP2-NEXT: bl __fixdfti -; VFP2-NEXT: subs.w r4, r0, #-1 +; VFP2-NEXT: cmp.w r0, #-1 ; VFP2-NEXT: mvn lr, #-2147483648 ; VFP2-NEXT: sbcs.w r4, r1, lr ; VFP2-NEXT: mov.w r12, #0 @@ -1042,7 +1042,7 @@ define i64 @stest_f64i64(double %x) { ; FULL-NEXT: .save {r4, r5, r7, lr} ; FULL-NEXT: push {r4, r5, r7, lr} ; FULL-NEXT: bl __fixdfti -; FULL-NEXT: subs.w lr, r0, #-1 +; FULL-NEXT: cmp.w r0, #-1 ; FULL-NEXT: mvn r12, #-2147483648 ; FULL-NEXT: sbcs.w lr, r1, r12 ; FULL-NEXT: sbcs lr, r2, #0 @@ -1099,7 +1099,7 @@ define i64 @utest_f64i64(double %x) { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixunsdfti -; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: cmp r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: itt hs @@ -1112,7 +1112,7 @@ define i64 @utest_f64i64(double %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixunsdfti -; FULL-NEXT: subs r2, #1 +; FULL-NEXT: cmp r2, #1 ; FULL-NEXT: mov.w r12, #0 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: csel r0, r0, r12, lo @@ -1161,7 +1161,7 @@ define i64 @ustest_f64i64(double %x) { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixdfti -; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: cmp r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: 
itt ge @@ -1178,7 +1178,7 @@ define i64 @ustest_f64i64(double %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixdfti -; FULL-NEXT: subs r2, #1 +; FULL-NEXT: cmp r2, #1 ; FULL-NEXT: mov.w r12, #0 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: csel r2, r3, r12, lt @@ -1266,7 +1266,7 @@ define i64 @stest_f32i64(float %x) { ; VFP2-NEXT: .save {r4, r5, r7, lr} ; VFP2-NEXT: push {r4, r5, r7, lr} ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs.w r4, r0, #-1 +; VFP2-NEXT: cmp.w r0, #-1 ; VFP2-NEXT: mvn lr, #-2147483648 ; VFP2-NEXT: sbcs.w r4, r1, lr ; VFP2-NEXT: mov.w r12, #0 @@ -1298,7 +1298,7 @@ define i64 @stest_f32i64(float %x) { ; FULL-NEXT: .save {r4, r5, r7, lr} ; FULL-NEXT: push {r4, r5, r7, lr} ; FULL-NEXT: bl __fixsfti -; FULL-NEXT: subs.w lr, r0, #-1 +; FULL-NEXT: cmp.w r0, #-1 ; FULL-NEXT: mvn r12, #-2147483648 ; FULL-NEXT: sbcs.w lr, r1, r12 ; FULL-NEXT: sbcs lr, r2, #0 @@ -1355,7 +1355,7 @@ define i64 @utest_f32i64(float %x) { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixunssfti -; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: cmp r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: itt hs @@ -1368,7 +1368,7 @@ define i64 @utest_f32i64(float %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixunssfti -; FULL-NEXT: subs r2, #1 +; FULL-NEXT: cmp r2, #1 ; FULL-NEXT: mov.w r12, #0 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: csel r0, r0, r12, lo @@ -1417,7 +1417,7 @@ define i64 @ustest_f32i64(float %x) { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: cmp r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: itt ge @@ -1434,7 +1434,7 @@ define i64 @ustest_f32i64(float %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixsfti -; FULL-NEXT: subs r2, #1 +; FULL-NEXT: cmp r2, #1 ; FULL-NEXT: mov.w r12, #0 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: csel 
r2, r3, r12, lt @@ -1527,7 +1527,7 @@ define i64 @stest_f16i64(half %x) { ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs.w r4, r0, #-1 +; VFP2-NEXT: cmp.w r0, #-1 ; VFP2-NEXT: mvn lr, #-2147483648 ; VFP2-NEXT: sbcs.w r4, r1, lr ; VFP2-NEXT: mov.w r12, #0 @@ -1561,7 +1561,7 @@ define i64 @stest_f16i64(half %x) { ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixhfti -; FULL-NEXT: subs.w lr, r0, #-1 +; FULL-NEXT: cmp.w r0, #-1 ; FULL-NEXT: mvn r12, #-2147483648 ; FULL-NEXT: sbcs.w lr, r1, r12 ; FULL-NEXT: sbcs lr, r2, #0 @@ -1623,7 +1623,7 @@ define i64 @utesth_f16i64(half %x) { ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: bl __fixunssfti -; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: cmp r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: itt hs @@ -1638,7 +1638,7 @@ define i64 @utesth_f16i64(half %x) { ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixunshfti -; FULL-NEXT: subs r2, #1 +; FULL-NEXT: cmp r2, #1 ; FULL-NEXT: mov.w r12, #0 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: csel r0, r0, r12, lo @@ -1692,7 +1692,7 @@ define i64 @ustest_f16i64(half %x) { ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: cmp r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: itt ge @@ -1711,7 +1711,7 @@ define i64 @ustest_f16i64(half %x) { ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixhfti -; FULL-NEXT: subs r2, #1 +; FULL-NEXT: cmp r2, #1 ; FULL-NEXT: mov.w r12, #0 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: csel r2, r3, r12, lt @@ -1787,7 +1787,7 @@ define i32 @stest_f64i32_mm(double %x) { ; VFP2-NEXT: vmov r0, r1, d0 ; VFP2-NEXT: bl __aeabi_d2lz ; VFP2-NEXT: mvn r2, #-2147483648 -; VFP2-NEXT: subs r3, r0, r2 +; VFP2-NEXT: cmp r0, r2 ; VFP2-NEXT: sbcs r3, r1, #0 ; VFP2-NEXT: it ge ; VFP2-NEXT: movge r0, r2 @@ -1884,7 +1884,7 
@@ define i32 @ustest_f64i32_mm(double %x) { ; VFP2-NEXT: cmp r1, #1 ; VFP2-NEXT: it ge ; VFP2-NEXT: movge.w r0, #-1 -; VFP2-NEXT: ands.w r1, r1, r1, asr #31 +; VFP2-NEXT: tst.w r1, r1, asr #31 ; VFP2-NEXT: it mi ; VFP2-NEXT: movmi r0, #0 ; VFP2-NEXT: pop {r7, pc} @@ -2676,7 +2676,7 @@ define i64 @stest_f64i64_mm(double %x) { ; VFP2-NEXT: .save {r4, r5, r7, lr} ; VFP2-NEXT: push {r4, r5, r7, lr} ; VFP2-NEXT: bl __fixdfti -; VFP2-NEXT: subs.w r4, r0, #-1 +; VFP2-NEXT: cmp.w r0, #-1 ; VFP2-NEXT: mvn lr, #-2147483648 ; VFP2-NEXT: sbcs.w r4, r1, lr ; VFP2-NEXT: mov.w r12, #0 @@ -2711,7 +2711,7 @@ define i64 @stest_f64i64_mm(double %x) { ; FULL-NEXT: .save {r4, r5, r7, lr} ; FULL-NEXT: push {r4, r5, r7, lr} ; FULL-NEXT: bl __fixdfti -; FULL-NEXT: subs.w lr, r0, #-1 +; FULL-NEXT: cmp.w r0, #-1 ; FULL-NEXT: mvn r12, #-2147483648 ; FULL-NEXT: sbcs.w lr, r1, r12 ; FULL-NEXT: sbcs lr, r2, #0 @@ -2774,7 +2774,7 @@ define i64 @utest_f64i64_mm(double %x) { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixunsdfti -; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: cmp r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: it lo @@ -2790,7 +2790,7 @@ define i64 @utest_f64i64_mm(double %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixunsdfti -; FULL-NEXT: subs r2, #1 +; FULL-NEXT: cmp r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lo ; FULL-NEXT: cmp r2, #0 @@ -2859,7 +2859,7 @@ define i64 @ustest_f64i64_mm(double %x) { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixdfti -; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: cmp r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: it lt @@ -2880,7 +2880,7 @@ define i64 @ustest_f64i64_mm(double %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixdfti -; FULL-NEXT: subs r2, #1 +; FULL-NEXT: cmp r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 @@ -2989,7 
+2989,7 @@ define i64 @stest_f32i64_mm(float %x) { ; VFP2-NEXT: .save {r4, r5, r7, lr} ; VFP2-NEXT: push {r4, r5, r7, lr} ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs.w r4, r0, #-1 +; VFP2-NEXT: cmp.w r0, #-1 ; VFP2-NEXT: mvn lr, #-2147483648 ; VFP2-NEXT: sbcs.w r4, r1, lr ; VFP2-NEXT: mov.w r12, #0 @@ -3024,7 +3024,7 @@ define i64 @stest_f32i64_mm(float %x) { ; FULL-NEXT: .save {r4, r5, r7, lr} ; FULL-NEXT: push {r4, r5, r7, lr} ; FULL-NEXT: bl __fixsfti -; FULL-NEXT: subs.w lr, r0, #-1 +; FULL-NEXT: cmp.w r0, #-1 ; FULL-NEXT: mvn r12, #-2147483648 ; FULL-NEXT: sbcs.w lr, r1, r12 ; FULL-NEXT: sbcs lr, r2, #0 @@ -3087,7 +3087,7 @@ define i64 @utest_f32i64_mm(float %x) { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixunssfti -; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: cmp r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: it lo @@ -3103,7 +3103,7 @@ define i64 @utest_f32i64_mm(float %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixunssfti -; FULL-NEXT: subs r2, #1 +; FULL-NEXT: cmp r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lo ; FULL-NEXT: cmp r2, #0 @@ -3172,7 +3172,7 @@ define i64 @ustest_f32i64_mm(float %x) { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: cmp r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: it lt @@ -3193,7 +3193,7 @@ define i64 @ustest_f32i64_mm(float %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixsfti -; FULL-NEXT: subs r2, #1 +; FULL-NEXT: cmp r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 @@ -3307,7 +3307,7 @@ define i64 @stest_f16i64_mm(half %x) { ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs.w r4, r0, #-1 +; VFP2-NEXT: cmp.w r0, #-1 ; VFP2-NEXT: mvn lr, #-2147483648 ; VFP2-NEXT: sbcs.w r4, r1, lr ; VFP2-NEXT: mov.w r12, #0 
@@ -3344,7 +3344,7 @@ define i64 @stest_f16i64_mm(half %x) { ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixhfti -; FULL-NEXT: subs.w lr, r0, #-1 +; FULL-NEXT: cmp.w r0, #-1 ; FULL-NEXT: mvn r12, #-2147483648 ; FULL-NEXT: sbcs.w lr, r1, r12 ; FULL-NEXT: sbcs lr, r2, #0 @@ -3412,7 +3412,7 @@ define i64 @utesth_f16i64_mm(half %x) { ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: bl __fixunssfti -; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: cmp r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: it lo @@ -3430,7 +3430,7 @@ define i64 @utesth_f16i64_mm(half %x) { ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixunshfti -; FULL-NEXT: subs r2, #1 +; FULL-NEXT: cmp r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lo ; FULL-NEXT: cmp r2, #0 @@ -3504,7 +3504,7 @@ define i64 @ustest_f16i64_mm(half %x) { ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: bl __fixsfti -; VFP2-NEXT: subs r2, #1 +; VFP2-NEXT: cmp r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 ; VFP2-NEXT: it lt @@ -3527,7 +3527,7 @@ define i64 @ustest_f16i64_mm(half %x) { ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixhfti -; FULL-NEXT: subs r2, #1 +; FULL-NEXT: cmp r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 @@ -3949,7 +3949,7 @@ define i32 @stest_f32i32i64(float %x) { ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_f2lz ; VFP2-NEXT: movw r2, #32767 -; VFP2-NEXT: subs r3, r0, r2 +; VFP2-NEXT: cmp r0, r2 ; VFP2-NEXT: sbcs r1, r1, #0 ; VFP2-NEXT: it ge ; VFP2-NEXT: movge r0, r2 @@ -3967,7 +3967,7 @@ define i32 @stest_f32i32i64(float %x) { ; FULL-NEXT: vmov r0, s0 ; FULL-NEXT: bl __aeabi_f2lz ; FULL-NEXT: movw r2, #32767 -; FULL-NEXT: subs r3, r0, r2 +; FULL-NEXT: cmp r0, r2 ; FULL-NEXT: sbcs r1, r1, #0 ; FULL-NEXT: csel r0, r0, r2, lt ; FULL-NEXT: movw r1, #32768 diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll 
b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll index 96f009a4da02d..1d341c101d77a 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll @@ -21,7 +21,7 @@ define <2 x i32> @stest_f64i32(<2 x double> %x) { ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vmov.32 d8[0], r0 ; CHECK-NEXT: mvn r3, #-2147483648 -; CHECK-NEXT: subs r4, r4, r3 +; CHECK-NEXT: cmp r4, r3 ; CHECK-NEXT: adr r2, .LCPI0_0 ; CHECK-NEXT: vmov.32 d9[1], r5 ; CHECK-NEXT: sbcs r5, r5, #0 @@ -30,7 +30,7 @@ define <2 x i32> @stest_f64i32(<2 x double> %x) { ; CHECK-NEXT: movwlt r5, #1 ; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: mvnne r5, #0 -; CHECK-NEXT: subs r0, r0, r3 +; CHECK-NEXT: cmp r0, r3 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: vmov.32 d8[1], r1 ; CHECK-NEXT: mov r0, #0 @@ -95,15 +95,15 @@ define <2 x i32> @utest_f64i32(<2 x double> %x) { ; CHECK-NEXT: vmov.32 d9[0], r4 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: mvn r3, #0 -; CHECK-NEXT: subs r4, r4, r3 +; CHECK-NEXT: cmp r4, r3 ; CHECK-NEXT: sbcs r5, r5, #0 -; CHECK-NEXT: vmov.32 d8[0], r0 -; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: vmov.32 d8[0], r0 ; CHECK-NEXT: movwlo r5, #1 ; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: mvnne r5, #0 -; CHECK-NEXT: subs r0, r0, r3 +; CHECK-NEXT: cmp r0, r3 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: vdup.32 d17, r5 ; CHECK-NEXT: movwlo r2, #1 @@ -140,7 +140,7 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vmov.32 d8[0], r0 ; CHECK-NEXT: mvn r3, #0 -; CHECK-NEXT: subs r4, r4, r3 +; CHECK-NEXT: cmp r4, r3 ; CHECK-NEXT: vmov.i64 q9, #0xffffffff ; CHECK-NEXT: vmov.32 d9[1], r5 ; CHECK-NEXT: sbcs r5, r5, #0 @@ -149,7 +149,7 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-NEXT: movwlt r5, #1 ; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: mvnne r5, #0 -; CHECK-NEXT: subs r0, r0, r3 +; CHECK-NEXT: cmp r0, r3 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: vmov.32 d8[1], r1 ; 
CHECK-NEXT: mov r0, #0 @@ -210,7 +210,7 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-NEXT: mvn r9, #-2147483648 ; CHECK-NEXT: vmov.32 d13[0], r6 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r6, r9 +; CHECK-NEXT: cmp r6, r9 ; CHECK-NEXT: vmov.32 d12[0], r0 ; CHECK-NEXT: sbcs r2, r7, #0 ; CHECK-NEXT: vmov r8, s16 @@ -219,7 +219,7 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-NEXT: movwlt r2, #1 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: subs r0, r0, r9 +; CHECK-NEXT: cmp r0, r9 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: vdup.32 d17, r2 ; CHECK-NEXT: mov r0, #0 @@ -233,7 +233,7 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-NEXT: vbsl q4, q6, q5 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vmov.32 d13[0], r0 -; CHECK-NEXT: subs r0, r0, r9 +; CHECK-NEXT: cmp r0, r9 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: movwlt r6, #1 @@ -244,8 +244,8 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-NEXT: mvnne r6, #0 ; CHECK-NEXT: vmov r5, r7, d9 ; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: cmp r0, r9 ; CHECK-NEXT: vmov.32 d12[0], r0 -; CHECK-NEXT: subs r0, r0, r9 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: vdup.32 d17, r6 @@ -344,21 +344,21 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: mvn r3, #0 +; CHECK-NEXT: cmp r0, r3 ; CHECK-NEXT: vmov.32 d10[0], r0 -; CHECK-NEXT: subs r0, r0, r3 -; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: movwlo r0, #1 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: subs r1, r5, r3 +; CHECK-NEXT: cmp r5, r3 ; CHECK-NEXT: sbcs r1, r4, #0 ; CHECK-NEXT: mov r1, #0 ; CHECK-NEXT: movwlo r1, #1 ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: mvnne r1, #0 -; CHECK-NEXT: subs r7, r10, r3 +; CHECK-NEXT: cmp r10, r3 ; CHECK-NEXT: sbcs r7, r8, #0 ; CHECK-NEXT: vdup.32 d19, 
r1 ; CHECK-NEXT: mov r7, #0 @@ -366,7 +366,7 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-NEXT: movwlo r7, #1 ; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: mvnne r7, #0 -; CHECK-NEXT: subs r3, r6, r3 +; CHECK-NEXT: cmp r6, r3 ; CHECK-NEXT: sbcs r3, r9, #0 ; CHECK-NEXT: vdup.32 d17, r7 ; CHECK-NEXT: movwlo r2, #1 @@ -407,7 +407,7 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-NEXT: vmov.32 d9[0], r6 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mvn r9, #0 -; CHECK-NEXT: subs r2, r6, r9 +; CHECK-NEXT: cmp r6, r9 ; CHECK-NEXT: sbcs r2, r7, #0 ; CHECK-NEXT: vmov.32 d8[0], r0 ; CHECK-NEXT: mov r2, #0 @@ -415,7 +415,7 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-NEXT: movwlt r2, #1 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: subs r0, r0, r9 +; CHECK-NEXT: cmp r0, r9 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: vmov.32 d9[1], r7 ; CHECK-NEXT: mov r0, #0 @@ -435,7 +435,7 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vmov r7, r10, d8 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r5, r9 +; CHECK-NEXT: cmp r5, r9 ; CHECK-NEXT: vmov.32 d12[0], r0 ; CHECK-NEXT: sbcs r2, r6, #0 ; CHECK-NEXT: mov r2, #0 @@ -443,7 +443,7 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-NEXT: movwlt r2, #1 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: subs r0, r0, r9 +; CHECK-NEXT: cmp r0, r9 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: vdup.32 d17, r2 ; CHECK-NEXT: mov r0, #0 @@ -521,25 +521,25 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: vmov r2, s18 ; CHECK-NEON-NEXT: adr r3, .LCPI6_0 -; CHECK-NEON-NEXT: vld1.64 {d8, d9}, [r3:128] ; CHECK-NEON-NEXT: mvn r9, #-2147483648 -; CHECK-NEON-NEXT: subs r3, r6, r9 -; CHECK-NEON-NEXT: mov r4, #0 +; CHECK-NEON-NEXT: cmp r6, r9 +; CHECK-NEON-NEXT: vld1.64 {d8, d9}, [r3:128] ; CHECK-NEON-NEXT: sbcs r3, r5, #0 -; CHECK-NEON-NEXT: vmov.32 d15[0], r0 +; 
CHECK-NEON-NEXT: mov r4, #0 +; CHECK-NEON-NEXT: vmov r8, s20 ; CHECK-NEON-NEXT: movwlt r4, #1 ; CHECK-NEON-NEXT: cmp r4, #0 ; CHECK-NEON-NEXT: mvnne r4, #0 -; CHECK-NEON-NEXT: subs r0, r0, r9 +; CHECK-NEON-NEXT: cmp r0, r9 +; CHECK-NEON-NEXT: vmov.32 d15[0], r0 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 -; CHECK-NEON-NEXT: vmov.32 d14[0], r6 ; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: vmov r8, s20 +; CHECK-NEON-NEXT: mov r7, #0 ; CHECK-NEON-NEXT: movwlt r0, #1 ; CHECK-NEON-NEXT: cmp r0, #0 +; CHECK-NEON-NEXT: vmov.32 d14[0], r6 ; CHECK-NEON-NEXT: mvnne r0, #0 ; CHECK-NEON-NEXT: vmov.32 d15[1], r1 -; CHECK-NEON-NEXT: mov r7, #0 ; CHECK-NEON-NEXT: vdup.32 d11, r0 ; CHECK-NEON-NEXT: vmov.32 d14[1], r5 ; CHECK-NEON-NEXT: mov r0, r2 @@ -547,7 +547,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: vdup.32 d10, r4 ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: vmov.32 d13[0], r0 -; CHECK-NEON-NEXT: subs r0, r0, r9 +; CHECK-NEON-NEXT: cmp r0, r9 ; CHECK-NEON-NEXT: vbsl q5, q7, q4 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 ; CHECK-NEON-NEXT: mov r6, #0 @@ -560,8 +560,8 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: vmov r5, r4, d11 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz +; CHECK-NEON-NEXT: cmp r0, r9 ; CHECK-NEON-NEXT: vmov.32 d12[0], r0 -; CHECK-NEON-NEXT: subs r0, r0, r9 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 ; CHECK-NEON-NEXT: mov r0, #0 ; CHECK-NEON-NEXT: vdup.32 d17, r6 @@ -643,8 +643,8 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: adr r2, .LCPI6_0 ; CHECK-FP16-NEXT: mvn r10, #-2147483648 +; CHECK-FP16-NEXT: cmp r4, r10 ; CHECK-FP16-NEXT: vld1.64 {d10, d11}, [r2:128] -; CHECK-FP16-NEXT: subs r2, r4, r10 ; CHECK-FP16-NEXT: sbcs r2, r5, #0 ; CHECK-FP16-NEXT: vmov s0, r9 ; CHECK-FP16-NEXT: mov r2, #0 @@ -652,7 +652,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-FP16-NEXT: movwlt r2, #1 ; CHECK-FP16-NEXT: cmp r2, #0 ; CHECK-FP16-NEXT: 
mvnne r2, #0 -; CHECK-FP16-NEXT: subs r0, r0, r10 +; CHECK-FP16-NEXT: cmp r0, r10 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 ; CHECK-FP16-NEXT: vmov.32 d9[1], r5 ; CHECK-FP16-NEXT: mov r0, #0 @@ -666,7 +666,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-FP16-NEXT: vbif q4, q5, q8 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: vmov.32 d13[0], r0 -; CHECK-FP16-NEXT: subs r0, r0, r10 +; CHECK-FP16-NEXT: cmp r0, r10 ; CHECK-FP16-NEXT: vmov s0, r8 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 ; CHECK-FP16-NEXT: mov r7, #0 @@ -677,8 +677,8 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-FP16-NEXT: vmov r5, r4, d9 ; CHECK-FP16-NEXT: mvnne r7, #0 ; CHECK-FP16-NEXT: bl __fixhfdi +; CHECK-FP16-NEXT: cmp r0, r10 ; CHECK-FP16-NEXT: vmov.32 d12[0], r0 -; CHECK-FP16-NEXT: subs r0, r0, r10 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 ; CHECK-FP16-NEXT: mov r0, #0 ; CHECK-FP16-NEXT: vdup.32 d17, r7 @@ -782,21 +782,21 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: vmov.32 d9[0], r4 ; CHECK-NEON-NEXT: bl __aeabi_f2ulz ; CHECK-NEON-NEXT: mvn r3, #0 +; CHECK-NEON-NEXT: cmp r0, r3 ; CHECK-NEON-NEXT: vmov.32 d8[0], r0 -; CHECK-NEON-NEXT: subs r0, r0, r3 -; CHECK-NEON-NEXT: mov r2, #0 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 ; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: mov r2, #0 ; CHECK-NEON-NEXT: movwlo r0, #1 ; CHECK-NEON-NEXT: cmp r0, #0 ; CHECK-NEON-NEXT: mvnne r0, #0 -; CHECK-NEON-NEXT: subs r1, r4, r3 +; CHECK-NEON-NEXT: cmp r4, r3 ; CHECK-NEON-NEXT: sbcs r1, r8, #0 ; CHECK-NEON-NEXT: mov r1, #0 ; CHECK-NEON-NEXT: movwlo r1, #1 ; CHECK-NEON-NEXT: cmp r1, #0 ; CHECK-NEON-NEXT: mvnne r1, #0 -; CHECK-NEON-NEXT: subs r6, r6, r3 +; CHECK-NEON-NEXT: cmp r6, r3 ; CHECK-NEON-NEXT: sbcs r6, r9, #0 ; CHECK-NEON-NEXT: vdup.32 d19, r1 ; CHECK-NEON-NEXT: mov r6, #0 @@ -804,7 +804,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: movwlo r6, #1 ; CHECK-NEON-NEXT: cmp r6, #0 ; CHECK-NEON-NEXT: mvnne r6, #0 -; CHECK-NEON-NEXT: subs r3, r5, r3 +; 
CHECK-NEON-NEXT: cmp r5, r3 ; CHECK-NEON-NEXT: sbcs r3, r7, #0 ; CHECK-NEON-NEXT: vdup.32 d17, r6 ; CHECK-NEON-NEXT: movwlo r2, #1 @@ -850,21 +850,21 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixunshfdi ; CHECK-FP16-NEXT: mvn r3, #0 +; CHECK-FP16-NEXT: cmp r0, r3 ; CHECK-FP16-NEXT: vmov.32 d8[0], r0 -; CHECK-FP16-NEXT: subs r0, r0, r3 -; CHECK-FP16-NEXT: mov r2, #0 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 ; CHECK-FP16-NEXT: mov r0, #0 +; CHECK-FP16-NEXT: mov r2, #0 ; CHECK-FP16-NEXT: movwlo r0, #1 ; CHECK-FP16-NEXT: cmp r0, #0 ; CHECK-FP16-NEXT: mvnne r0, #0 -; CHECK-FP16-NEXT: subs r1, r5, r3 +; CHECK-FP16-NEXT: cmp r5, r3 ; CHECK-FP16-NEXT: sbcs r1, r7, #0 ; CHECK-FP16-NEXT: mov r1, #0 ; CHECK-FP16-NEXT: movwlo r1, #1 ; CHECK-FP16-NEXT: cmp r1, #0 ; CHECK-FP16-NEXT: mvnne r1, #0 -; CHECK-FP16-NEXT: subs r7, r4, r3 +; CHECK-FP16-NEXT: cmp r4, r3 ; CHECK-FP16-NEXT: sbcs r7, r8, #0 ; CHECK-FP16-NEXT: vdup.32 d19, r1 ; CHECK-FP16-NEXT: mov r7, #0 @@ -872,7 +872,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-FP16-NEXT: movwlo r7, #1 ; CHECK-FP16-NEXT: cmp r7, #0 ; CHECK-FP16-NEXT: mvnne r7, #0 -; CHECK-FP16-NEXT: subs r3, r6, r3 +; CHECK-FP16-NEXT: cmp r6, r3 ; CHECK-FP16-NEXT: sbcs r3, r9, #0 ; CHECK-FP16-NEXT: vdup.32 d17, r7 ; CHECK-FP16-NEXT: movwlo r2, #1 @@ -916,18 +916,18 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: vmov.32 d13[0], r5 ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: vmov r2, s18 -; CHECK-NEON-NEXT: vmov.32 d12[0], r0 ; CHECK-NEON-NEXT: mvn r9, #0 -; CHECK-NEON-NEXT: subs r0, r0, r9 +; CHECK-NEON-NEXT: cmp r0, r9 +; CHECK-NEON-NEXT: vmov.32 d12[0], r0 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 -; CHECK-NEON-NEXT: vmov.32 d13[1], r6 -; CHECK-NEON-NEXT: mov r0, #0 ; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: vmov.32 d13[1], r6 ; CHECK-NEON-NEXT: movwlt r0, #1 ; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: vmov.32 
d12[1], r1 ; CHECK-NEON-NEXT: mvnne r0, #0 -; CHECK-NEON-NEXT: subs r1, r5, r9 +; CHECK-NEON-NEXT: cmp r5, r9 +; CHECK-NEON-NEXT: vmov.32 d12[1], r1 ; CHECK-NEON-NEXT: sbcs r1, r6, #0 ; CHECK-NEON-NEXT: mov r1, #0 ; CHECK-NEON-NEXT: movwlt r1, #1 @@ -947,7 +947,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: vmov r4, r10, d8 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: subs r2, r5, r9 +; CHECK-NEON-NEXT: cmp r5, r9 ; CHECK-NEON-NEXT: vmov.32 d12[0], r0 ; CHECK-NEON-NEXT: sbcs r2, r6, #0 ; CHECK-NEON-NEXT: mov r2, #0 @@ -955,7 +955,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: movwlt r2, #1 ; CHECK-NEON-NEXT: cmp r2, #0 ; CHECK-NEON-NEXT: mvnne r2, #0 -; CHECK-NEON-NEXT: subs r0, r0, r9 +; CHECK-NEON-NEXT: cmp r0, r9 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 ; CHECK-NEON-NEXT: vdup.32 d17, r2 ; CHECK-NEON-NEXT: mov r0, #0 @@ -1021,7 +1021,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: mvn r10, #0 -; CHECK-FP16-NEXT: subs r2, r4, r10 +; CHECK-FP16-NEXT: cmp r4, r10 ; CHECK-FP16-NEXT: sbcs r2, r5, #0 ; CHECK-FP16-NEXT: vmov.32 d8[0], r0 ; CHECK-FP16-NEXT: mov r2, #0 @@ -1029,7 +1029,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-FP16-NEXT: movwlt r2, #1 ; CHECK-FP16-NEXT: cmp r2, #0 ; CHECK-FP16-NEXT: mvnne r2, #0 -; CHECK-FP16-NEXT: subs r0, r0, r10 +; CHECK-FP16-NEXT: cmp r0, r10 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 ; CHECK-FP16-NEXT: vmov.32 d9[1], r5 ; CHECK-FP16-NEXT: mov r0, #0 @@ -1049,7 +1049,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-FP16-NEXT: vmov.32 d13[0], r0 ; CHECK-FP16-NEXT: vmov r7, r8, d8 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: subs r2, r4, r10 +; CHECK-FP16-NEXT: cmp r4, r10 ; CHECK-FP16-NEXT: vmov.32 d12[0], r0 ; CHECK-FP16-NEXT: sbcs r2, r5, #0 ; CHECK-FP16-NEXT: mov r2, #0 @@ -1057,7 +1057,7 @@ define <4 x i32> @ustest_f16i32(<4 
x half> %x) { ; CHECK-FP16-NEXT: movwlt r2, #1 ; CHECK-FP16-NEXT: cmp r2, #0 ; CHECK-FP16-NEXT: mvnne r2, #0 -; CHECK-FP16-NEXT: subs r0, r0, r10 +; CHECK-FP16-NEXT: cmp r0, r10 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 ; CHECK-FP16-NEXT: vdup.32 d17, r2 ; CHECK-FP16-NEXT: mov r0, #0 @@ -1624,11 +1624,11 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: mvn r9, #0 -; CHECK-NEXT: subs r1, r0, r9 +; CHECK-NEXT: cmp r0, r9 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r4, r5 +; CHECK-NEXT: sbcs r1, r1, r5 ; CHECK-NEXT: vorr d0, d8, d8 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov r7, #0 @@ -1651,7 +1651,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NEXT: movne r7, r0 ; CHECK-NEXT: moveq r4, r8 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r6, r0, r9 +; CHECK-NEXT: cmp r0, r9 ; CHECK-NEXT: vmov.32 d1[0], r7 ; CHECK-NEXT: sbcs r6, r1, r5 ; CHECK-NEXT: sbcs r6, r2, #0 @@ -1696,9 +1696,9 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: subs r1, r2, #1 ; CHECK-NEXT: vorr d0, d8, d8 +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: sbcs r1, r3, #0 ; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: mov r5, #0 @@ -1707,7 +1707,7 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-NEXT: moveq r4, r6 ; CHECK-NEXT: movne r6, r0 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: subs r2, r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: vmov.32 d1[0], r6 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: movwlo r5, #1 @@ -1737,8 +1737,8 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixdfti +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: subs r1, r2, #1 ; CHECK-NEXT: 
sbcs r1, r3, #0 ; CHECK-NEXT: mov r8, #1 ; CHECK-NEXT: mov r1, #0 @@ -1760,7 +1760,7 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NEXT: moveq r4, r7 ; CHECK-NEXT: movne r7, r1 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r6, r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: vmov.32 d1[0], r7 ; CHECK-NEXT: sbcs r6, r3, #0 ; CHECK-NEXT: movlt r8, r2 @@ -1803,11 +1803,11 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NEXT: vmov.f64 d8, d0 ; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: mvn r9, #0 -; CHECK-NEXT: subs r1, r0, r9 +; CHECK-NEXT: cmp r0, r9 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r4, r5 +; CHECK-NEXT: sbcs r1, r1, r5 ; CHECK-NEXT: vmov.f32 s0, s16 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov r7, #0 @@ -1830,7 +1830,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NEXT: movne r7, r0 ; CHECK-NEXT: moveq r4, r8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r6, r0, r9 +; CHECK-NEXT: cmp r0, r9 ; CHECK-NEXT: vmov.32 d1[0], r7 ; CHECK-NEXT: sbcs r6, r1, r5 ; CHECK-NEXT: sbcs r6, r2, #0 @@ -1876,17 +1876,17 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vmov.f32 s0, s16 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: subs r1, r2, #1 -; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: movwlo r6, #1 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: moveq r4, r6 ; CHECK-NEXT: movne r6, r0 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: subs r2, r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: vmov.32 d1[0], r6 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: movwlo r5, #1 @@ -1916,8 +1916,8 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-NEXT: vmov.f64 d8, d0 ; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: mov r4, r1 -; 
CHECK-NEXT: subs r1, r2, #1 ; CHECK-NEXT: sbcs r1, r3, #0 ; CHECK-NEXT: mov r8, #1 ; CHECK-NEXT: mov r1, #0 @@ -1939,7 +1939,7 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-NEXT: moveq r4, r7 ; CHECK-NEXT: movne r7, r1 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r6, r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: vmov.32 d1[0], r7 ; CHECK-NEXT: sbcs r6, r3, #0 ; CHECK-NEXT: movlt r8, r2 @@ -1987,11 +1987,11 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: mvn r9, #0 -; CHECK-NEON-NEXT: subs r1, r0, r9 +; CHECK-NEON-NEXT: cmp r0, r9 +; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: mvn r6, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r4, r6 +; CHECK-NEON-NEXT: sbcs r1, r1, r6 ; CHECK-NEON-NEXT: vmov s0, r8 ; CHECK-NEON-NEXT: sbcs r1, r2, #0 ; CHECK-NEON-NEXT: mov r5, #0 @@ -2014,7 +2014,7 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NEON-NEXT: movne r5, r0 ; CHECK-NEON-NEXT: moveq r4, r8 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: subs r7, r0, r9 +; CHECK-NEON-NEXT: cmp r0, r9 ; CHECK-NEON-NEXT: vmov.32 d1[0], r5 ; CHECK-NEON-NEXT: sbcs r7, r1, r6 ; CHECK-NEON-NEXT: sbcs r7, r2, #0 @@ -2048,11 +2048,11 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-FP16-NEXT: vmov.u16 r7, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: mov r4, r1 ; CHECK-FP16-NEXT: mvn r9, #0 -; CHECK-FP16-NEXT: subs r1, r0, r9 +; CHECK-FP16-NEXT: cmp r0, r9 +; CHECK-FP16-NEXT: mov r4, r1 ; CHECK-FP16-NEXT: mvn r5, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r4, r5 +; CHECK-FP16-NEXT: sbcs r1, r1, r5 ; CHECK-FP16-NEXT: vmov s0, r7 ; CHECK-FP16-NEXT: sbcs r1, r2, #0 ; CHECK-FP16-NEXT: mov r7, #0 @@ -2075,7 +2075,7 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-FP16-NEXT: movne r7, r0 ; CHECK-FP16-NEXT: moveq r4, r8 ; CHECK-FP16-NEXT: bl __fixhfti -; 
CHECK-FP16-NEXT: subs r6, r0, r9 +; CHECK-FP16-NEXT: cmp r0, r9 ; CHECK-FP16-NEXT: vmov.32 d1[0], r7 ; CHECK-FP16-NEXT: sbcs r6, r1, r5 ; CHECK-FP16-NEXT: sbcs r6, r2, #0 @@ -2124,9 +2124,9 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixunssfti -; CHECK-NEON-NEXT: mov r4, r1 -; CHECK-NEON-NEXT: subs r1, r2, #1 +; CHECK-NEON-NEXT: cmp r2, #1 ; CHECK-NEON-NEXT: vmov s0, r5 +; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: sbcs r1, r3, #0 ; CHECK-NEON-NEXT: mov r5, #0 ; CHECK-NEON-NEXT: mov r6, #0 @@ -2135,7 +2135,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NEON-NEXT: moveq r4, r5 ; CHECK-NEON-NEXT: movne r5, r0 ; CHECK-NEON-NEXT: bl __fixunssfti -; CHECK-NEON-NEXT: subs r2, r2, #1 +; CHECK-NEON-NEXT: cmp r2, #1 ; CHECK-NEON-NEXT: vmov.32 d1[0], r5 ; CHECK-NEON-NEXT: sbcs r2, r3, #0 ; CHECK-NEON-NEXT: movwlo r6, #1 @@ -2156,9 +2156,9 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-FP16-NEXT: vmov.u16 r6, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixunshfti -; CHECK-FP16-NEXT: mov r4, r1 -; CHECK-FP16-NEXT: subs r1, r2, #1 +; CHECK-FP16-NEXT: cmp r2, #1 ; CHECK-FP16-NEXT: vmov s0, r6 +; CHECK-FP16-NEXT: mov r4, r1 ; CHECK-FP16-NEXT: sbcs r1, r3, #0 ; CHECK-FP16-NEXT: mov r6, #0 ; CHECK-FP16-NEXT: mov r5, #0 @@ -2167,7 +2167,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-FP16-NEXT: moveq r4, r6 ; CHECK-FP16-NEXT: movne r6, r0 ; CHECK-FP16-NEXT: bl __fixunshfti -; CHECK-FP16-NEXT: subs r2, r2, #1 +; CHECK-FP16-NEXT: cmp r2, #1 ; CHECK-FP16-NEXT: vmov.32 d1[0], r6 ; CHECK-FP16-NEXT: sbcs r2, r3, #0 ; CHECK-FP16-NEXT: movwlo r5, #1 @@ -2201,8 +2201,8 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixsfti +; CHECK-NEON-NEXT: cmp r2, #1 ; CHECK-NEON-NEXT: mov r4, r1 -; CHECK-NEON-NEXT: subs r1, r2, #1 ; CHECK-NEON-NEXT: sbcs 
r1, r3, #0 ; CHECK-NEON-NEXT: mov r8, #1 ; CHECK-NEON-NEXT: mov r1, #0 @@ -2224,7 +2224,7 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NEON-NEXT: moveq r4, r7 ; CHECK-NEON-NEXT: movne r7, r1 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: subs r6, r2, #1 +; CHECK-NEON-NEXT: cmp r2, #1 ; CHECK-NEON-NEXT: vmov.32 d1[0], r7 ; CHECK-NEON-NEXT: sbcs r6, r3, #0 ; CHECK-NEON-NEXT: movlt r8, r2 @@ -2256,8 +2256,8 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-FP16-NEXT: vmov.u16 r5, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti +; CHECK-FP16-NEXT: cmp r2, #1 ; CHECK-FP16-NEXT: mov r4, r1 -; CHECK-FP16-NEXT: subs r1, r2, #1 ; CHECK-FP16-NEXT: sbcs r1, r3, #0 ; CHECK-FP16-NEXT: mov r8, #1 ; CHECK-FP16-NEXT: mov r1, #0 @@ -2279,7 +2279,7 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-FP16-NEXT: moveq r4, r7 ; CHECK-FP16-NEXT: movne r7, r1 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: subs r6, r2, #1 +; CHECK-FP16-NEXT: cmp r2, #1 ; CHECK-FP16-NEXT: vmov.32 d1[0], r7 ; CHECK-FP16-NEXT: sbcs r6, r3, #0 ; CHECK-FP16-NEXT: movlt r8, r2 @@ -2329,14 +2329,14 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: mov r8, r1 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: mvn r6, #-2147483648 -; CHECK-NEXT: subs r2, r4, r6 +; CHECK-NEXT: cmp r4, r6 ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: sbcs r2, r8, #0 ; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: movge r4, r6 ; CHECK-NEXT: movwlt r5, #1 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: subs r2, r0, r6 +; CHECK-NEXT: cmp r0, r6 ; CHECK-NEXT: sbcs r2, r1, #0 ; CHECK-NEXT: movlt r6, r0 ; CHECK-NEXT: movwlt r7, #1 @@ -2407,7 +2407,7 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vmov r2, r12, d9 ; CHECK-NEXT: mvn r4, #0 -; CHECK-NEXT: subs r5, r0, r4 +; CHECK-NEXT: cmp r0, r4 ; CHECK-NEXT: mov r3, #0 ; CHECK-NEXT: sbcs r5, r1, #0 ; CHECK-NEXT: mov r6, #0 @@ -2424,7 +2424,7 @@ define <2 x i32> 
@ustest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: mov r1, r12 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: subs r2, r0, r4 +; CHECK-NEXT: cmp r0, r4 ; CHECK-NEXT: vmov.32 d0[0], r6 ; CHECK-NEXT: sbcs r2, r1, #0 ; CHECK-NEXT: movlt r4, r0 @@ -2466,12 +2466,12 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: mov r11, r0 ; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: mvn r6, #-2147483648 +; CHECK-NEXT: cmp r11, r6 ; CHECK-NEXT: mov r3, #-2147483648 ; CHECK-NEXT: mvn r10, #0 ; CHECK-NEXT: vmov r7, s16 ; CHECK-NEXT: mov r4, #0 ; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: subs r2, r11, r6 ; CHECK-NEXT: sbcs r2, r1, #0 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: movge r11, r6 @@ -2482,8 +2482,8 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: sbcs r1, r10, r2 ; CHECK-NEXT: movge r11, r3 ; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: cmp r0, r6 ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: subs r0, r0, r6 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov r9, #0 ; CHECK-NEXT: mov r0, r7 @@ -2492,8 +2492,8 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: cmp r9, #0 ; CHECK-NEXT: movne r9, r1 ; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: cmp r0, r6 ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: subs r0, r0, r6 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov r8, #0 ; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload @@ -2502,7 +2502,7 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: cmp r8, #0 ; CHECK-NEXT: movne r8, r1 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r0, r6 +; CHECK-NEXT: cmp r0, r6 ; CHECK-NEXT: sbcs r2, r1, #0 ; CHECK-NEXT: movlt r6, r0 ; CHECK-NEXT: movwlt r4, #1 @@ -2592,7 +2592,7 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vmov r2, s16 ; CHECK-NEXT: mvn r6, #0 -; CHECK-NEXT: subs r3, r0, r6 +; CHECK-NEXT: cmp r0, r6 ; CHECK-NEXT: mov r4, #0 ; CHECK-NEXT: sbcs r3, r1, #0 ; CHECK-NEXT: vmov r8, s17 @@ -2610,7 
+2610,7 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: mov r10, #0 ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r0, r6 +; CHECK-NEXT: cmp r0, r6 ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: sbcs r2, r1, #0 ; CHECK-NEXT: mov r2, #0 @@ -2625,7 +2625,7 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: movne r5, r0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r0, r6 +; CHECK-NEXT: cmp r0, r6 ; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: sbcs r2, r1, #0 ; CHECK-NEXT: mov r2, #0 @@ -2640,7 +2640,7 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: movne r7, r0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r0, r6 +; CHECK-NEXT: cmp r0, r6 ; CHECK-NEXT: vmov.32 d1[0], r7 ; CHECK-NEXT: sbcs r2, r1, #0 ; CHECK-NEXT: movlt r6, r0 @@ -2687,12 +2687,12 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: mov r11, r0 ; CHECK-NEON-NEXT: vmov r0, s18 ; CHECK-NEON-NEXT: mvn r6, #-2147483648 +; CHECK-NEON-NEXT: cmp r11, r6 ; CHECK-NEON-NEXT: mov r3, #-2147483648 ; CHECK-NEON-NEXT: mvn r10, #0 ; CHECK-NEON-NEXT: vmov r7, s20 ; CHECK-NEON-NEXT: mov r4, #0 ; CHECK-NEON-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-NEON-NEXT: subs r2, r11, r6 ; CHECK-NEON-NEXT: sbcs r2, r1, #0 ; CHECK-NEON-NEXT: mov r2, #0 ; CHECK-NEON-NEXT: movge r11, r6 @@ -2704,8 +2704,8 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: movge r11, r3 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz +; CHECK-NEON-NEXT: cmp r0, r6 ; CHECK-NEON-NEXT: mov r5, r0 -; CHECK-NEON-NEXT: subs r0, r0, r6 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 ; CHECK-NEON-NEXT: mov r8, #0 ; CHECK-NEON-NEXT: mov r0, r7 @@ -2715,8 +2715,8 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: movne r8, r1 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz +; CHECK-NEON-NEXT: cmp r0, r6 ; CHECK-NEON-NEXT: 
mov r7, r0 -; CHECK-NEON-NEXT: subs r0, r0, r6 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 ; CHECK-NEON-NEXT: mov r9, #0 ; CHECK-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload @@ -2726,7 +2726,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: movne r9, r1 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: subs r2, r0, r6 +; CHECK-NEON-NEXT: cmp r0, r6 ; CHECK-NEON-NEXT: sbcs r2, r1, #0 ; CHECK-NEON-NEXT: movlt r6, r0 ; CHECK-NEON-NEXT: movwlt r4, #1 @@ -2765,19 +2765,19 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-FP16-NEXT: vmov.u16 r6, d0[1] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: mov r10, r0 ; CHECK-FP16-NEXT: mvn r7, #-2147483648 -; CHECK-FP16-NEXT: subs r0, r0, r7 -; CHECK-FP16-NEXT: vmov s0, r6 +; CHECK-FP16-NEXT: cmp r0, r7 +; CHECK-FP16-NEXT: mov r10, r0 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 -; CHECK-FP16-NEXT: mov r2, #-2147483648 ; CHECK-FP16-NEXT: mov r0, #0 ; CHECK-FP16-NEXT: movge r10, r7 ; CHECK-FP16-NEXT: movwlt r0, #1 ; CHECK-FP16-NEXT: cmp r0, #0 ; CHECK-FP16-NEXT: movne r0, r1 +; CHECK-FP16-NEXT: vmov s0, r6 ; CHECK-FP16-NEXT: rsbs r1, r10, #-2147483648 ; CHECK-FP16-NEXT: mvn r9, #0 +; CHECK-FP16-NEXT: mov r2, #-2147483648 ; CHECK-FP16-NEXT: sbcs r0, r9, r0 ; CHECK-FP16-NEXT: vmov s16, r4 ; CHECK-FP16-NEXT: mov r11, #0 @@ -2785,26 +2785,26 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-FP16-NEXT: movge r10, r2 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: vmov.f32 s0, s18 +; CHECK-FP16-NEXT: cmp r0, r7 ; CHECK-FP16-NEXT: mov r5, r0 -; CHECK-FP16-NEXT: subs r0, r0, r7 -; CHECK-FP16-NEXT: mov r4, #0 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 +; CHECK-FP16-NEXT: mov r4, #0 ; CHECK-FP16-NEXT: movge r5, r7 ; CHECK-FP16-NEXT: movwlt r4, #1 ; CHECK-FP16-NEXT: cmp r4, #0 ; CHECK-FP16-NEXT: movne r4, r1 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: vmov.f32 s0, s16 +; CHECK-FP16-NEXT: cmp r0, r7 ; CHECK-FP16-NEXT: mov r6, r0 -; 
CHECK-FP16-NEXT: subs r0, r0, r7 -; CHECK-FP16-NEXT: mov r8, #0 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 +; CHECK-FP16-NEXT: mov r8, #0 ; CHECK-FP16-NEXT: movge r6, r7 ; CHECK-FP16-NEXT: movwlt r8, #1 ; CHECK-FP16-NEXT: cmp r8, #0 ; CHECK-FP16-NEXT: movne r8, r1 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: subs r2, r0, r7 +; CHECK-FP16-NEXT: cmp r0, r7 ; CHECK-FP16-NEXT: sbcs r2, r1, #0 ; CHECK-FP16-NEXT: movlt r7, r0 ; CHECK-FP16-NEXT: movwlt r11, #1 @@ -2945,7 +2945,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: vmov r2, s20 ; CHECK-NEON-NEXT: mvn r6, #0 -; CHECK-NEON-NEXT: subs r3, r0, r6 +; CHECK-NEON-NEXT: cmp r0, r6 ; CHECK-NEON-NEXT: mov r4, #0 ; CHECK-NEON-NEXT: sbcs r3, r1, #0 ; CHECK-NEON-NEXT: vmov r8, s18 @@ -2964,7 +2964,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: mov r0, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: subs r2, r0, r6 +; CHECK-NEON-NEXT: cmp r0, r6 ; CHECK-NEON-NEXT: mov r5, #0 ; CHECK-NEON-NEXT: sbcs r2, r1, #0 ; CHECK-NEON-NEXT: mov r2, #0 @@ -2980,7 +2980,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: mov r0, r9 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: subs r2, r0, r6 +; CHECK-NEON-NEXT: cmp r0, r6 ; CHECK-NEON-NEXT: mov r7, #0 ; CHECK-NEON-NEXT: sbcs r2, r1, #0 ; CHECK-NEON-NEXT: mov r2, #0 @@ -2996,7 +2996,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: mov r0, r8 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: subs r2, r0, r6 +; CHECK-NEON-NEXT: cmp r0, r6 ; CHECK-NEON-NEXT: vmov.32 d1[0], r7 ; CHECK-NEON-NEXT: sbcs r2, r1, #0 ; CHECK-NEON-NEXT: movlt r6, r0 @@ -3029,13 +3029,12 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-FP16-NEXT: vmov.u16 r2, d8[1] ; CHECK-FP16-NEXT: mvn r4, #0 ; CHECK-FP16-NEXT: vmov.u16 r3, d8[2] +; 
CHECK-FP16-NEXT: cmp r0, r4 ; CHECK-FP16-NEXT: vmov s0, r5 ; CHECK-FP16-NEXT: mov r6, #0 ; CHECK-FP16-NEXT: mov r8, #0 ; CHECK-FP16-NEXT: vmov s16, r2 -; CHECK-FP16-NEXT: subs r2, r0, r4 ; CHECK-FP16-NEXT: sbcs r2, r1, #0 -; CHECK-FP16-NEXT: vmov s18, r3 ; CHECK-FP16-NEXT: mov r2, #0 ; CHECK-FP16-NEXT: movge r0, r4 ; CHECK-FP16-NEXT: movwlt r2, #1 @@ -3043,11 +3042,12 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-FP16-NEXT: movne r2, r1 ; CHECK-FP16-NEXT: rsbs r1, r0, #0 ; CHECK-FP16-NEXT: rscs r1, r2, #0 +; CHECK-FP16-NEXT: vmov s18, r3 ; CHECK-FP16-NEXT: movwlt r6, #1 ; CHECK-FP16-NEXT: cmp r6, #0 ; CHECK-FP16-NEXT: movne r6, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: subs r2, r0, r4 +; CHECK-FP16-NEXT: cmp r0, r4 ; CHECK-FP16-NEXT: vmov.f32 s0, s18 ; CHECK-FP16-NEXT: sbcs r2, r1, #0 ; CHECK-FP16-NEXT: mov r7, #0 @@ -3062,7 +3062,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-FP16-NEXT: cmp r7, #0 ; CHECK-FP16-NEXT: movne r7, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: subs r2, r0, r4 +; CHECK-FP16-NEXT: cmp r0, r4 ; CHECK-FP16-NEXT: vmov.f32 s0, s16 ; CHECK-FP16-NEXT: sbcs r2, r1, #0 ; CHECK-FP16-NEXT: mov r5, #0 @@ -3077,7 +3077,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-FP16-NEXT: cmp r5, #0 ; CHECK-FP16-NEXT: movne r5, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: subs r2, r0, r4 +; CHECK-FP16-NEXT: cmp r0, r4 ; CHECK-FP16-NEXT: vmov.32 d1[0], r5 ; CHECK-FP16-NEXT: sbcs r2, r1, #0 ; CHECK-FP16-NEXT: movlt r4, r0 @@ -3599,11 +3599,11 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: mvn r9, #0 -; CHECK-NEXT: subs r1, r0, r9 +; CHECK-NEXT: cmp r0, r9 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r4, r5 +; CHECK-NEXT: sbcs r1, r1, r5 ; CHECK-NEXT: vorr d0, d8, d8 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: 
mov r7, #0 @@ -3626,7 +3626,7 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: movne r7, r0 ; CHECK-NEXT: moveq r4, r8 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r6, r0, r9 +; CHECK-NEXT: cmp r0, r9 ; CHECK-NEXT: vmov.32 d1[0], r7 ; CHECK-NEXT: sbcs r6, r1, r5 ; CHECK-NEXT: sbcs r6, r2, #0 @@ -3669,9 +3669,9 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: subs r1, r2, #1 ; CHECK-NEXT: vorr d0, d8, d8 +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: sbcs r1, r3, #0 ; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: mov r5, #0 @@ -3680,7 +3680,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: moveq r4, r6 ; CHECK-NEXT: movne r6, r0 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: subs r2, r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: vmov.32 d1[0], r6 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: movwlo r5, #1 @@ -3709,8 +3709,8 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixdfti +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: subs r0, r2, #1 ; CHECK-NEXT: sbcs r0, r3, #0 ; CHECK-NEXT: vorr d0, d8, d8 ; CHECK-NEXT: mov r0, #0 @@ -3725,7 +3725,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: movwmi r4, #0 ; CHECK-NEXT: movwmi r5, #0 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r2, r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: vmov.32 d1[0], r5 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: movwlt r6, #1 @@ -3759,11 +3759,11 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: vmov.f64 d8, d0 ; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: mvn r9, #0 -; CHECK-NEXT: subs r1, r0, r9 +; CHECK-NEXT: cmp r0, r9 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: sbcs r1, 
r4, r5 +; CHECK-NEXT: sbcs r1, r1, r5 ; CHECK-NEXT: vmov.f32 s0, s16 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov r7, #0 @@ -3786,7 +3786,7 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: movne r7, r0 ; CHECK-NEXT: moveq r4, r8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r6, r0, r9 +; CHECK-NEXT: cmp r0, r9 ; CHECK-NEXT: vmov.32 d1[0], r7 ; CHECK-NEXT: sbcs r6, r1, r5 ; CHECK-NEXT: sbcs r6, r2, #0 @@ -3830,17 +3830,17 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vmov.f32 s0, s16 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: subs r1, r2, #1 -; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: movwlo r6, #1 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: moveq r4, r6 ; CHECK-NEXT: movne r6, r0 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: subs r2, r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: vmov.32 d1[0], r6 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: movwlo r5, #1 @@ -3870,22 +3870,22 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vmov.f32 s0, s16 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: subs r0, r2, #1 -; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: sbcs r0, r3, #0 -; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: movwlt r0, #1 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: moveq r5, r0 ; CHECK-NEXT: moveq r4, r0 ; CHECK-NEXT: movne r0, r3 ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: movwmi r4, #0 ; CHECK-NEXT: movwmi r5, #0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r2, r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: vmov.32 d1[0], r5 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: movwlt r6, #1 @@ -3924,11 +3924,11 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov 
s0, r0 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: mvn r9, #0 -; CHECK-NEON-NEXT: subs r1, r0, r9 +; CHECK-NEON-NEXT: cmp r0, r9 +; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: mvn r6, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r4, r6 +; CHECK-NEON-NEXT: sbcs r1, r1, r6 ; CHECK-NEON-NEXT: vmov s0, r8 ; CHECK-NEON-NEXT: sbcs r1, r2, #0 ; CHECK-NEON-NEXT: mov r5, #0 @@ -3951,7 +3951,7 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: movne r5, r0 ; CHECK-NEON-NEXT: moveq r4, r8 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: subs r7, r0, r9 +; CHECK-NEON-NEXT: cmp r0, r9 ; CHECK-NEON-NEXT: vmov.32 d1[0], r5 ; CHECK-NEON-NEXT: sbcs r7, r1, r6 ; CHECK-NEON-NEXT: sbcs r7, r2, #0 @@ -3985,11 +3985,11 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-NEXT: vmov.u16 r7, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: mov r4, r1 ; CHECK-FP16-NEXT: mvn r9, #0 -; CHECK-FP16-NEXT: subs r1, r0, r9 +; CHECK-FP16-NEXT: cmp r0, r9 +; CHECK-FP16-NEXT: mov r4, r1 ; CHECK-FP16-NEXT: mvn r5, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r4, r5 +; CHECK-FP16-NEXT: sbcs r1, r1, r5 ; CHECK-FP16-NEXT: vmov s0, r7 ; CHECK-FP16-NEXT: sbcs r1, r2, #0 ; CHECK-FP16-NEXT: mov r7, #0 @@ -4012,7 +4012,7 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-NEXT: movne r7, r0 ; CHECK-FP16-NEXT: moveq r4, r8 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: subs r6, r0, r9 +; CHECK-FP16-NEXT: cmp r0, r9 ; CHECK-FP16-NEXT: vmov.32 d1[0], r7 ; CHECK-FP16-NEXT: sbcs r6, r1, r5 ; CHECK-FP16-NEXT: sbcs r6, r2, #0 @@ -4059,9 +4059,9 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixunssfti -; CHECK-NEON-NEXT: mov r4, r1 -; CHECK-NEON-NEXT: subs r1, r2, #1 +; CHECK-NEON-NEXT: cmp r2, #1 ; CHECK-NEON-NEXT: vmov s0, r5 +; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: sbcs r1, 
r3, #0 ; CHECK-NEON-NEXT: mov r5, #0 ; CHECK-NEON-NEXT: mov r6, #0 @@ -4070,7 +4070,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: moveq r4, r5 ; CHECK-NEON-NEXT: movne r5, r0 ; CHECK-NEON-NEXT: bl __fixunssfti -; CHECK-NEON-NEXT: subs r2, r2, #1 +; CHECK-NEON-NEXT: cmp r2, #1 ; CHECK-NEON-NEXT: vmov.32 d1[0], r5 ; CHECK-NEON-NEXT: sbcs r2, r3, #0 ; CHECK-NEON-NEXT: movwlo r6, #1 @@ -4091,9 +4091,9 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-NEXT: vmov.u16 r6, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixunshfti -; CHECK-FP16-NEXT: mov r4, r1 -; CHECK-FP16-NEXT: subs r1, r2, #1 +; CHECK-FP16-NEXT: cmp r2, #1 ; CHECK-FP16-NEXT: vmov s0, r6 +; CHECK-FP16-NEXT: mov r4, r1 ; CHECK-FP16-NEXT: sbcs r1, r3, #0 ; CHECK-FP16-NEXT: mov r6, #0 ; CHECK-FP16-NEXT: mov r5, #0 @@ -4102,7 +4102,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-NEXT: moveq r4, r6 ; CHECK-FP16-NEXT: movne r6, r0 ; CHECK-FP16-NEXT: bl __fixunshfti -; CHECK-FP16-NEXT: subs r2, r2, #1 +; CHECK-FP16-NEXT: cmp r2, #1 ; CHECK-FP16-NEXT: vmov.32 d1[0], r6 ; CHECK-FP16-NEXT: sbcs r2, r3, #0 ; CHECK-FP16-NEXT: movwlo r5, #1 @@ -4135,8 +4135,8 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixsfti +; CHECK-NEON-NEXT: cmp r2, #1 ; CHECK-NEON-NEXT: mov r5, r0 -; CHECK-NEON-NEXT: subs r0, r2, #1 ; CHECK-NEON-NEXT: sbcs r0, r3, #0 ; CHECK-NEON-NEXT: vmov s0, r6 ; CHECK-NEON-NEXT: mov r0, #0 @@ -4151,7 +4151,7 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: movwmi r4, #0 ; CHECK-NEON-NEXT: movwmi r5, #0 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: subs r2, r2, #1 +; CHECK-NEON-NEXT: cmp r2, #1 ; CHECK-NEON-NEXT: vmov.32 d1[0], r5 ; CHECK-NEON-NEXT: sbcs r2, r3, #0 ; CHECK-NEON-NEXT: movwlt r7, #1 @@ -4176,8 +4176,8 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-NEXT: vmov.u16 
r7, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti +; CHECK-FP16-NEXT: cmp r2, #1 ; CHECK-FP16-NEXT: mov r5, r0 -; CHECK-FP16-NEXT: subs r0, r2, #1 ; CHECK-FP16-NEXT: sbcs r0, r3, #0 ; CHECK-FP16-NEXT: vmov s0, r7 ; CHECK-FP16-NEXT: mov r0, #0 @@ -4192,7 +4192,7 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-NEXT: movwmi r4, #0 ; CHECK-FP16-NEXT: movwmi r5, #0 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: subs r2, r2, #1 +; CHECK-FP16-NEXT: cmp r2, #1 ; CHECK-FP16-NEXT: vmov.32 d1[0], r5 ; CHECK-FP16-NEXT: sbcs r2, r3, #0 ; CHECK-FP16-NEXT: movwlt r6, #1 diff --git a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll index 7cc623fb0a616..169b7a51f9185 100644 --- a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -371,8 +371,8 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { ; ARM6-LABEL: scalar_i64_lowestbit_eq: ; ARM6: @ %bb.0: -; ARM6-NEXT: subs r1, r2, #32 ; ARM6-NEXT: lsl r0, r0, r2 +; ARM6-NEXT: cmp r2, #32 ; ARM6-NEXT: movpl r0, #0 ; ARM6-NEXT: mov r1, #1 ; ARM6-NEXT: bic r0, r1, r0 @@ -380,8 +380,8 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { ; ; ARM78-LABEL: scalar_i64_lowestbit_eq: ; ARM78: @ %bb.0: -; ARM78-NEXT: subs r1, r2, #32 ; ARM78-NEXT: lsl r0, r0, r2 +; ARM78-NEXT: cmp r2, #32 ; ARM78-NEXT: movwpl r0, #0 ; ARM78-NEXT: mov r1, #1 ; ARM78-NEXT: bic r0, r1, r0 @@ -400,7 +400,7 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { ; THUMB78-LABEL: scalar_i64_lowestbit_eq: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: lsls r0, r2 -; THUMB78-NEXT: subs.w r1, r2, #32 +; THUMB78-NEXT: cmp r2, #32 ; THUMB78-NEXT: it pl ; THUMB78-NEXT: movpl r0, #0 ; THUMB78-NEXT: movs r1, #1 @@ -970,7 +970,7 @@ define i1 
@negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind { ; ARM6: @ %bb.0: ; ARM6-NEXT: uxtb r1, r1 ; ARM6-NEXT: mov r2, #24 -; ARM6-NEXT: ands r0, r0, r2, lsr r1 +; ARM6-NEXT: tst r0, r2, lsr r1 ; ARM6-NEXT: mov r0, #0 ; ARM6-NEXT: movmi r0, #1 ; ARM6-NEXT: bx lr @@ -979,7 +979,7 @@ define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind { ; ARM78: @ %bb.0: ; ARM78-NEXT: uxtb r1, r1 ; ARM78-NEXT: mov r2, #24 -; ARM78-NEXT: ands r0, r0, r2, lsr r1 +; ARM78-NEXT: tst r0, r2, lsr r1 ; ARM78-NEXT: mov r0, #0 ; ARM78-NEXT: movwmi r0, #1 ; ARM78-NEXT: bx lr @@ -1003,7 +1003,7 @@ define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind { ; THUMB78-NEXT: uxtb r1, r1 ; THUMB78-NEXT: movs r2, #24 ; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 +; THUMB78-NEXT: tst r1, r0 ; THUMB78-NEXT: mov.w r0, #0 ; THUMB78-NEXT: it mi ; THUMB78-NEXT: movmi r0, #1 diff --git a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll index a8421ae9a6a89..ca7c6913edd2f 100644 --- a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -345,7 +345,7 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { ; ARM6-LABEL: scalar_i64_signbit_eq: ; ARM6: @ %bb.0: ; ARM6-NEXT: lsr r0, r1, r2 -; ARM6-NEXT: subs r1, r2, #32 +; ARM6-NEXT: cmp r2, #32 ; ARM6-NEXT: movpl r0, #0 ; ARM6-NEXT: mvn r0, r0 ; ARM6-NEXT: lsr r0, r0, #31 @@ -354,7 +354,7 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { ; ARM78-LABEL: scalar_i64_signbit_eq: ; ARM78: @ %bb.0: ; ARM78-NEXT: lsr r0, r1, r2 -; ARM78-NEXT: subs r1, r2, #32 +; ARM78-NEXT: cmp r2, #32 ; ARM78-NEXT: movwpl r0, #0 ; ARM78-NEXT: mvn r0, r0 ; ARM78-NEXT: lsr r0, r0, #31 @@ -374,7 +374,7 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { ; THUMB78-LABEL: scalar_i64_signbit_eq: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: 
lsr.w r0, r1, r2 -; THUMB78-NEXT: subs.w r1, r2, #32 +; THUMB78-NEXT: cmp r2, #32 ; THUMB78-NEXT: it pl ; THUMB78-NEXT: movpl r0, #0 ; THUMB78-NEXT: mvns r0, r0 diff --git a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll index 8bd78dd0f6ab5..9163687cab9ab 100644 --- a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll +++ b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll @@ -1,104 +1,152 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=arm-linux -mcpu=generic -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=ARM ; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV6 ; RUN: llc < %s -mtriple=thumbv7-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV7 define i32 @uadd_overflow(i32 %a, i32 %b) #0 { +; ARM-LABEL: uadd_overflow: +; ARM: @ %bb.0: +; ARM-NEXT: cmn r0, r1 +; ARM-NEXT: mov r2, #0 +; ARM-NEXT: adc r0, r2, #0 +; ARM-NEXT: mov pc, lr +; +; THUMBV6-LABEL: uadd_overflow: +; THUMBV6: @ %bb.0: +; THUMBV6-NEXT: movs r2, #0 +; THUMBV6-NEXT: adds r0, r0, r1 +; THUMBV6-NEXT: adcs r2, r2 +; THUMBV6-NEXT: mov r0, r2 +; THUMBV6-NEXT: bx lr +; +; THUMBV7-LABEL: uadd_overflow: +; THUMBV7: @ %bb.0: +; THUMBV7-NEXT: cmn r0, r1 +; THUMBV7-NEXT: mov.w r2, #0 +; THUMBV7-NEXT: adc r0, r2, #0 +; THUMBV7-NEXT: bx lr %sadd = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) %1 = extractvalue { i32, i1 } %sadd, 1 %2 = zext i1 %1 to i32 ret i32 %2 - ; CHECK-LABEL: uadd_overflow: - ; ARM: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] - ; ARM: mov r[[R2:[0-9]+]], #0 - ; ARM: adc r[[R0]], r[[R2]], #0 - ; THUMBV6: movs r[[R2:[0-9]+]], #0 - ; THUMBV6: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] - ; THUMBV6: adcs r[[R2]], r[[R2]] - ; THUMBV6: mov r[[R0]], r[[R2]] - ; THUMBV7: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] - ; THUMBV7: mov.w 
r[[R2:[0-9]+]], #0 - ; THUMBV7: adc r[[R0]], r[[R2]], #0 } define i32 @sadd_overflow(i32 %a, i32 %b) #0 { +; ARM-LABEL: sadd_overflow: +; ARM: @ %bb.0: +; ARM-NEXT: cmn r0, r1 +; ARM-NEXT: mov r0, #1 +; ARM-NEXT: movvc r0, #0 +; ARM-NEXT: mov pc, lr +; +; THUMBV6-LABEL: sadd_overflow: +; THUMBV6: @ %bb.0: +; THUMBV6-NEXT: adds r0, r0, r1 +; THUMBV6-NEXT: bvc .LBB1_2 +; THUMBV6-NEXT: @ %bb.1: +; THUMBV6-NEXT: movs r0, #1 +; THUMBV6-NEXT: bx lr +; THUMBV6-NEXT: .LBB1_2: +; THUMBV6-NEXT: movs r0, #0 +; THUMBV6-NEXT: bx lr +; +; THUMBV7-LABEL: sadd_overflow: +; THUMBV7: @ %bb.0: +; THUMBV7-NEXT: cmn r0, r1 +; THUMBV7-NEXT: mov.w r0, #1 +; THUMBV7-NEXT: it vc +; THUMBV7-NEXT: movvc r0, #0 +; THUMBV7-NEXT: bx lr %sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) %1 = extractvalue { i32, i1 } %sadd, 1 %2 = zext i1 %1 to i32 ret i32 %2 - ; CHECK-LABEL: sadd_overflow: - ; ARM: adds r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]] - ; ARM: mov r[[R0]], #1 - ; ARM: movvc r[[R0]], #0 - ; ARM: mov pc, lr - ; THUMBV6: adds r0, r0, r1 - ; THUMBV6: bvc .LBB1_2 - ; THUMBV7: adds r[[R2:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] - ; THUMBV7: mov.w r[[R0:[0-9]+]], #1 - ; THUMBV7: it vc - ; THUMBV7: movvc r[[R0]], #0 } define i32 @usub_overflow(i32 %a, i32 %b) #0 { +; ARM-LABEL: usub_overflow: +; ARM: @ %bb.0: +; ARM-NEXT: cmp r0, r1 +; ARM-NEXT: mov r2, #0 +; ARM-NEXT: adc r0, r2, #0 +; ARM-NEXT: eor r0, r0, #1 +; ARM-NEXT: mov pc, lr +; +; THUMBV6-LABEL: usub_overflow: +; THUMBV6: @ %bb.0: +; THUMBV6-NEXT: movs r2, #0 +; THUMBV6-NEXT: subs r0, r0, r1 +; THUMBV6-NEXT: adcs r2, r2 +; THUMBV6-NEXT: movs r0, #1 +; THUMBV6-NEXT: eors r0, r2 +; THUMBV6-NEXT: bx lr +; +; THUMBV7-LABEL: usub_overflow: +; THUMBV7: @ %bb.0: +; THUMBV7-NEXT: cmp r0, r1 +; THUMBV7-NEXT: mov.w r2, #0 +; THUMBV7-NEXT: adc r0, r2, #0 +; THUMBV7-NEXT: eor r0, r0, #1 +; THUMBV7-NEXT: bx lr %sadd = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) %1 = extractvalue { i32, i1 } %sadd, 1 %2 
= zext i1 %1 to i32 ret i32 %2 - ; CHECK-LABEL: usub_overflow: - ; ARM: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] - ; ARM: mov r[[R2:[0-9]+]], #0 - ; ARM: adc r[[R0]], r[[R2]], #0 - ; ARM: eor r[[R0]], r[[R0]], #1 - ; THUMBV6: movs r[[R2:[0-9]+]], #0 - ; THUMBV6: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] - ; THUMBV6: adcs r[[R2]], r[[R2]] - ; THUMBV6: movs r[[R0]], #1 - ; THUMBV6: eors r[[R0]], r[[R2]] - ; THUMBV7: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] - ; THUMBV7: mov.w r[[R2:[0-9]+]], #0 - ; THUMBV7: adc r[[R0]], r[[R2]], #0 - ; THUMBV7: eor r[[R0]], r[[R0]], #1 ; We should know that the overflow is just 1 bit, ; no need to clear any other bit - ; CHECK-NOT: and } define i32 @ssub_overflow(i32 %a, i32 %b) #0 { +; ARM-LABEL: ssub_overflow: +; ARM: @ %bb.0: +; ARM-NEXT: mov r2, #1 +; ARM-NEXT: cmp r0, r1 +; ARM-NEXT: movvc r2, #0 +; ARM-NEXT: mov r0, r2 +; ARM-NEXT: mov pc, lr +; +; THUMBV6-LABEL: ssub_overflow: +; THUMBV6: @ %bb.0: +; THUMBV6-NEXT: cmp r0, r1 +; THUMBV6-NEXT: bvc .LBB3_2 +; THUMBV6-NEXT: @ %bb.1: +; THUMBV6-NEXT: movs r0, #1 +; THUMBV6-NEXT: bx lr +; THUMBV6-NEXT: .LBB3_2: +; THUMBV6-NEXT: movs r0, #0 +; THUMBV6-NEXT: bx lr +; +; THUMBV7-LABEL: ssub_overflow: +; THUMBV7: @ %bb.0: +; THUMBV7-NEXT: movs r2, #1 +; THUMBV7-NEXT: cmp r0, r1 +; THUMBV7-NEXT: it vc +; THUMBV7-NEXT: movvc r2, #0 +; THUMBV7-NEXT: mov r0, r2 +; THUMBV7-NEXT: bx lr %sadd = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) %1 = extractvalue { i32, i1 } %sadd, 1 %2 = zext i1 %1 to i32 ret i32 %2 - - ; CHECK-LABEL: ssub_overflow: - - ; ARM: mov r[[R2]], #1 - ; ARM: cmp r[[R0]], r[[R1]] - ; ARM: movvc r[[R2]], #0 - - ; THUMBV6: cmp r0, r1 - ; THUMBV6: bvc .LBB3_2 - - ; THUMBV7: movs r[[R2:[0-9]+]], #1 - ; THUMBV7: cmp r[[R0:[0-9]+]], r[[R1:[0-9]+]] - ; THUMBV7: it vc - ; THUMBV7: movvc r[[R2]], #0 - ; THUMBV7: mov r[[R0]], r[[R2]] } declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, 
i32) #2 declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #3 declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) #4 +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/ARM/long_shift.ll b/llvm/test/CodeGen/ARM/long_shift.ll index 72a6e3566a10b..09dec5f18d0db 100644 --- a/llvm/test/CodeGen/ARM/long_shift.ll +++ b/llvm/test/CodeGen/ARM/long_shift.ll @@ -29,14 +29,14 @@ define i32 @f1(i64 %x, i64 %y) { ; CHECK-LE-LABEL: f1: ; CHECK-LE: @ %bb.0: ; CHECK-LE-NEXT: lsl r0, r0, r2 -; CHECK-LE-NEXT: subs r1, r2, #32 +; CHECK-LE-NEXT: cmp r2, #32 ; CHECK-LE-NEXT: movpl r0, #0 ; CHECK-LE-NEXT: mov pc, lr ; ; CHECK-BE-LABEL: f1: ; CHECK-BE: @ %bb.0: ; CHECK-BE-NEXT: lsl r0, r1, r3 -; CHECK-BE-NEXT: subs r1, r3, #32 +; CHECK-BE-NEXT: cmp r3, #32 ; CHECK-BE-NEXT: movpl r0, #0 ; CHECK-BE-NEXT: mov pc, lr diff --git a/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll b/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll index 837d2cd7f5cc8..8d83892154017 100644 --- a/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll +++ b/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll @@ -1,16 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=thumbv7-apple-ios -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T ; RUN: llc -mtriple=armv7-apple-ios -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-A ; LSR should compare against the post-incremented induction variable. ; In this case, the immediate value is -2 which requires a cmn instruction. 
; -; CHECK-LABEL: f: -; CHECK: %for.body -; CHECK: sub{{.*}}[[IV:r[0-9]+]], #2 -; CHECK-T: adds{{.*}}[[IV]], #2 -; CHECK-A: cmn{{.*}}[[IV]], #2 -; CHECK: bne define i32 @f(ptr nocapture %a, i32 %i) nounwind readonly ssp { +; CHECK-T-LABEL: f: +; CHECK-T: @ %bb.0: @ %entry +; CHECK-T-NEXT: cmn.w r1, #2 +; CHECK-T-NEXT: itt eq +; CHECK-T-NEXT: moveq r0, #0 +; CHECK-T-NEXT: bxeq lr +; CHECK-T-NEXT: LBB0_1: @ %for.body.preheader +; CHECK-T-NEXT: mov r9, r0 +; CHECK-T-NEXT: movs r0, #0 +; CHECK-T-NEXT: movs r3, #0 +; CHECK-T-NEXT: LBB0_2: @ %for.body +; CHECK-T-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-T-NEXT: ldr.w r2, [r9, r1, lsl #2] +; CHECK-T-NEXT: cmp r2, r3 +; CHECK-T-NEXT: itt gt +; CHECK-T-NEXT: movgt r3, r2 +; CHECK-T-NEXT: movgt r0, r1 +; CHECK-T-NEXT: subs r1, #2 +; CHECK-T-NEXT: cmn.w r1, #2 +; CHECK-T-NEXT: bne LBB0_2 +; CHECK-T-NEXT: @ %bb.3: @ %for.end +; CHECK-T-NEXT: bx lr +; +; CHECK-A-LABEL: f: +; CHECK-A: @ %bb.0: @ %entry +; CHECK-A-NEXT: mov r2, #0 +; CHECK-A-NEXT: cmn r1, #2 +; CHECK-A-NEXT: beq LBB0_3 +; CHECK-A-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-A-NEXT: mov r3, #0 +; CHECK-A-NEXT: LBB0_2: @ %for.body +; CHECK-A-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-A-NEXT: ldr r9, [r0, r1, lsl #2] +; CHECK-A-NEXT: cmp r9, r3 +; CHECK-A-NEXT: movgt r2, r1 +; CHECK-A-NEXT: sub r1, r1, #2 +; CHECK-A-NEXT: movgt r3, r9 +; CHECK-A-NEXT: cmn r1, #2 +; CHECK-A-NEXT: bne LBB0_2 +; CHECK-A-NEXT: LBB0_3: @ %for.end +; CHECK-A-NEXT: mov r0, r2 +; CHECK-A-NEXT: bx lr entry: %cmp3 = icmp eq i32 %i, -2 br i1 %cmp3, label %for.end, label %for.body @@ -32,3 +69,5 @@ for.end: ; preds = %for.body, %entry %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ] ret i32 %bi.0.lcssa } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll index 6e891a0480814..b581ba100f87d 100644 --- a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll +++ b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll @@ -114,7 +114,7 @@ define void @f4(ptr %ptr1, ptr %ptr2, i64 %val) { ; CHECK-NEXT: orrs r0, r2, r3 ; CHECK-NEXT: beq LBB3_2 ; CHECK-NEXT: @ %bb.1: @ %if.end -; CHECK-NEXT: subs r0, r2, #10 +; CHECK-NEXT: cmp r2, #10 ; CHECK-NEXT: sbcs r0, r3, #0 ; CHECK-NEXT: bxlt lr ; CHECK-NEXT: LBB3_2: @ %if.end3 diff --git a/llvm/test/CodeGen/ARM/neon_vabd.ll b/llvm/test/CodeGen/ARM/neon_vabd.ll index ffc72b242f829..af8798b33415c 100644 --- a/llvm/test/CodeGen/ARM/neon_vabd.ll +++ b/llvm/test/CodeGen/ARM/neon_vabd.ll @@ -150,13 +150,13 @@ define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: vmov r12, lr, d0 ; CHECK-NEXT: vmov r4, r5, d2 ; CHECK-NEXT: vsub.i64 q8, q0, q1 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs r0, r3, r1 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movwlt r0, #1 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: subs r1, r4, r12 +; CHECK-NEXT: cmp r4, r12 ; CHECK-NEXT: sbcs r1, r5, lr ; CHECK-NEXT: vdup.32 d19, r0 ; CHECK-NEXT: movwlt r6, #1 @@ -481,13 +481,13 @@ define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) { ; CHECK-NEXT: vmov r12, lr, d0 ; CHECK-NEXT: vmov r4, r5, d2 ; CHECK-NEXT: vsub.i64 q8, q0, q1 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs r0, r3, r1 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movwlt r0, #1 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: subs r1, r4, r12 +; CHECK-NEXT: cmp r4, r12 ; CHECK-NEXT: sbcs r1, r5, lr ; CHECK-NEXT: vdup.32 d19, r0 ; CHECK-NEXT: movwlt r6, #1 diff --git a/llvm/test/CodeGen/ARM/popcnt.ll b/llvm/test/CodeGen/ARM/popcnt.ll index a70fdc580ca9b..5d178fbb45ad1 100644 --- a/llvm/test/CodeGen/ARM/popcnt.ll +++ 
b/llvm/test/CodeGen/ARM/popcnt.ll @@ -403,7 +403,7 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone { ; CHECK-NEXT: sbc r3, r1, #0 ; CHECK-NEXT: eor r12, r1, r3 ; CHECK-NEXT: eor r1, r0, r2 -; CHECK-NEXT: subs r1, r2, r1 +; CHECK-NEXT: cmp r2, r1 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: sbcs r1, r3, r12 ; CHECK-NEXT: movlo r0, #1 diff --git a/llvm/test/CodeGen/ARM/pr35103.ll b/llvm/test/CodeGen/ARM/pr35103.ll index e2be40e64ba5f..b86dff469ab0a 100644 --- a/llvm/test/CodeGen/ARM/pr35103.ll +++ b/llvm/test/CodeGen/ARM/pr35103.ll @@ -9,10 +9,10 @@ define i32 @foo(i32 %vreg0, i32 %vreg1, i32 %vreg2, i32 %vreg3, i32 %vreg4) loca ; CHECK-NEXT: adds r2, r2, r0 ; CHECK-NEXT: mov r12, #0 ; CHECK-NEXT: adc lr, r12, #0 -; CHECK-NEXT: adds r0, r2, r0 +; CHECK-NEXT: cmn r2, r0 ; CHECK-NEXT: ldr r2, [sp, #8] ; CHECK-NEXT: adc r0, r12, #0 -; CHECK-NEXT: adds r1, r3, r1 +; CHECK-NEXT: cmn r3, r1 ; CHECK-NEXT: adcs r1, r2, #0 ; CHECK-NEXT: adc r0, r0, lr ; CHECK-NEXT: pop {r11, lr} diff --git a/llvm/test/CodeGen/ARM/scmp.ll b/llvm/test/CodeGen/ARM/scmp.ll index 9189aee6aaf43..88bc860b50a1f 100644 --- a/llvm/test/CodeGen/ARM/scmp.ll +++ b/llvm/test/CodeGen/ARM/scmp.ll @@ -39,12 +39,12 @@ define i8 @scmp_8_64(i64 %x, i64 %y) nounwind { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: subs lr, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: mov r12, #0 ; CHECK-NEXT: sbcs lr, r1, r3 ; CHECK-NEXT: mov lr, #0 ; CHECK-NEXT: movwlt lr, #1 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs r0, r3, r1 ; CHECK-NEXT: movwlt r12, #1 ; CHECK-NEXT: sub r0, r12, lr @@ -61,7 +61,7 @@ define i8 @scmp_8_128(i128 %x, i128 %y) nounwind { ; CHECK-NEXT: ldr r4, [sp, #24] ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: ldr r6, [sp, #28] -; CHECK-NEXT: subs r7, r0, r4 +; CHECK-NEXT: cmp r0, r4 ; CHECK-NEXT: ldr r12, [sp, #32] ; CHECK-NEXT: sbcs r7, r1, r6 ; CHECK-NEXT: ldr lr, [sp, #36] @@ -69,7 +69,7 @@ define i8 @scmp_8_128(i128 %x, i128 %y) 
nounwind { ; CHECK-NEXT: sbcs r7, r3, lr ; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: subs r0, r4, r0 +; CHECK-NEXT: cmp r4, r0 ; CHECK-NEXT: sbcs r0, r6, r1 ; CHECK-NEXT: sbcs r0, r12, r2 ; CHECK-NEXT: sbcs r0, lr, r3 @@ -108,12 +108,12 @@ define i32 @scmp_32_64(i64 %x, i64 %y) nounwind { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: subs lr, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: mov r12, #0 ; CHECK-NEXT: sbcs lr, r1, r3 ; CHECK-NEXT: mov lr, #0 ; CHECK-NEXT: movwlt lr, #1 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs r0, r3, r1 ; CHECK-NEXT: movwlt r12, #1 ; CHECK-NEXT: sub r0, r12, lr @@ -127,12 +127,12 @@ define i64 @scmp_64_64(i64 %x, i64 %y) nounwind { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: subs lr, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: mov r12, #0 ; CHECK-NEXT: sbcs lr, r1, r3 ; CHECK-NEXT: mov lr, #0 ; CHECK-NEXT: movwlt lr, #1 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs r0, r3, r1 ; CHECK-NEXT: movwlt r12, #1 ; CHECK-NEXT: sub r0, r12, lr diff --git a/llvm/test/CodeGen/ARM/select_const.ll b/llvm/test/CodeGen/ARM/select_const.ll index 180daa12e7c52..475c4c942ab70 100644 --- a/llvm/test/CodeGen/ARM/select_const.ll +++ b/llvm/test/CodeGen/ARM/select_const.ll @@ -763,7 +763,7 @@ define i64 @opaque_constant2(i1 %cond, i64 %x) { define i64 @func(i64 %arg) { ; ARM-LABEL: func: ; ARM: @ %bb.0: @ %entry -; ARM-NEXT: adds r0, r0, #1 +; ARM-NEXT: cmn r0, #1 ; ARM-NEXT: mov r2, #0 ; ARM-NEXT: adcs r0, r1, #0 ; ARM-NEXT: mov r1, #0 @@ -773,7 +773,7 @@ define i64 @func(i64 %arg) { ; ; THUMB2-LABEL: func: ; THUMB2: @ %bb.0: @ %entry -; THUMB2-NEXT: adds r0, #1 +; THUMB2-NEXT: cmn.w r0, #1 ; THUMB2-NEXT: mov.w r2, #0 ; THUMB2-NEXT: adcs r0, r1, #0 ; THUMB2-NEXT: mov.w r1, #0 diff --git a/llvm/test/CodeGen/ARM/select_xform.ll b/llvm/test/CodeGen/ARM/select_xform.ll index 
382536fc91d6d..1fe65f9c52164 100644 --- a/llvm/test/CodeGen/ARM/select_xform.ll +++ b/llvm/test/CodeGen/ARM/select_xform.ll @@ -405,11 +405,11 @@ define i32 @t17(i32 %x, i32 %y) #0 { ; ; T2-LABEL: t17: ; T2: @ %bb.0: @ %entry -; T2-NEXT: adds r0, #1 +; T2-NEXT: cmn.w r0, #1 ; T2-NEXT: mov.w r0, #2 ; T2-NEXT: it eq ; T2-NEXT: moveq r0, #5 -; T2-NEXT: adds r1, #1 +; T2-NEXT: cmn.w r1, #1 ; T2-NEXT: mov.w r1, #4 ; T2-NEXT: it eq ; T2-NEXT: moveq r1, #3 @@ -442,7 +442,7 @@ define i32 @t18(i32 %x, i32 %y) #0 { ; T2-NEXT: cmp r0, #0 ; T2-NEXT: it ne ; T2-NEXT: movne r1, #5 -; T2-NEXT: adds r0, #1 +; T2-NEXT: cmn.w r0, #1 ; T2-NEXT: mov.w r0, #4 ; T2-NEXT: it ne ; T2-NEXT: movne r0, #3 @@ -504,11 +504,11 @@ define i32 @t20(i32 %x, i32 %y) #0 { ; ; T2-LABEL: t20: ; T2: @ %bb.0: @ %entry -; T2-NEXT: adds r0, #1 +; T2-NEXT: cmn.w r0, #1 ; T2-NEXT: mov.w r0, #2 ; T2-NEXT: it ne ; T2-NEXT: movne r0, #5 -; T2-NEXT: adds r1, #1 +; T2-NEXT: cmn.w r1, #1 ; T2-NEXT: mov.w r1, #4 ; T2-NEXT: it ne ; T2-NEXT: movne r1, #3 diff --git a/llvm/test/CodeGen/ARM/smml.ll b/llvm/test/CodeGen/ARM/smml.ll index a09ec504d8b78..8e79037e163c4 100644 --- a/llvm/test/CodeGen/ARM/smml.ll +++ b/llvm/test/CodeGen/ARM/smml.ll @@ -166,7 +166,7 @@ define void @test_used_flags(i32 %in1, i32 %in2) { ; CHECK-V6-NEXT: push {r11, lr} ; CHECK-V6-NEXT: smull r1, r2, r0, r1 ; CHECK-V6-NEXT: mov r0, #56 -; CHECK-V6-NEXT: subs r1, r1, #1 +; CHECK-V6-NEXT: cmp r1, #1 ; CHECK-V6-NEXT: sbcs r1, r2, #0 ; CHECK-V6-NEXT: movlt r0, #42 ; CHECK-V6-NEXT: bl opaque @@ -178,7 +178,7 @@ define void @test_used_flags(i32 %in1, i32 %in2) { ; CHECK-V7-NEXT: push {r11, lr} ; CHECK-V7-NEXT: smull r1, r2, r0, r1 ; CHECK-V7-NEXT: mov r0, #56 -; CHECK-V7-NEXT: subs r1, r1, #1 +; CHECK-V7-NEXT: cmp r1, #1 ; CHECK-V7-NEXT: sbcs r1, r2, #0 ; CHECK-V7-NEXT: movwlt r0, #42 ; CHECK-V7-NEXT: bl opaque @@ -248,7 +248,7 @@ define void @test_used_flags(i32 %in1, i32 %in2) { ; CHECK-THUMBV7-NEXT: push {r7, lr} ; CHECK-THUMBV7-NEXT: smull r1, r2, 
r0, r1 ; CHECK-THUMBV7-NEXT: movs r0, #56 -; CHECK-THUMBV7-NEXT: subs r1, #1 +; CHECK-THUMBV7-NEXT: cmp r1, #1 ; CHECK-THUMBV7-NEXT: sbcs r1, r2, #0 ; CHECK-THUMBV7-NEXT: it lt ; CHECK-THUMBV7-NEXT: movlt r0, #42 @@ -261,7 +261,7 @@ define void @test_used_flags(i32 %in1, i32 %in2) { ; CHECK-THUMBV7M-NEXT: push {r7, lr} ; CHECK-THUMBV7M-NEXT: smull r1, r2, r0, r1 ; CHECK-THUMBV7M-NEXT: movs r0, #56 -; CHECK-THUMBV7M-NEXT: subs r1, #1 +; CHECK-THUMBV7M-NEXT: cmp r1, #1 ; CHECK-THUMBV7M-NEXT: sbcs r1, r2, #0 ; CHECK-THUMBV7M-NEXT: it lt ; CHECK-THUMBV7M-NEXT: movlt r0, #42 @@ -274,7 +274,7 @@ define void @test_used_flags(i32 %in1, i32 %in2) { ; CHECK-THUMBV7EM-NEXT: push {r7, lr} ; CHECK-THUMBV7EM-NEXT: smull r1, r2, r0, r1 ; CHECK-THUMBV7EM-NEXT: movs r0, #56 -; CHECK-THUMBV7EM-NEXT: subs r1, #1 +; CHECK-THUMBV7EM-NEXT: cmp r1, #1 ; CHECK-THUMBV7EM-NEXT: sbcs r1, r2, #0 ; CHECK-THUMBV7EM-NEXT: it lt ; CHECK-THUMBV7EM-NEXT: movlt r0, #42 diff --git a/llvm/test/CodeGen/ARM/ssub_sat.ll b/llvm/test/CodeGen/ARM/ssub_sat.ll index 0978bfd1f0140..d08b2e9465e85 100644 --- a/llvm/test/CodeGen/ARM/ssub_sat.ll +++ b/llvm/test/CodeGen/ARM/ssub_sat.ll @@ -94,10 +94,10 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; CHECK-T2-NEXT: eor.w r12, r1, r3 ; CHECK-T2-NEXT: sbc.w r2, r1, r3 ; CHECK-T2-NEXT: eors r1, r2 -; CHECK-T2-NEXT: ands.w r1, r1, r12 +; CHECK-T2-NEXT: tst.w r12, r1 +; CHECK-T2-NEXT: mov.w r1, #-2147483648 ; CHECK-T2-NEXT: it mi ; CHECK-T2-NEXT: asrmi r0, r2, #31 -; CHECK-T2-NEXT: mov.w r1, #-2147483648 ; CHECK-T2-NEXT: it mi ; CHECK-T2-NEXT: eormi.w r2, r1, r2, asr #31 ; CHECK-T2-NEXT: mov r1, r2 @@ -109,9 +109,9 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; CHECK-ARM-NEXT: eor r12, r1, r3 ; CHECK-ARM-NEXT: sbc r2, r1, r3 ; CHECK-ARM-NEXT: eor r1, r1, r2 -; CHECK-ARM-NEXT: ands r1, r12, r1 -; CHECK-ARM-NEXT: asrmi r0, r2, #31 +; CHECK-ARM-NEXT: tst r12, r1 ; CHECK-ARM-NEXT: mov r1, #-2147483648 +; CHECK-ARM-NEXT: asrmi r0, r2, #31 ; CHECK-ARM-NEXT: eormi r2, r1, 
r2, asr #31 ; CHECK-ARM-NEXT: mov r1, r2 ; CHECK-ARM-NEXT: bx lr diff --git a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll index adf6cafc6ccb8..1a4ed431d803a 100644 --- a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll @@ -92,10 +92,10 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; CHECK-T2-NEXT: sbc.w r2, r1, r12 ; CHECK-T2-NEXT: eor.w r3, r1, r12 ; CHECK-T2-NEXT: eors r1, r2 -; CHECK-T2-NEXT: ands r1, r3 +; CHECK-T2-NEXT: tst r3, r1 +; CHECK-T2-NEXT: mov.w r1, #-2147483648 ; CHECK-T2-NEXT: it mi ; CHECK-T2-NEXT: asrmi r0, r2, #31 -; CHECK-T2-NEXT: mov.w r1, #-2147483648 ; CHECK-T2-NEXT: it mi ; CHECK-T2-NEXT: eormi.w r2, r1, r2, asr #31 ; CHECK-T2-NEXT: mov r1, r2 @@ -109,9 +109,9 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; CHECK-ARM-NEXT: eor r3, r1, r2 ; CHECK-ARM-NEXT: sbc r2, r1, r2 ; CHECK-ARM-NEXT: eor r1, r1, r2 -; CHECK-ARM-NEXT: ands r1, r3, r1 -; CHECK-ARM-NEXT: asrmi r0, r2, #31 +; CHECK-ARM-NEXT: tst r3, r1 ; CHECK-ARM-NEXT: mov r1, #-2147483648 +; CHECK-ARM-NEXT: asrmi r0, r2, #31 ; CHECK-ARM-NEXT: eormi r2, r1, r2, asr #31 ; CHECK-ARM-NEXT: mov r1, r2 ; CHECK-ARM-NEXT: bx lr diff --git a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll index fb966c29f39a2..a804b594476ad 100644 --- a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll +++ b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll @@ -128,7 +128,7 @@ define float @float_sel(i32 %a, i32 %b, float %x, float %y) { ; CHECK-V7-LABEL: float_sel: ; CHECK-V7: @ %bb.0: @ %entry ; CHECK-V7-NEXT: vmov s2, r2 -; CHECK-V7-NEXT: subs r0, r0, r1 +; CHECK-V7-NEXT: cmp r0, r1 ; CHECK-V7-NEXT: vmov s0, r3 ; CHECK-V7-NEXT: vmoveq.f32 s0, s2 ; CHECK-V7-NEXT: vmov r0, s0 @@ -136,7 +136,7 @@ define float @float_sel(i32 %a, i32 %b, float %x, float %y) { ; ; CHECK-V8-LABEL: float_sel: ; CHECK-V8: @ %bb.0: @ %entry -; CHECK-V8-NEXT: subs r0, r0, r1 +; CHECK-V8-NEXT: cmp r0, r1 ; CHECK-V8-NEXT: vmov 
s0, r3 ; CHECK-V8-NEXT: vmov s2, r2 ; CHECK-V8-NEXT: vseleq.f32 s0, s2, s0 @@ -154,7 +154,7 @@ define double @double_sel(i32 %a, i32 %b, double %x, double %y) { ; CHECK-V7: @ %bb.0: @ %entry ; CHECK-V7-NEXT: vmov d17, r2, r3 ; CHECK-V7-NEXT: vldr d16, [sp] -; CHECK-V7-NEXT: subs r0, r0, r1 +; CHECK-V7-NEXT: cmp r0, r1 ; CHECK-V7-NEXT: vmoveq.f64 d16, d17 ; CHECK-V7-NEXT: vmov r0, r1, d16 ; CHECK-V7-NEXT: bx lr @@ -163,7 +163,7 @@ define double @double_sel(i32 %a, i32 %b, double %x, double %y) { ; CHECK-V8: @ %bb.0: @ %entry ; CHECK-V8-NEXT: vldr d16, [sp] ; CHECK-V8-NEXT: vmov d17, r2, r3 -; CHECK-V8-NEXT: subs r0, r0, r1 +; CHECK-V8-NEXT: cmp r0, r1 ; CHECK-V8-NEXT: vseleq.f64 d16, d17, d16 ; CHECK-V8-NEXT: vmov r0, r1, d16 ; CHECK-V8-NEXT: bx lr diff --git a/llvm/test/CodeGen/ARM/ucmp.ll b/llvm/test/CodeGen/ARM/ucmp.ll index bb0201454d1ea..5f92c0bbde952 100644 --- a/llvm/test/CodeGen/ARM/ucmp.ll +++ b/llvm/test/CodeGen/ARM/ucmp.ll @@ -39,12 +39,12 @@ define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: subs lr, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: mov r12, #0 ; CHECK-NEXT: sbcs lr, r1, r3 ; CHECK-NEXT: mov lr, #0 ; CHECK-NEXT: movwlo lr, #1 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs r0, r3, r1 ; CHECK-NEXT: movwlo r12, #1 ; CHECK-NEXT: sub r0, r12, lr @@ -61,7 +61,7 @@ define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind { ; CHECK-NEXT: ldr r4, [sp, #24] ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: ldr r6, [sp, #28] -; CHECK-NEXT: subs r7, r0, r4 +; CHECK-NEXT: cmp r0, r4 ; CHECK-NEXT: ldr r12, [sp, #32] ; CHECK-NEXT: sbcs r7, r1, r6 ; CHECK-NEXT: ldr lr, [sp, #36] @@ -69,7 +69,7 @@ define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind { ; CHECK-NEXT: sbcs r7, r3, lr ; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: movwlo r7, #1 -; CHECK-NEXT: subs r0, r4, r0 +; CHECK-NEXT: cmp r4, r0 ; CHECK-NEXT: sbcs r0, r6, r1 ; CHECK-NEXT: sbcs r0, r12, r2 ; CHECK-NEXT: 
sbcs r0, lr, r3 @@ -96,12 +96,12 @@ define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: subs lr, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: mov r12, #0 ; CHECK-NEXT: sbcs lr, r1, r3 ; CHECK-NEXT: mov lr, #0 ; CHECK-NEXT: movwlo lr, #1 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs r0, r3, r1 ; CHECK-NEXT: movwlo r12, #1 ; CHECK-NEXT: sub r0, r12, lr @@ -115,12 +115,12 @@ define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: subs lr, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: mov r12, #0 ; CHECK-NEXT: sbcs lr, r1, r3 ; CHECK-NEXT: mov lr, #0 ; CHECK-NEXT: movwlo lr, #1 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs r0, r3, r1 ; CHECK-NEXT: movwlo r12, #1 ; CHECK-NEXT: sub r0, r12, lr diff --git a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll index b85cb3a4f191c..45d910b7726de 100644 --- a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll @@ -633,7 +633,7 @@ define i1 @test_urem_larger(i63 %X) nounwind { ; ARM5-NEXT: ldr r3, .LCPI5_2 ; ARM5-NEXT: bic r1, r0, #-2147483648 ; ARM5-NEXT: mov r0, #0 -; ARM5-NEXT: subs r2, r2, r3 +; ARM5-NEXT: cmp r2, r3 ; ARM5-NEXT: sbcs r1, r1, #1 ; ARM5-NEXT: movlo r0, #1 ; ARM5-NEXT: pop {r4, pc} @@ -661,7 +661,7 @@ define i1 @test_urem_larger(i63 %X) nounwind { ; ARM6-NEXT: ldr r3, .LCPI5_2 ; ARM6-NEXT: bic r1, r0, #-2147483648 ; ARM6-NEXT: mov r0, #0 -; ARM6-NEXT: subs r2, r2, r3 +; ARM6-NEXT: cmp r2, r3 ; ARM6-NEXT: sbcs r1, r1, #1 ; ARM6-NEXT: movlo r0, #1 ; ARM6-NEXT: pop {r11, pc} @@ -691,7 +691,7 @@ define i1 @test_urem_larger(i63 %X) nounwind { ; ARM7-NEXT: movw r3, #24026 ; ARM7-NEXT: bic r1, r0, #-2147483648 ; ARM7-NEXT: movt r3, #48461 -; ARM7-NEXT: subs r2, r2, r3 +; 
ARM7-NEXT: cmp r2, r3 ; ARM7-NEXT: mov r0, #0 ; ARM7-NEXT: sbcs r1, r1, #1 ; ARM7-NEXT: movwlo r0, #1 @@ -714,7 +714,7 @@ define i1 @test_urem_larger(i63 %X) nounwind { ; ARM8-NEXT: movw r3, #24026 ; ARM8-NEXT: bic r1, r0, #-2147483648 ; ARM8-NEXT: movt r3, #48461 -; ARM8-NEXT: subs r2, r2, r3 +; ARM8-NEXT: cmp r2, r3 ; ARM8-NEXT: mov r0, #0 ; ARM8-NEXT: sbcs r1, r1, #1 ; ARM8-NEXT: movwlo r0, #1 @@ -737,7 +737,7 @@ define i1 @test_urem_larger(i63 %X) nounwind { ; NEON7-NEXT: movw r3, #24026 ; NEON7-NEXT: bic r1, r0, #-2147483648 ; NEON7-NEXT: movt r3, #48461 -; NEON7-NEXT: subs r2, r2, r3 +; NEON7-NEXT: cmp r2, r3 ; NEON7-NEXT: mov r0, #0 ; NEON7-NEXT: sbcs r1, r1, #1 ; NEON7-NEXT: movwlo r0, #1 @@ -760,7 +760,7 @@ define i1 @test_urem_larger(i63 %X) nounwind { ; NEON8-NEXT: movw r3, #24026 ; NEON8-NEXT: bic r1, r0, #-2147483648 ; NEON8-NEXT: movt r3, #48461 -; NEON8-NEXT: subs r2, r2, r3 +; NEON8-NEXT: cmp r2, r3 ; NEON8-NEXT: mov r0, #0 ; NEON8-NEXT: sbcs r1, r1, #1 ; NEON8-NEXT: movwlo r0, #1 diff --git a/llvm/test/CodeGen/ARM/vector-trunc.ll b/llvm/test/CodeGen/ARM/vector-trunc.ll index 9acf463c2be93..3d8c3d75fa522 100644 --- a/llvm/test/CodeGen/ARM/vector-trunc.ll +++ b/llvm/test/CodeGen/ARM/vector-trunc.ll @@ -5,7 +5,7 @@ define i32 @test(i64 %arg1) { ; LE-LABEL: test: ; LE: @ %bb.0: @ %entry -; LE-NEXT: subs r0, r0, #1 +; LE-NEXT: cmp r0, #1 ; LE-NEXT: mov r2, #0 ; LE-NEXT: sbcs r0, r1, #0 ; LE-NEXT: vldr s0, .LCPI0_0 @@ -25,7 +25,7 @@ define i32 @test(i64 %arg1) { ; ; BE-LABEL: test: ; BE: @ %bb.0: @ %entry -; BE-NEXT: subs r1, r1, #1 +; BE-NEXT: cmp r1, #1 ; BE-NEXT: mov r2, #0 ; BE-NEXT: sbcs r0, r0, #0 ; BE-NEXT: vldr s0, .LCPI0_0 diff --git a/llvm/test/CodeGen/ARM/vicmp-64.ll b/llvm/test/CodeGen/ARM/vicmp-64.ll index 5ea4c1005fd23..f1d0a94142103 100644 --- a/llvm/test/CodeGen/ARM/vicmp-64.ll +++ b/llvm/test/CodeGen/ARM/vicmp-64.ll @@ -1,17 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; 
RUN: llc -mtriple=arm -mattr=+neon %s -o - | FileCheck %s ; Check codegen for 64-bit icmp operations, which don't directly map to any ; instruction. define <2 x i64> @vne(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vne: -;CHECK: vceq.i32 -;CHECK-NEXT: vrev64.32 -;CHECK-NEXT: vand -;CHECK-NEXT: vmvn -;CHECK-NEXT: vmov -;CHECK-NEXT: vmov -;CHECK-NEXT: mov pc, lr +; CHECK-LABEL: vne: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vceq.i32 q8, q9, q8 +; CHECK-NEXT: vrev64.32 q9, q8 +; CHECK-NEXT: vand q8, q8, q9 +; CHECK-NEXT: vmvn q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %tmp3 = icmp ne <2 x i64> %tmp1, %tmp2 @@ -20,13 +24,16 @@ define <2 x i64> @vne(ptr %A, ptr %B) nounwind { } define <2 x i64> @veq(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: veq: -;CHECK: vceq.i32 -;CHECK-NEXT: vrev64.32 -;CHECK-NEXT: vand -;CHECK-NEXT: vmov -;CHECK-NEXT: vmov -;CHECK-NEXT: mov pc, lr +; CHECK-LABEL: veq: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vceq.i32 q8, q9, q8 +; CHECK-NEXT: vrev64.32 q9, q8 +; CHECK-NEXT: vand q8, q8, q9 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %tmp3 = icmp eq <2 x i64> %tmp1, %tmp2 @@ -39,11 +46,31 @@ define <2 x i64> @veq(ptr %A, ptr %B) nounwind { ; would come out to roughly 6 instructions, but we currently ; scalarize it. 
define <2 x i64> @vult(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vult: -;CHECK: subs -;CHECK: sbcs -;CHECK: subs -;CHECK: sbcs +; CHECK-LABEL: vult: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vmov r0, r12, d16 +; CHECK-NEXT: vmov lr, r4, d17 +; CHECK-NEXT: vmov r3, r1, d18 +; CHECK-NEXT: vmov r5, r6, d19 +; CHECK-NEXT: cmp r3, r0 +; CHECK-NEXT: sbcs r0, r1, r12 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movlo r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: cmp r5, lr +; CHECK-NEXT: sbcs r1, r6, r4 +; CHECK-NEXT: movlo r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: mov r3, r2 +; CHECK-NEXT: pop {r4, r5, r6, lr} +; CHECK-NEXT: mov pc, lr %tmp1 = load <2 x i64>, ptr %A %tmp2 = load <2 x i64>, ptr %B %tmp3 = icmp ult <2 x i64> %tmp1, %tmp2 diff --git a/llvm/test/CodeGen/ARM/vselect_imax.ll b/llvm/test/CodeGen/ARM/vselect_imax.ll index bd5e3061f0d18..f07c0336b3c7e 100644 --- a/llvm/test/CodeGen/ARM/vselect_imax.ll +++ b/llvm/test/CodeGen/ARM/vselect_imax.ll @@ -121,22 +121,22 @@ define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: vmov lr, r12, d18 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: vmov r2, r1, d20 -; CHECK-NEXT: subs r2, r2, lr +; CHECK-NEXT: cmp r2, lr ; CHECK-NEXT: vmov r2, r5, d22 ; CHECK-NEXT: sbcs r1, r1, r12 ; CHECK-NEXT: mov r1, #0 ; CHECK-NEXT: movlt r1, #1 ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: mvnne r1, #0 -; CHECK-NEXT: subs r2, r2, r4 -; CHECK-NEXT: sbcs r6, r5, r6 +; CHECK-NEXT: cmp r2, r4 ; CHECK-NEXT: vmov r2, r12, d17 +; CHECK-NEXT: sbcs r6, r5, r6 ; CHECK-NEXT: vmov r5, r4, d23 ; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: movlt r6, #1 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: mvnne r6, #0 -; CHECK-NEXT: subs r2, r5, r2 +; CHECK-NEXT: cmp r5, r2 ; CHECK-NEXT: sbcs r2, r4, r12 ; CHECK-NEXT: vmov lr, r12, 
d19 ; CHECK-NEXT: vmov r4, r5, d21 @@ -147,7 +147,7 @@ define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: vdup.32 d25, r2 ; CHECK-NEXT: vdup.32 d24, r6 ; CHECK-NEXT: vbit q8, q11, q12 -; CHECK-NEXT: subs r4, r4, lr +; CHECK-NEXT: cmp r4, lr ; CHECK-NEXT: sbcs r5, r5, r12 ; CHECK-NEXT: movlt r0, #1 ; CHECK-NEXT: cmp r0, #0 @@ -188,7 +188,7 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: vld1.64 {d18, d19}, [r0:128] ; CHECK-NEXT: vmov r0, r12, d16 ; CHECK-NEXT: vmov r1, r2, d18 -; CHECK-NEXT: subs r0, r1, r0 +; CHECK-NEXT: cmp r1, r0 ; CHECK-NEXT: vmov r1, r4, d25 ; CHECK-NEXT: sbcs r0, r2, r12 ; CHECK-NEXT: mov r12, #0 @@ -196,7 +196,7 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: movlt r12, #1 ; CHECK-NEXT: cmp r12, #0 ; CHECK-NEXT: mvnne r12, #0 -; CHECK-NEXT: subs r1, r1, r2 +; CHECK-NEXT: cmp r1, r2 ; CHECK-NEXT: sbcs r0, r4, r0 ; CHECK-NEXT: vmov r2, r4, d24 ; CHECK-NEXT: mov r0, #0 @@ -205,7 +205,7 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: mvnne r0, #0 ; CHECK-NEXT: vdup.32 d1, r0 ; CHECK-NEXT: vmov r0, r1, d20 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs r0, r4, r1 ; CHECK-NEXT: vmov r2, r4, d26 ; CHECK-NEXT: mov r0, #0 @@ -214,7 +214,7 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: mvnne r0, #0 ; CHECK-NEXT: vdup.32 d0, r0 ; CHECK-NEXT: vmov r0, r1, d22 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: sbcs r0, r4, r1 ; CHECK-NEXT: vmov r4, r5, d31 @@ -222,7 +222,7 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: movlt r2, #1 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: subs r0, r4, r0 +; CHECK-NEXT: cmp r4, r0 ; CHECK-NEXT: sbcs r0, r5, r1 ; CHECK-NEXT: vmov r4, 
r5, d30 ; CHECK-NEXT: mov r0, #0 @@ -231,7 +231,7 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: mvnne r0, #0 ; CHECK-NEXT: vdup.32 d3, r0 ; CHECK-NEXT: vmov r0, r1, d28 -; CHECK-NEXT: subs r0, r4, r0 +; CHECK-NEXT: cmp r4, r0 ; CHECK-NEXT: sbcs r0, r5, r1 ; CHECK-NEXT: vmov r4, r5, d27 ; CHECK-NEXT: mov r0, #0 @@ -242,7 +242,7 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: vmov r0, r1, d23 ; CHECK-NEXT: vbit q14, q15, q1 ; CHECK-NEXT: vbit q10, q12, q0 -; CHECK-NEXT: subs r0, r4, r0 +; CHECK-NEXT: cmp r4, r0 ; CHECK-NEXT: sbcs r0, r5, r1 ; CHECK-NEXT: vmov r1, r4, d17 ; CHECK-NEXT: vmov r5, r6, d19 @@ -254,7 +254,7 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: vdup.32 d30, r2 ; CHECK-NEXT: vbit q11, q13, q15 ; CHECK-NEXT: vst1.64 {d28, d29}, [r3:128]! -; CHECK-NEXT: subs r1, r5, r1 +; CHECK-NEXT: cmp r5, r1 ; CHECK-NEXT: sbcs r1, r6, r4 ; CHECK-NEXT: movlt lr, #1 ; CHECK-NEXT: cmp lr, #0 @@ -298,7 +298,7 @@ define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: vld1.64 {d20, d21}, [r8:128]! ; CHECK-NEXT: vld1.64 {d22, d23}, [r8:128]! ; CHECK-NEXT: vld1.64 {d0, d1}, [lr:128]! -; CHECK-NEXT: subs r4, r6, r4 +; CHECK-NEXT: cmp r6, r4 ; CHECK-NEXT: sbcs r4, r7, r5 ; CHECK-NEXT: vmov r5, r6, d16 ; CHECK-NEXT: vmov r7, r2, d24 @@ -307,7 +307,7 @@ define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: mvnne r4, #0 ; CHECK-NEXT: vdup.32 d27, r4 -; CHECK-NEXT: subs r5, r7, r5 +; CHECK-NEXT: cmp r7, r5 ; CHECK-NEXT: sbcs r2, r2, r6 ; CHECK-NEXT: vmov r5, r6, d1 ; CHECK-NEXT: mov r2, #0 @@ -320,7 +320,7 @@ define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: vld1.64 {d24, d25}, [r0:128]! ; CHECK-NEXT: vld1.64 {d26, d27}, [r1:128]! ; CHECK-NEXT: vld1.64 {d28, d29}, [lr:128]! 
-; CHECK-NEXT: subs r2, r5, r2 +; CHECK-NEXT: cmp r5, r2 ; CHECK-NEXT: sbcs r2, r6, r4 ; CHECK-NEXT: vmov r4, r5, d22 ; CHECK-NEXT: vmov r6, r7, d0 @@ -329,7 +329,7 @@ define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: mvnne r2, #0 ; CHECK-NEXT: vdup.32 d3, r2 -; CHECK-NEXT: subs r4, r6, r4 +; CHECK-NEXT: cmp r6, r4 ; CHECK-NEXT: sbcs r4, r7, r5 ; CHECK-NEXT: vmov r2, r5, d27 ; CHECK-NEXT: vmov r6, r7, d25 @@ -338,7 +338,7 @@ define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: mvnne r4, #0 ; CHECK-NEXT: vdup.32 d2, r4 -; CHECK-NEXT: subs r2, r6, r2 +; CHECK-NEXT: cmp r6, r2 ; CHECK-NEXT: sbcs r2, r7, r5 ; CHECK-NEXT: vmov r6, r7, d24 ; CHECK-NEXT: mov r2, #0 @@ -347,7 +347,7 @@ define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: mvnne r2, #0 ; CHECK-NEXT: vdup.32 d5, r2 ; CHECK-NEXT: vmov r2, r5, d26 -; CHECK-NEXT: subs r2, r6, r2 +; CHECK-NEXT: cmp r6, r2 ; CHECK-NEXT: sbcs r2, r7, r5 ; CHECK-NEXT: vmov r6, r7, d19 ; CHECK-NEXT: mov r2, #0 @@ -356,7 +356,7 @@ define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: mvnne r2, #0 ; CHECK-NEXT: vdup.32 d4, r2 ; CHECK-NEXT: vmov r2, r5, d21 -; CHECK-NEXT: subs r2, r6, r2 +; CHECK-NEXT: cmp r6, r2 ; CHECK-NEXT: sbcs r2, r7, r5 ; CHECK-NEXT: vmov r6, r7, d18 ; CHECK-NEXT: mov r2, #0 @@ -365,7 +365,7 @@ define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: mvnne r2, #0 ; CHECK-NEXT: vdup.32 d31, r2 ; CHECK-NEXT: vmov r2, r5, d20 -; CHECK-NEXT: subs r2, r6, r2 +; CHECK-NEXT: cmp r6, r2 ; CHECK-NEXT: sbcs r2, r7, r5 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: movlt r2, #1 @@ -385,14 +385,14 @@ define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: vld1.64 {d6, d7}, [r0:128]! 
; CHECK-NEXT: vld1.64 {d0, d1}, [r1:128] ; CHECK-NEXT: vld1.64 {d2, d3}, [r0:128] -; CHECK-NEXT: subs r1, r6, r2 +; CHECK-NEXT: cmp r6, r2 ; CHECK-NEXT: vmov r0, r6, d2 ; CHECK-NEXT: sbcs r1, r5, r7 ; CHECK-NEXT: vmov r2, r7, d0 ; CHECK-NEXT: movlt lr, #1 ; CHECK-NEXT: cmp lr, #0 ; CHECK-NEXT: mvnne lr, #0 -; CHECK-NEXT: subs r0, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: sbcs r0, r6, r7 ; CHECK-NEXT: vmov r2, r7, d30 ; CHECK-NEXT: vmov r6, r5, d28 @@ -400,7 +400,7 @@ define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: movlt r0, #1 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: subs r2, r6, r2 +; CHECK-NEXT: cmp r6, r2 ; CHECK-NEXT: sbcs r2, r5, r7 ; CHECK-NEXT: vmov r7, r6, d31 ; CHECK-NEXT: vmov r5, r4, d29 @@ -408,7 +408,7 @@ define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: movlt r2, #1 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: subs r7, r5, r7 +; CHECK-NEXT: cmp r5, r7 ; CHECK-NEXT: vmov r5, r1, d7 ; CHECK-NEXT: sbcs r7, r4, r6 ; CHECK-NEXT: mov r4, #0 @@ -416,7 +416,7 @@ define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: movlt r4, #1 ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: mvnne r4, #0 -; CHECK-NEXT: subs r5, r5, r7 +; CHECK-NEXT: cmp r5, r7 ; CHECK-NEXT: sbcs r1, r1, r6 ; CHECK-NEXT: vmov r6, r7, d6 ; CHECK-NEXT: mov r1, #0 @@ -425,7 +425,7 @@ define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: mvnne r1, #0 ; CHECK-NEXT: vdup.32 d9, r1 ; CHECK-NEXT: vmov r1, r5, d4 -; CHECK-NEXT: subs r1, r6, r1 +; CHECK-NEXT: cmp r6, r1 ; CHECK-NEXT: sbcs r1, r7, r5 ; CHECK-NEXT: vmov r6, r7, d3 ; CHECK-NEXT: mov r1, #0 @@ -437,7 +437,7 @@ define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: vbit q2, q3, q4 ; CHECK-NEXT: vdup.32 d9, r4 ; CHECK-NEXT: vdup.32 d8, r2 -; CHECK-NEXT: subs r1, r6, r1 +; CHECK-NEXT: cmp 
r6, r1 ; CHECK-NEXT: sbcs r1, r7, r5 ; CHECK-NEXT: vmov r5, r6, d24 ; CHECK-NEXT: mov r1, #0 @@ -447,19 +447,19 @@ define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: vdup.32 d7, r1 ; CHECK-NEXT: vmov r1, r4, d20 ; CHECK-NEXT: vdup.32 d6, r0 -; CHECK-NEXT: subs r1, r5, r1 +; CHECK-NEXT: cmp r5, r1 ; CHECK-NEXT: mov r1, r3 ; CHECK-NEXT: sbcs r0, r6, r4 ; CHECK-NEXT: vst1.64 {d16, d17}, [r1:128]! ; CHECK-NEXT: vorr q8, q4, q4 ; CHECK-NEXT: movlt r12, #1 ; CHECK-NEXT: cmp r12, #0 +; CHECK-NEXT: add r0, r3, #64 ; CHECK-NEXT: vbsl q8, q14, q15 ; CHECK-NEXT: vdup.32 d29, lr ; CHECK-NEXT: vorr q15, q3, q3 ; CHECK-NEXT: mvnne r12, #0 ; CHECK-NEXT: vdup.32 d28, r12 -; CHECK-NEXT: add r0, r3, #64 ; CHECK-NEXT: vbsl q15, q1, q0 ; CHECK-NEXT: vst1.64 {d26, d27}, [r1:128]! ; CHECK-NEXT: vbit q10, q12, q14 diff --git a/llvm/test/CodeGen/ARM/wide-compares.ll b/llvm/test/CodeGen/ARM/wide-compares.ll index 9acf8d249ddf1..5baefdee063ab 100644 --- a/llvm/test/CodeGen/ARM/wide-compares.ll +++ b/llvm/test/CodeGen/ARM/wide-compares.ll @@ -7,7 +7,7 @@ define i32 @test_slt1(i64 %a, i64 %b) { ; CHECK-ARM-LABEL: test_slt1: ; CHECK-ARM: @ %bb.0: @ %entry -; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: cmp r0, r2 ; CHECK-ARM-NEXT: mov r12, #2 ; CHECK-ARM-NEXT: sbcs r0, r1, r3 ; CHECK-ARM-NEXT: movwlt r12, #1 @@ -40,7 +40,7 @@ define i32 @test_slt1(i64 %a, i64 %b) { ; ; CHECK-THUMB2-LABEL: test_slt1: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: subs r0, r0, r2 +; CHECK-THUMB2-NEXT: cmp r0, r2 ; CHECK-THUMB2-NEXT: mov.w r12, #2 ; CHECK-THUMB2-NEXT: sbcs.w r0, r1, r3 ; CHECK-THUMB2-NEXT: it lt @@ -60,7 +60,7 @@ define void @test_slt2(i64 %a, i64 %b) { ; CHECK-ARM-LABEL: test_slt2: ; CHECK-ARM: @ %bb.0: @ %entry ; CHECK-ARM-NEXT: push {r11, lr} -; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: cmp r0, r2 ; CHECK-ARM-NEXT: sbcs r0, r1, r3 ; CHECK-ARM-NEXT: bge .LBB1_2 ; CHECK-ARM-NEXT: @ %bb.1: @ %bb1 @@ -103,7 +103,7 @@ define void 
@test_slt2(i64 %a, i64 %b) { ; CHECK-THUMB2-LABEL: test_slt2: ; CHECK-THUMB2: @ %bb.0: @ %entry ; CHECK-THUMB2-NEXT: push {r7, lr} -; CHECK-THUMB2-NEXT: subs r0, r0, r2 +; CHECK-THUMB2-NEXT: cmp r0, r2 ; CHECK-THUMB2-NEXT: sbcs.w r0, r1, r3 ; CHECK-THUMB2-NEXT: bge .LBB1_2 ; CHECK-THUMB2-NEXT: @ %bb.1: @ %bb1 @@ -129,16 +129,16 @@ declare void @g() define i64 @test_slt_select(i64 %c, i64 %d, i64 %a, i64 %b) { ; CHECK-ARM-LABEL: test_slt_select: ; CHECK-ARM: @ %bb.0: @ %entry -; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-NEXT: push {r4, r5, r11, lr} ; CHECK-ARM-NEXT: ldr r12, [sp, #24] ; CHECK-ARM-NEXT: ldr lr, [sp, #16] -; CHECK-ARM-NEXT: ldr r6, [sp, #28] +; CHECK-ARM-NEXT: ldr r4, [sp, #28] ; CHECK-ARM-NEXT: ldr r5, [sp, #20] -; CHECK-ARM-NEXT: subs r4, lr, r12 -; CHECK-ARM-NEXT: sbcs r6, r5, r6 +; CHECK-ARM-NEXT: cmp lr, r12 +; CHECK-ARM-NEXT: sbcs r5, r5, r4 ; CHECK-ARM-NEXT: movhs r0, r2 ; CHECK-ARM-NEXT: movhs r1, r3 -; CHECK-ARM-NEXT: pop {r4, r5, r6, pc} +; CHECK-ARM-NEXT: pop {r4, r5, r11, pc} ; ; CHECK-THUMB1-NOMOV-LABEL: test_slt_select: ; CHECK-THUMB1-NOMOV: @ %bb.0: @ %entry @@ -192,15 +192,15 @@ define i64 @test_slt_select(i64 %c, i64 %d, i64 %a, i64 %b) { ; ; CHECK-THUMB2-LABEL: test_slt_select: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: push {r4, r5, r6, lr} -; CHECK-THUMB2-NEXT: ldrd r12, r6, [sp, #24] +; CHECK-THUMB2-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB2-NEXT: ldrd r12, r4, [sp, #24] ; CHECK-THUMB2-NEXT: ldrd lr, r5, [sp, #16] -; CHECK-THUMB2-NEXT: subs.w r4, lr, r12 -; CHECK-THUMB2-NEXT: sbcs.w r6, r5, r6 +; CHECK-THUMB2-NEXT: cmp lr, r12 +; CHECK-THUMB2-NEXT: sbcs r5, r4 ; CHECK-THUMB2-NEXT: itt hs ; CHECK-THUMB2-NEXT: movhs r0, r2 ; CHECK-THUMB2-NEXT: movhs r1, r3 -; CHECK-THUMB2-NEXT: pop {r4, r5, r6, pc} +; CHECK-THUMB2-NEXT: pop {r4, r5, r7, pc} entry: %cmp = icmp ult i64 %a, %b %r1 = select i1 %cmp, i64 %c, i64 %d @@ -213,7 +213,7 @@ define {i32, i32} @test_slt_not(i32 %c, i32 %d, i64 %a, i64 %b) { ; 
CHECK-ARM-NEXT: ldr r12, [sp] ; CHECK-ARM-NEXT: mov r1, #0 ; CHECK-ARM-NEXT: ldr r0, [sp, #4] -; CHECK-ARM-NEXT: subs r2, r2, r12 +; CHECK-ARM-NEXT: cmp r2, r12 ; CHECK-ARM-NEXT: sbcs r0, r3, r0 ; CHECK-ARM-NEXT: mov r0, #0 ; CHECK-ARM-NEXT: movwge r1, #1 @@ -272,7 +272,7 @@ define {i32, i32} @test_slt_not(i32 %c, i32 %d, i64 %a, i64 %b) { ; CHECK-THUMB2-NEXT: ldr.w r12, [sp] ; CHECK-THUMB2-NEXT: movs r1, #0 ; CHECK-THUMB2-NEXT: ldr r0, [sp, #4] -; CHECK-THUMB2-NEXT: subs.w r2, r2, r12 +; CHECK-THUMB2-NEXT: cmp r2, r12 ; CHECK-THUMB2-NEXT: sbcs.w r0, r3, r0 ; CHECK-THUMB2-NEXT: mov.w r0, #0 ; CHECK-THUMB2-NEXT: ite lt diff --git a/llvm/test/CodeGen/Thumb/cmp-add-fold.ll b/llvm/test/CodeGen/Thumb/cmp-add-fold.ll index 4dc0bc70440dd..81352c0549e05 100644 --- a/llvm/test/CodeGen/Thumb/cmp-add-fold.ll +++ b/llvm/test/CodeGen/Thumb/cmp-add-fold.ll @@ -16,7 +16,7 @@ define i32 @addri1(i32 %a, i32 %b) { ; ; T2-LABEL: addri1: ; T2: @ %bb.0: @ %entry -; T2-NEXT: adds r0, #3 +; T2-NEXT: cmn.w r0, #3 ; T2-NEXT: mov.w r0, #5 ; T2-NEXT: it eq ; T2-NEXT: moveq r0, #4 @@ -47,7 +47,7 @@ define i32 @addri2(i32 %a, i32 %b) { ; ; T2-LABEL: addri2: ; T2: @ %bb.0: @ %entry -; T2-NEXT: adds r0, #254 +; T2-NEXT: cmn.w r0, #254 ; T2-NEXT: mov.w r0, #5 ; T2-NEXT: it eq ; T2-NEXT: moveq r0, #4 diff --git a/llvm/test/CodeGen/Thumb/scmp.ll b/llvm/test/CodeGen/Thumb/scmp.ll index c0024492b3a6d..4eb8e1045803e 100644 --- a/llvm/test/CodeGen/Thumb/scmp.ll +++ b/llvm/test/CodeGen/Thumb/scmp.ll @@ -152,13 +152,13 @@ define i8 @scmp_8_64(i64 %x, i64 %y) nounwind { ; ; THUMB2-LABEL: scmp_8_64: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: subs.w r12, r0, r2 +; THUMB2-NEXT: cmp r0, r2 ; THUMB2-NEXT: mov.w r9, #0 ; THUMB2-NEXT: sbcs.w r12, r1, r3 ; THUMB2-NEXT: mov.w r12, #0 ; THUMB2-NEXT: it lt ; THUMB2-NEXT: movlt.w r12, #1 -; THUMB2-NEXT: subs r0, r2, r0 +; THUMB2-NEXT: cmp r2, r0 ; THUMB2-NEXT: sbcs.w r0, r3, r1 ; THUMB2-NEXT: it lt ; THUMB2-NEXT: movlt.w r9, #1 @@ -167,10 +167,10 @@ define i8 @scmp_8_64(i64 
%x, i64 %y) nounwind { ; ; V81M-LABEL: scmp_8_64: ; V81M: @ %bb.0: -; V81M-NEXT: subs.w r12, r0, r2 +; V81M-NEXT: cmp r0, r2 ; V81M-NEXT: sbcs.w r12, r1, r3 ; V81M-NEXT: cset r12, lt -; V81M-NEXT: subs r0, r2, r0 +; V81M-NEXT: cmp r2, r0 ; V81M-NEXT: sbcs.w r0, r3, r1 ; V81M-NEXT: cset r0, lt ; V81M-NEXT: sub.w r0, r0, r12 @@ -231,14 +231,14 @@ define i8 @scmp_8_128(i128 %x, i128 %y) nounwind { ; THUMB2-NEXT: ldr r4, [sp, #28] ; THUMB2-NEXT: movs r5, #0 ; THUMB2-NEXT: ldm.w lr, {r9, r12, lr} -; THUMB2-NEXT: subs.w r6, r0, r9 +; THUMB2-NEXT: cmp r0, r9 ; THUMB2-NEXT: sbcs.w r6, r1, r12 ; THUMB2-NEXT: sbcs.w r6, r2, lr ; THUMB2-NEXT: sbcs.w r6, r3, r4 ; THUMB2-NEXT: mov.w r6, #0 ; THUMB2-NEXT: it lt ; THUMB2-NEXT: movlt r6, #1 -; THUMB2-NEXT: subs.w r0, r9, r0 +; THUMB2-NEXT: cmp r9, r0 ; THUMB2-NEXT: sbcs.w r0, r12, r1 ; THUMB2-NEXT: sbcs.w r0, lr, r2 ; THUMB2-NEXT: sbcs.w r0, r4, r3 @@ -253,12 +253,12 @@ define i8 @scmp_8_128(i128 %x, i128 %y) nounwind { ; V81M-NEXT: push {r4, r5, r6, lr} ; V81M-NEXT: ldrd r5, r4, [sp, #16] ; V81M-NEXT: ldrd lr, r12, [sp, #24] -; V81M-NEXT: subs r6, r0, r5 +; V81M-NEXT: cmp r0, r5 ; V81M-NEXT: sbcs.w r6, r1, r4 ; V81M-NEXT: sbcs.w r6, r2, lr ; V81M-NEXT: sbcs.w r6, r3, r12 ; V81M-NEXT: cset r6, lt -; V81M-NEXT: subs r0, r5, r0 +; V81M-NEXT: cmp r5, r0 ; V81M-NEXT: sbcs.w r0, r4, r1 ; V81M-NEXT: sbcs.w r0, lr, r2 ; V81M-NEXT: sbcs.w r0, r12, r3 @@ -336,13 +336,13 @@ define i32 @scmp_32_64(i64 %x, i64 %y) nounwind { ; ; THUMB2-LABEL: scmp_32_64: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: subs.w r12, r0, r2 +; THUMB2-NEXT: cmp r0, r2 ; THUMB2-NEXT: mov.w r9, #0 ; THUMB2-NEXT: sbcs.w r12, r1, r3 ; THUMB2-NEXT: mov.w r12, #0 ; THUMB2-NEXT: it lt ; THUMB2-NEXT: movlt.w r12, #1 -; THUMB2-NEXT: subs r0, r2, r0 +; THUMB2-NEXT: cmp r2, r0 ; THUMB2-NEXT: sbcs.w r0, r3, r1 ; THUMB2-NEXT: it lt ; THUMB2-NEXT: movlt.w r9, #1 @@ -351,10 +351,10 @@ define i32 @scmp_32_64(i64 %x, i64 %y) nounwind { ; ; V81M-LABEL: scmp_32_64: ; V81M: @ %bb.0: -; V81M-NEXT: 
subs.w r12, r0, r2 +; V81M-NEXT: cmp r0, r2 ; V81M-NEXT: sbcs.w r12, r1, r3 ; V81M-NEXT: cset r12, lt -; V81M-NEXT: subs r0, r2, r0 +; V81M-NEXT: cmp r2, r0 ; V81M-NEXT: sbcs.w r0, r3, r1 ; V81M-NEXT: cset r0, lt ; V81M-NEXT: sub.w r0, r0, r12 @@ -390,13 +390,13 @@ define i64 @scmp_64_64(i64 %x, i64 %y) nounwind { ; ; THUMB2-LABEL: scmp_64_64: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: subs.w r12, r0, r2 +; THUMB2-NEXT: cmp r0, r2 ; THUMB2-NEXT: mov.w r9, #0 ; THUMB2-NEXT: sbcs.w r12, r1, r3 ; THUMB2-NEXT: mov.w r12, #0 ; THUMB2-NEXT: it lt ; THUMB2-NEXT: movlt.w r12, #1 -; THUMB2-NEXT: subs r0, r2, r0 +; THUMB2-NEXT: cmp r2, r0 ; THUMB2-NEXT: sbcs.w r0, r3, r1 ; THUMB2-NEXT: it lt ; THUMB2-NEXT: movlt.w r9, #1 @@ -406,10 +406,10 @@ define i64 @scmp_64_64(i64 %x, i64 %y) nounwind { ; ; V81M-LABEL: scmp_64_64: ; V81M: @ %bb.0: -; V81M-NEXT: subs.w r12, r0, r2 +; V81M-NEXT: cmp r0, r2 ; V81M-NEXT: sbcs.w r12, r1, r3 ; V81M-NEXT: cset r12, lt -; V81M-NEXT: subs r0, r2, r0 +; V81M-NEXT: cmp r2, r0 ; V81M-NEXT: sbcs.w r0, r3, r1 ; V81M-NEXT: cset r0, lt ; V81M-NEXT: sub.w r0, r0, r12 diff --git a/llvm/test/CodeGen/Thumb/ucmp.ll b/llvm/test/CodeGen/Thumb/ucmp.ll index 5d0f57e2a9d72..eeeb11855f789 100644 --- a/llvm/test/CodeGen/Thumb/ucmp.ll +++ b/llvm/test/CodeGen/Thumb/ucmp.ll @@ -119,13 +119,13 @@ define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind { ; ; THUMB2-LABEL: ucmp_8_64: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: subs.w r12, r0, r2 +; THUMB2-NEXT: cmp r0, r2 ; THUMB2-NEXT: mov.w r9, #0 ; THUMB2-NEXT: sbcs.w r12, r1, r3 ; THUMB2-NEXT: mov.w r12, #0 ; THUMB2-NEXT: it lo ; THUMB2-NEXT: movlo.w r12, #1 -; THUMB2-NEXT: subs r0, r2, r0 +; THUMB2-NEXT: cmp r2, r0 ; THUMB2-NEXT: sbcs.w r0, r3, r1 ; THUMB2-NEXT: it lo ; THUMB2-NEXT: movlo.w r9, #1 @@ -134,10 +134,10 @@ define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind { ; ; V81M-LABEL: ucmp_8_64: ; V81M: @ %bb.0: -; V81M-NEXT: subs.w r12, r0, r2 +; V81M-NEXT: cmp r0, r2 ; V81M-NEXT: sbcs.w r12, r1, r3 ; V81M-NEXT: cset r12, lo -; V81M-NEXT: subs 
r0, r2, r0 +; V81M-NEXT: cmp r2, r0 ; V81M-NEXT: sbcs.w r0, r3, r1 ; V81M-NEXT: cset r0, lo ; V81M-NEXT: sub.w r0, r0, r12 @@ -198,14 +198,14 @@ define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind { ; THUMB2-NEXT: ldr r4, [sp, #28] ; THUMB2-NEXT: movs r5, #0 ; THUMB2-NEXT: ldm.w lr, {r9, r12, lr} -; THUMB2-NEXT: subs.w r6, r0, r9 +; THUMB2-NEXT: cmp r0, r9 ; THUMB2-NEXT: sbcs.w r6, r1, r12 ; THUMB2-NEXT: sbcs.w r6, r2, lr ; THUMB2-NEXT: sbcs.w r6, r3, r4 ; THUMB2-NEXT: mov.w r6, #0 ; THUMB2-NEXT: it lo ; THUMB2-NEXT: movlo r6, #1 -; THUMB2-NEXT: subs.w r0, r9, r0 +; THUMB2-NEXT: cmp r9, r0 ; THUMB2-NEXT: sbcs.w r0, r12, r1 ; THUMB2-NEXT: sbcs.w r0, lr, r2 ; THUMB2-NEXT: sbcs.w r0, r4, r3 @@ -220,12 +220,12 @@ define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind { ; V81M-NEXT: push {r4, r5, r6, lr} ; V81M-NEXT: ldrd r5, r4, [sp, #16] ; V81M-NEXT: ldrd lr, r12, [sp, #24] -; V81M-NEXT: subs r6, r0, r5 +; V81M-NEXT: cmp r0, r5 ; V81M-NEXT: sbcs.w r6, r1, r4 ; V81M-NEXT: sbcs.w r6, r2, lr ; V81M-NEXT: sbcs.w r6, r3, r12 ; V81M-NEXT: cset r6, lo -; V81M-NEXT: subs r0, r5, r0 +; V81M-NEXT: cmp r5, r0 ; V81M-NEXT: sbcs.w r0, r4, r1 ; V81M-NEXT: sbcs.w r0, lr, r2 ; V81M-NEXT: sbcs.w r0, r12, r3 @@ -292,13 +292,13 @@ define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind { ; ; THUMB2-LABEL: ucmp_32_64: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: subs.w r12, r0, r2 +; THUMB2-NEXT: cmp r0, r2 ; THUMB2-NEXT: mov.w r9, #0 ; THUMB2-NEXT: sbcs.w r12, r1, r3 ; THUMB2-NEXT: mov.w r12, #0 ; THUMB2-NEXT: it lo ; THUMB2-NEXT: movlo.w r12, #1 -; THUMB2-NEXT: subs r0, r2, r0 +; THUMB2-NEXT: cmp r2, r0 ; THUMB2-NEXT: sbcs.w r0, r3, r1 ; THUMB2-NEXT: it lo ; THUMB2-NEXT: movlo.w r9, #1 @@ -307,10 +307,10 @@ define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind { ; ; V81M-LABEL: ucmp_32_64: ; V81M: @ %bb.0: -; V81M-NEXT: subs.w r12, r0, r2 +; V81M-NEXT: cmp r0, r2 ; V81M-NEXT: sbcs.w r12, r1, r3 ; V81M-NEXT: cset r12, lo -; V81M-NEXT: subs r0, r2, r0 +; V81M-NEXT: cmp r2, r0 ; V81M-NEXT: sbcs.w r0, r3, r1 ; V81M-NEXT: 
cset r0, lo ; V81M-NEXT: sub.w r0, r0, r12 @@ -346,13 +346,13 @@ define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind { ; ; THUMB2-LABEL: ucmp_64_64: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: subs.w r12, r0, r2 +; THUMB2-NEXT: cmp r0, r2 ; THUMB2-NEXT: mov.w r9, #0 ; THUMB2-NEXT: sbcs.w r12, r1, r3 ; THUMB2-NEXT: mov.w r12, #0 ; THUMB2-NEXT: it lo ; THUMB2-NEXT: movlo.w r12, #1 -; THUMB2-NEXT: subs r0, r2, r0 +; THUMB2-NEXT: cmp r2, r0 ; THUMB2-NEXT: sbcs.w r0, r3, r1 ; THUMB2-NEXT: it lo ; THUMB2-NEXT: movlo.w r9, #1 @@ -362,10 +362,10 @@ define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind { ; ; V81M-LABEL: ucmp_64_64: ; V81M: @ %bb.0: -; V81M-NEXT: subs.w r12, r0, r2 +; V81M-NEXT: cmp r0, r2 ; V81M-NEXT: sbcs.w r12, r1, r3 ; V81M-NEXT: cset r12, lo -; V81M-NEXT: subs r0, r2, r0 +; V81M-NEXT: cmp r2, r0 ; V81M-NEXT: sbcs.w r0, r3, r1 ; V81M-NEXT: cset r0, lo ; V81M-NEXT: sub.w r0, r0, r12 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll index cbcbf1f392ce8..b2ee2b5d7dc10 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll @@ -180,7 +180,7 @@ define dso_local i32 @b(ptr %c, i32 %d, i32 %e, ptr %n) "frame-pointer"="all" { ; CHECK-NEXT: muls r1, r3, r1 ; CHECK-NEXT: adds r4, r4, r1 ; CHECK-NEXT: adc.w r1, r2, r1, asr #31 -; CHECK-NEXT: adds.w r2, r4, #-2147483648 +; CHECK-NEXT: cmn.w r4, #-2147483648 ; CHECK-NEXT: ldrd r2, r4, [r8] ; CHECK-NEXT: adc r5, r1, #0 ; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill diff --git a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll index bcd92f81911b2..d783ac318d602 100644 --- a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll +++ b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll @@ -14,12 +14,12 @@ define <2 x i64> @v2i64(i32 %index, i32 %TC, <2 x i64> %V1, <2 x i64> %V2) { ; CHECK-NEXT: vmov lr, r12, d2 ; CHECK-NEXT: adds r6, r0, #1 ; CHECK-NEXT: adc 
r4, r4, #0 -; CHECK-NEXT: subs.w r0, lr, #-1 +; CHECK-NEXT: cmp.w lr, #-1 ; CHECK-NEXT: vmov q1[2], q1[0], lr, r6 ; CHECK-NEXT: sbcs r0, r12, #0 ; CHECK-NEXT: vmov q1[3], q1[1], r12, r4 ; CHECK-NEXT: csetm r12, lo -; CHECK-NEXT: subs.w r6, r6, #-1 +; CHECK-NEXT: cmp.w r6, #-1 ; CHECK-NEXT: bfi r5, r12, #0, #8 ; CHECK-NEXT: sbcs r6, r4, #0 ; CHECK-NEXT: mov.w r0, #0 @@ -31,14 +31,14 @@ define <2 x i64> @v2i64(i32 %index, i32 %TC, <2 x i64> %V1, <2 x i64> %V2) { ; CHECK-NEXT: vmov r1, r4, d0 ; CHECK-NEXT: vmov r6, r5, d2 ; CHECK-NEXT: vmov d0, r2, r3 -; CHECK-NEXT: subs r1, r6, r1 +; CHECK-NEXT: cmp r6, r1 ; CHECK-NEXT: sbcs.w r1, r5, r4 ; CHECK-NEXT: vmov r5, r4, d1 ; CHECK-NEXT: csetm r1, lo ; CHECK-NEXT: vldr d1, [sp, #16] ; CHECK-NEXT: bfi r0, r1, #0, #8 ; CHECK-NEXT: vmov r1, r6, d3 -; CHECK-NEXT: subs r1, r1, r5 +; CHECK-NEXT: cmp r1, r5 ; CHECK-NEXT: sbcs.w r1, r6, r4 ; CHECK-NEXT: csetm r1, lo ; CHECK-NEXT: bfi r0, r1, #8, #8 diff --git a/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll b/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll index 886f8440325f3..d4b645acf7fb4 100644 --- a/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll +++ b/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic | FileCheck %s ; rdar://7387640 @@ -8,16 +9,35 @@ define void @t() nounwind optsize { ; CHECK-LABEL: t: -; CHECK: mov{{.*}}, #1000 +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r0, :lower16:(L_G$non_lazy_ptr-(LPC0_0+4)) +; CHECK-NEXT: mov.w r2, #1000 +; CHECK-NEXT: movt r0, :upper16:(L_G$non_lazy_ptr-(LPC0_0+4)) +; CHECK-NEXT: LPC0_0: +; CHECK-NEXT: add r0, pc +; CHECK-NEXT: ldr.w r9, [r0] +; CHECK-NEXT: ldr.w r1, [r9] +; CHECK-NEXT: movw r3, :lower16:(L_array$non_lazy_ptr-(LPC0_1+4)) +; CHECK-NEXT: movt r3, :upper16:(L_array$non_lazy_ptr-(LPC0_1+4)) +; CHECK-NEXT: LPC0_1: +; CHECK-NEXT: add r3, pc +; CHECK-NEXT: 
ldr r3, [r3] +; CHECK-NEXT: LBB0_1: @ %bb +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr r0, [r3] +; CHECK-NEXT: ldr.w r0, [r0, r2, lsl #2] +; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: cmn.w r2, #1 +; CHECK-NEXT: add r1, r0 +; CHECK-NEXT: str.w r1, [r9] +; CHECK-NEXT: bne LBB0_1 +; CHECK-NEXT: @ %bb.2: @ %return +; CHECK-NEXT: bx lr entry: %.pre = load i32, ptr @G, align 4 ; [#uses=1] br label %bb bb: ; preds = %bb, %entry -; CHECK: LBB0_1: -; CHECK: subs [[R2:r[0-9]+]], #1 -; CHECK: adds {{.*}}, [[R2]], #1 -; CHECK: bne LBB0_1 %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; [#uses=1] %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; [#uses=2] diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll index d076cb00ad7e0..961662cf52138 100644 --- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll +++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll @@ -358,7 +358,7 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) { ; CHECK-NEXT: .pad #16 ; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: mov lr, r0 -; CHECK-NEXT: subs r0, #1 +; CHECK-NEXT: cmp r0, #1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: blt.w .LBB1_28 ; CHECK-NEXT: @ %bb.1: @ %for.cond2.preheader.lr.ph @@ -426,7 +426,7 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) { ; CHECK-NEXT: .LBB1_5: @ %for.cond.cleanup5 ; CHECK-NEXT: @ in Loop: Header=BB1_6 Depth=1 ; CHECK-NEXT: adds r5, #2 -; CHECK-NEXT: subs.w r1, r5, lr +; CHECK-NEXT: cmp r5, lr ; CHECK-NEXT: asr.w r0, r5, #31 ; CHECK-NEXT: sbcs.w r0, r0, r12 ; CHECK-NEXT: bge.w .LBB1_28 diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll index 742f2a75a1aa8..ff38cf8a02ee7 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll @@ -17,14 +17,14 @@ define arm_aapcs_vfpcc <2 x i32> @stest_f64i32(<2 x double> %x) { ; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl 
__aeabi_d2lz -; CHECK-NEXT: adr r3, .LCPI0_0 ; CHECK-NEXT: mvn r12, #-2147483648 +; CHECK-NEXT: adr r3, .LCPI0_0 +; CHECK-NEXT: cmp r4, r12 ; CHECK-NEXT: vldrw.u32 q0, [r3] -; CHECK-NEXT: subs.w r3, r4, r12 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 ; CHECK-NEXT: csetm r3, lt -; CHECK-NEXT: subs.w r0, r0, r12 +; CHECK-NEXT: cmp r0, r12 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 ; CHECK-NEXT: mov.w r5, #0 @@ -87,12 +87,12 @@ define arm_aapcs_vfpcc <2 x i32> @utest_f64i32(<2 x double> %x) { ; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: subs.w r3, r4, #-1 +; CHECK-NEXT: cmp.w r4, #-1 ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: mov.w r2, #0 ; CHECK-NEXT: csetm r3, lo -; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r2, r3, #0, #8 ; CHECK-NEXT: csetm r0, lo @@ -125,12 +125,12 @@ define arm_aapcs_vfpcc <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: subs.w r3, r4, #-1 +; CHECK-NEXT: cmp.w r4, #-1 ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 ; CHECK-NEXT: csetm r3, lt -; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: mov.w r5, #0 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r5, r3, #0, #8 @@ -359,26 +359,26 @@ define arm_aapcs_vfpcc <2 x i16> @stest_f64i16(<2 x double> %x) { ; CHECK-NEXT: movt lr, #65535 ; CHECK-NEXT: mov.w r12, #-1 ; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: subs r1, r1, r4 +; CHECK-NEXT: cmp r1, r4 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov.w r2, #0 ; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: cmp r3, r4 ; CHECK-NEXT: bfi r2, r1, #0, #8 -; CHECK-NEXT: subs r1, r3, r4 ; CHECK-NEXT: sbcs r1, r5, #0 -; CHECK-NEXT: adr r4, .LCPI9_1 ; CHECK-NEXT: csetm r1, lt +; 
CHECK-NEXT: adr r4, .LCPI9_1 ; CHECK-NEXT: bfi r2, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vldrw.u32 q1, [r4] ; CHECK-NEXT: vmov r1, r2, d0 ; CHECK-NEXT: vmov r3, r5, d1 -; CHECK-NEXT: subs.w r1, lr, r1 +; CHECK-NEXT: cmp lr, r1 ; CHECK-NEXT: sbcs.w r1, r12, r2 ; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: cmp lr, r3 ; CHECK-NEXT: bfi r0, r1, #0, #8 -; CHECK-NEXT: subs.w r1, lr, r3 ; CHECK-NEXT: sbcs.w r1, r12, r5 ; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #8, #8 @@ -429,11 +429,11 @@ define arm_aapcs_vfpcc <2 x i16> @utest_f64i16(<2 x double> %x) { ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov.i64 q1, #0xffff ; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: subs r0, r0, r4 +; CHECK-NEXT: cmp r0, r4 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: cmp r2, r4 ; CHECK-NEXT: bfi r5, r0, #0, #8 -; CHECK-NEXT: subs r0, r2, r4 ; CHECK-NEXT: sbcs r0, r3, #0 ; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r5, r0, #8, #8 @@ -470,12 +470,12 @@ define arm_aapcs_vfpcc <2 x i16> @ustest_f64i16(<2 x double> %x) { ; CHECK-NEXT: vmov r1, r2, d0 ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: vmov r3, r5, d1 -; CHECK-NEXT: subs r1, r1, r4 +; CHECK-NEXT: cmp r1, r4 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov.w r2, #0 ; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: cmp r3, r4 ; CHECK-NEXT: bfi r2, r1, #0, #8 -; CHECK-NEXT: subs r1, r3, r4 ; CHECK-NEXT: sbcs r1, r5, #0 ; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #8, #8 @@ -678,7 +678,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: vmov r12, lr, d9 -; CHECK-NEXT: subs.w r5, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: mvn r4, #-2147483648 ; CHECK-NEXT: sbcs.w r5, r1, r4 ; CHECK-NEXT: sbcs r5, r2, #0 @@ -702,7 +702,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixdfti -; 
CHECK-NEXT: subs.w r6, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: sbcs.w r6, r1, r4 ; CHECK-NEXT: sbcs r6, r2, #0 ; CHECK-NEXT: sbcs r6, r3, #0 @@ -746,7 +746,7 @@ define arm_aapcs_vfpcc <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: vmov r12, lr, d8 -; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo ; CHECK-NEXT: cmp r2, #0 @@ -755,7 +755,7 @@ define arm_aapcs_vfpcc <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo ; CHECK-NEXT: cmp r2, #0 @@ -784,7 +784,7 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: vmov r12, lr, d8 -; CHECK-NEXT: subs r4, r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r4, r3, #0 ; CHECK-NEXT: mov.w r8, #1 ; CHECK-NEXT: cset r4, lt @@ -805,7 +805,7 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r6, r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r6, r3, #0 ; CHECK-NEXT: cset r6, lt ; CHECK-NEXT: cmp r6, #0 @@ -842,7 +842,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: vmov r0, r9, d0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs.w r7, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: mvn r5, #-2147483648 ; CHECK-NEXT: sbcs.w r7, r1, r5 ; CHECK-NEXT: mov.w r6, #-1 @@ -865,7 +865,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NEXT: csel r10, r0, r1, ne ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs.w r4, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: sbcs.w r4, 
r1, r5 ; CHECK-NEXT: sbcs r4, r2, #0 ; CHECK-NEXT: sbcs r4, r3, #0 @@ -903,7 +903,7 @@ define arm_aapcs_vfpcc <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: vmov r4, r0, d0 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo ; CHECK-NEXT: cmp r2, #0 @@ -911,7 +911,7 @@ define arm_aapcs_vfpcc <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: csel r5, r1, r2, ne ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo ; CHECK-NEXT: cmp r2, #0 @@ -935,7 +935,7 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: vmov r5, r0, d0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r4, r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: mov.w r8, #1 ; CHECK-NEXT: sbcs r4, r3, #0 ; CHECK-NEXT: mov.w r6, #0 @@ -955,7 +955,7 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: csel r4, r1, r2, ne ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r5, r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r5, r3, #0 ; CHECK-NEXT: cset r5, lt ; CHECK-NEXT: cmp r5, #0 @@ -1102,14 +1102,14 @@ define arm_aapcs_vfpcc <2 x i32> @stest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: adr r3, .LCPI27_0 ; CHECK-NEXT: mvn r12, #-2147483648 +; CHECK-NEXT: adr r3, .LCPI27_0 +; CHECK-NEXT: cmp r4, r12 ; CHECK-NEXT: vldrw.u32 q0, [r3] -; CHECK-NEXT: subs.w r3, r4, r12 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 ; CHECK-NEXT: csetm r3, lt -; CHECK-NEXT: subs.w r0, r0, r12 +; CHECK-NEXT: cmp r0, r12 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 ; CHECK-NEXT: mov.w r5, #0 @@ -1170,12 +1170,12 @@ define 
arm_aapcs_vfpcc <2 x i32> @utest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: subs.w r3, r4, #-1 +; CHECK-NEXT: cmp.w r4, #-1 ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: mov.w r2, #0 ; CHECK-NEXT: csetm r3, lo -; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r2, r3, #0, #8 ; CHECK-NEXT: csetm r0, lo @@ -1207,12 +1207,12 @@ define arm_aapcs_vfpcc <2 x i32> @ustest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: subs.w r3, r4, #-1 +; CHECK-NEXT: cmp.w r4, #-1 ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 ; CHECK-NEXT: csetm r3, lt -; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: mov.w r5, #0 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r5, r3, #0, #8 @@ -1429,26 +1429,26 @@ define arm_aapcs_vfpcc <2 x i16> @stest_f64i16_mm(<2 x double> %x) { ; CHECK-NEXT: movt lr, #65535 ; CHECK-NEXT: mov.w r12, #-1 ; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: subs r1, r1, r4 +; CHECK-NEXT: cmp r1, r4 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov.w r2, #0 ; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: cmp r3, r4 ; CHECK-NEXT: bfi r2, r1, #0, #8 -; CHECK-NEXT: subs r1, r3, r4 ; CHECK-NEXT: sbcs r1, r5, #0 -; CHECK-NEXT: adr r4, .LCPI36_1 ; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: adr r4, .LCPI36_1 ; CHECK-NEXT: bfi r2, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vldrw.u32 q1, [r4] ; CHECK-NEXT: vmov r1, r2, d0 ; CHECK-NEXT: vmov r3, r5, d1 -; CHECK-NEXT: subs.w r1, lr, r1 +; CHECK-NEXT: cmp lr, r1 ; CHECK-NEXT: sbcs.w r1, r12, r2 ; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: cmp lr, r3 ; CHECK-NEXT: bfi r0, r1, #0, #8 -; CHECK-NEXT: subs.w r1, lr, r3 ; CHECK-NEXT: sbcs.w r1, r12, r5 ; CHECK-NEXT: csetm 
r1, lt ; CHECK-NEXT: bfi r0, r1, #8, #8 @@ -1497,11 +1497,11 @@ define arm_aapcs_vfpcc <2 x i16> @utest_f64i16_mm(<2 x double> %x) { ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov.i64 q1, #0xffff ; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: subs r0, r0, r4 +; CHECK-NEXT: cmp r0, r4 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: cmp r2, r4 ; CHECK-NEXT: bfi r5, r0, #0, #8 -; CHECK-NEXT: subs r0, r2, r4 ; CHECK-NEXT: sbcs r0, r3, #0 ; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r5, r0, #8, #8 @@ -1537,12 +1537,12 @@ define arm_aapcs_vfpcc <2 x i16> @ustest_f64i16_mm(<2 x double> %x) { ; CHECK-NEXT: vmov r1, r2, d0 ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: vmov r3, r5, d1 -; CHECK-NEXT: subs r1, r1, r4 +; CHECK-NEXT: cmp r1, r4 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov.w r2, #0 ; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: cmp r3, r4 ; CHECK-NEXT: bfi r2, r1, #0, #8 -; CHECK-NEXT: subs r1, r3, r4 ; CHECK-NEXT: sbcs r1, r5, #0 ; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #8, #8 @@ -1733,7 +1733,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: vmov r12, lr, d8 -; CHECK-NEXT: subs.w r5, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: mvn r4, #-2147483648 ; CHECK-NEXT: sbcs.w r5, r1, r4 ; CHECK-NEXT: sbcs r5, r2, #0 @@ -1757,7 +1757,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs.w r7, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: sbcs.w r7, r1, r4 ; CHECK-NEXT: sbcs r7, r2, #0 ; CHECK-NEXT: sbcs r7, r3, #0 @@ -1799,7 +1799,7 @@ define arm_aapcs_vfpcc <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: vmov r12, lr, d8 -; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo ; CHECK-NEXT: cmp r2, 
#0 @@ -1808,7 +1808,7 @@ define arm_aapcs_vfpcc <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo ; CHECK-NEXT: cmp r2, #0 @@ -1836,7 +1836,7 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: vmov r12, lr, d8 -; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 @@ -1850,7 +1850,7 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 @@ -1880,7 +1880,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: vmov r8, r0, d0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs.w r7, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: mvn r5, #-2147483648 ; CHECK-NEXT: sbcs.w r7, r1, r5 ; CHECK-NEXT: mov.w r6, #-2147483648 @@ -1903,7 +1903,7 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: csel r9, r1, r6, ne ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs.w r4, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: sbcs.w r4, r1, r5 ; CHECK-NEXT: sbcs r4, r2, #0 ; CHECK-NEXT: sbcs r4, r3, #0 @@ -1939,7 +1939,7 @@ define arm_aapcs_vfpcc <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: vmov r4, r0, d0 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo ; CHECK-NEXT: cmp r2, #0 @@ -1947,7 +1947,7 @@ define arm_aapcs_vfpcc <2 x 
i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: csel r5, r1, r2, ne ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo ; CHECK-NEXT: cmp r2, #0 @@ -1970,7 +1970,7 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: vmov r4, r0, d0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 @@ -1983,7 +1983,7 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: movmi r5, #0 ; CHECK-NEXT: movmi r6, #0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 diff --git a/llvm/test/CodeGen/Thumb2/mve-minmax.ll b/llvm/test/CodeGen/Thumb2/mve-minmax.ll index d536e6b72ac9c..7673d25e3987d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-minmax.ll +++ b/llvm/test/CodeGen/Thumb2/mve-minmax.ll @@ -44,12 +44,12 @@ define arm_aapcs_vfpcc <2 x i64> @smin_v2i64(<2 x i64> %s1, <2 x i64> %s2) { ; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: vmov r12, lr, d3 ; CHECK-NEXT: vmov r4, r5, d1 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 ; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: cmp r4, r12 ; CHECK-NEXT: bfi r1, r0, #0, #8 -; CHECK-NEXT: subs.w r0, r4, r12 ; CHECK-NEXT: sbcs.w r0, r5, lr ; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #8, #8 @@ -104,12 +104,12 @@ define arm_aapcs_vfpcc <2 x i64> @umin_v2i64(<2 x i64> %s1, <2 x i64> %s2) { ; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: vmov r12, lr, d3 ; CHECK-NEXT: vmov r4, r5, d1 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 ; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: cmp r4, r12 ; 
CHECK-NEXT: bfi r1, r0, #0, #8 -; CHECK-NEXT: subs.w r0, r4, r12 ; CHECK-NEXT: sbcs.w r0, r5, lr ; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 @@ -165,12 +165,12 @@ define arm_aapcs_vfpcc <2 x i64> @smax_v2i64(<2 x i64> %s1, <2 x i64> %s2) { ; CHECK-NEXT: vmov r2, r3, d2 ; CHECK-NEXT: vmov r12, lr, d1 ; CHECK-NEXT: vmov r4, r5, d3 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 ; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: cmp r4, r12 ; CHECK-NEXT: bfi r1, r0, #0, #8 -; CHECK-NEXT: subs.w r0, r4, r12 ; CHECK-NEXT: sbcs.w r0, r5, lr ; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #8, #8 @@ -225,12 +225,12 @@ define arm_aapcs_vfpcc <2 x i64> @umax_v2i64(<2 x i64> %s1, <2 x i64> %s2) { ; CHECK-NEXT: vmov r2, r3, d2 ; CHECK-NEXT: vmov r12, lr, d1 ; CHECK-NEXT: vmov r4, r5, d3 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 ; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: cmp r4, r12 ; CHECK-NEXT: bfi r1, r0, #0, #8 -; CHECK-NEXT: subs.w r0, r4, r12 ; CHECK-NEXT: sbcs.w r0, r5, lr ; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 diff --git a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll index d9ef1030ee922..51c865226a3ea 100644 --- a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll +++ b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll @@ -46,7 +46,7 @@ declare i64 @llvm.smax.i64(i64 %a, i64 %b) readnone define arm_aapcs_vfpcc i64 @smaxi64(i64 %a, i64 %b) { ; CHECK-LABEL: smaxi64: ; CHECK: @ %bb.0: -; CHECK-NEXT: subs.w r12, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r12, r3, r1 ; CHECK-NEXT: csel r0, r0, r2, lt ; CHECK-NEXT: csel r1, r1, r3, lt @@ -146,13 +146,13 @@ define arm_aapcs_vfpcc <2 x i32> @smax2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: vmov r2, s2 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 +; CHECK-NEXT: cmp r1, r3 ; CHECK-NEXT: asr.w 
lr, r1, #31 -; CHECK-NEXT: subs r1, r1, r3 ; CHECK-NEXT: sbcs.w r1, lr, r3, asr #31 ; CHECK-NEXT: asr.w r5, r3, #31 -; CHECK-NEXT: asr.w r12, r0, #31 ; CHECK-NEXT: csetm r1, lt -; CHECK-NEXT: subs r0, r0, r2 +; CHECK-NEXT: cmp r0, r2 +; CHECK-NEXT: asr.w r12, r0, #31 ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r0, r12, r2, asr #31 ; CHECK-NEXT: bfi r3, r1, #0, #8 @@ -201,7 +201,7 @@ define arm_aapcs_vfpcc <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: subs.w r12, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r12, r3, r1 ; CHECK-NEXT: csel r0, r0, r2, lt ; CHECK-NEXT: csel r1, r1, r3, lt @@ -218,14 +218,14 @@ define arm_aapcs_vfpcc <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov r2, r3, d2 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 ; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: vmov r3, r2, d3 ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d1 -; CHECK-NEXT: subs r0, r3, r0 +; CHECK-NEXT: cmp r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 ; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #8, #8 @@ -245,7 +245,7 @@ define arm_aapcs_vfpcc void @smax4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) { ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmov r1, r12, d2 ; CHECK-NEXT: vmov r3, r2, d6 -; CHECK-NEXT: subs r1, r3, r1 +; CHECK-NEXT: cmp r3, r1 ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r1, r2, r12 ; CHECK-NEXT: vmov lr, r12, d3 @@ -253,7 +253,7 @@ define arm_aapcs_vfpcc void @smax4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) { ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: vmov r2, r4, d7 -; CHECK-NEXT: subs.w r2, r2, lr +; CHECK-NEXT: cmp r2, lr ; CHECK-NEXT: sbcs.w r2, r4, r12 ; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r3, r2, #8, #8 @@ -262,13 +262,13 @@ define arm_aapcs_vfpcc void @smax4i64(<4 x i64> %a, <4 x 
i64> %b, ptr %p) { ; CHECK-NEXT: vmov r4, r3, d4 ; CHECK-NEXT: vpsel q1, q1, q3 ; CHECK-NEXT: vstrw.32 q1, [r0, #16] -; CHECK-NEXT: subs r2, r4, r2 +; CHECK-NEXT: cmp r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: vmov r4, r3, d5 ; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r1, r2, #0, #8 ; CHECK-NEXT: vmov r2, r12, d1 -; CHECK-NEXT: subs r2, r4, r2 +; CHECK-NEXT: cmp r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r1, r2, #8, #8 @@ -326,7 +326,7 @@ declare i64 @llvm.umax.i64(i64 %a, i64 %b) readnone define arm_aapcs_vfpcc i64 @umaxi64(i64 %a, i64 %b) { ; CHECK-LABEL: umaxi64: ; CHECK: @ %bb.0: -; CHECK-NEXT: subs.w r12, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r12, r3, r1 ; CHECK-NEXT: csel r0, r0, r2, lo ; CHECK-NEXT: csel r1, r1, r3, lo @@ -423,14 +423,14 @@ define arm_aapcs_vfpcc <2 x i32> @umax2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov r2, r3, d2 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 ; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: vmov r3, r2, d3 ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d1 -; CHECK-NEXT: subs r0, r3, r0 +; CHECK-NEXT: cmp r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 ; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 @@ -474,7 +474,7 @@ define arm_aapcs_vfpcc <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: subs.w r12, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r12, r3, r1 ; CHECK-NEXT: csel r0, r0, r2, lo ; CHECK-NEXT: csel r1, r1, r3, lo @@ -491,14 +491,14 @@ define arm_aapcs_vfpcc <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov r2, r3, d2 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 ; 
CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: vmov r3, r2, d3 ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d1 -; CHECK-NEXT: subs r0, r3, r0 +; CHECK-NEXT: cmp r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 ; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 @@ -518,7 +518,7 @@ define arm_aapcs_vfpcc void @umax4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) { ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmov r1, r12, d2 ; CHECK-NEXT: vmov r3, r2, d6 -; CHECK-NEXT: subs r1, r3, r1 +; CHECK-NEXT: cmp r3, r1 ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r1, r2, r12 ; CHECK-NEXT: vmov lr, r12, d3 @@ -526,7 +526,7 @@ define arm_aapcs_vfpcc void @umax4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) { ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: vmov r2, r4, d7 -; CHECK-NEXT: subs.w r2, r2, lr +; CHECK-NEXT: cmp r2, lr ; CHECK-NEXT: sbcs.w r2, r4, r12 ; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r3, r2, #8, #8 @@ -535,13 +535,13 @@ define arm_aapcs_vfpcc void @umax4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) { ; CHECK-NEXT: vmov r4, r3, d4 ; CHECK-NEXT: vpsel q1, q1, q3 ; CHECK-NEXT: vstrw.32 q1, [r0, #16] -; CHECK-NEXT: subs r2, r4, r2 +; CHECK-NEXT: cmp r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: vmov r4, r3, d5 ; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r1, r2, #0, #8 ; CHECK-NEXT: vmov r2, r12, d1 -; CHECK-NEXT: subs r2, r4, r2 +; CHECK-NEXT: cmp r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r1, r2, #8, #8 @@ -599,7 +599,7 @@ declare i64 @llvm.smin.i64(i64 %a, i64 %b) readnone define arm_aapcs_vfpcc i64 @smini64(i64 %a, i64 %b) { ; CHECK-LABEL: smini64: ; CHECK: @ %bb.0: -; CHECK-NEXT: subs.w r12, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: sbcs.w r12, r1, r3 ; CHECK-NEXT: csel r0, r0, r2, lt ; CHECK-NEXT: csel r1, r1, r3, lt @@ -702,17 +702,17 @@ define arm_aapcs_vfpcc <2 x i32> @smin2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: vmov r3, s0 ; CHECK-NEXT: vmov r2, s2 ; CHECK-NEXT: vmov q0[2], 
q0[0], r3, r2 +; CHECK-NEXT: cmp r3, r1 ; CHECK-NEXT: asr.w lr, r3, #31 -; CHECK-NEXT: subs r3, r3, r1 ; CHECK-NEXT: sbcs.w r1, lr, r1, asr #31 -; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: asr.w r12, r2, #31 -; CHECK-NEXT: bfi r3, r1, #0, #8 -; CHECK-NEXT: subs r1, r2, r0 ; CHECK-NEXT: sbcs.w r0, r12, r0, asr #31 -; CHECK-NEXT: vmov q0[3], q0[1], lr, r12 +; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: bfi r3, r1, #0, #8 +; CHECK-NEXT: vmov q0[3], q0[1], lr, r12 ; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -754,7 +754,7 @@ define arm_aapcs_vfpcc <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: subs.w r12, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: sbcs.w r12, r1, r3 ; CHECK-NEXT: csel r0, r0, r2, lt ; CHECK-NEXT: csel r1, r1, r3, lt @@ -771,14 +771,14 @@ define arm_aapcs_vfpcc <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmov r0, r1, d2 ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 ; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: vmov r3, r2, d1 ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d3 -; CHECK-NEXT: subs r0, r3, r0 +; CHECK-NEXT: cmp r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 ; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #8, #8 @@ -798,7 +798,7 @@ define arm_aapcs_vfpcc void @smin4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) { ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmov r1, r12, d6 ; CHECK-NEXT: vmov r3, r2, d2 -; CHECK-NEXT: subs r1, r3, r1 +; CHECK-NEXT: cmp r3, r1 ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r1, r2, r12 ; CHECK-NEXT: vmov lr, r12, d7 @@ -806,7 +806,7 @@ define arm_aapcs_vfpcc void @smin4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) { ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 ; 
CHECK-NEXT: vmov r2, r4, d3 -; CHECK-NEXT: subs.w r2, r2, lr +; CHECK-NEXT: cmp r2, lr ; CHECK-NEXT: sbcs.w r2, r4, r12 ; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r3, r2, #8, #8 @@ -815,13 +815,13 @@ define arm_aapcs_vfpcc void @smin4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) { ; CHECK-NEXT: vmov r4, r3, d0 ; CHECK-NEXT: vpsel q1, q1, q3 ; CHECK-NEXT: vstrw.32 q1, [r0, #16] -; CHECK-NEXT: subs r2, r4, r2 +; CHECK-NEXT: cmp r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: vmov r4, r3, d1 ; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r1, r2, #0, #8 ; CHECK-NEXT: vmov r2, r12, d5 -; CHECK-NEXT: subs r2, r4, r2 +; CHECK-NEXT: cmp r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r1, r2, #8, #8 @@ -879,7 +879,7 @@ declare i64 @llvm.umin.i64(i64 %a, i64 %b) readnone define arm_aapcs_vfpcc i64 @umini64(i64 %a, i64 %b) { ; CHECK-LABEL: umini64: ; CHECK: @ %bb.0: -; CHECK-NEXT: subs.w r12, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: sbcs.w r12, r1, r3 ; CHECK-NEXT: csel r0, r0, r2, lo ; CHECK-NEXT: csel r1, r1, r3, lo @@ -976,14 +976,14 @@ define arm_aapcs_vfpcc <2 x i32> @umin2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vmov r0, r1, d2 ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 ; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: vmov r3, r2, d1 ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d3 -; CHECK-NEXT: subs r0, r3, r0 +; CHECK-NEXT: cmp r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 ; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 @@ -1027,7 +1027,7 @@ define arm_aapcs_vfpcc <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: subs.w r12, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: sbcs.w r12, r1, r3 ; CHECK-NEXT: csel r0, r0, r2, lo ; CHECK-NEXT: csel r1, r1, r3, lo @@ -1044,14 +1044,14 @@ define 
arm_aapcs_vfpcc <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: vmov r0, r1, d2 ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 ; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: vmov r3, r2, d1 ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d3 -; CHECK-NEXT: subs r0, r3, r0 +; CHECK-NEXT: cmp r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 ; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 @@ -1071,7 +1071,7 @@ define arm_aapcs_vfpcc void @umin4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) { ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmov r1, r12, d6 ; CHECK-NEXT: vmov r3, r2, d2 -; CHECK-NEXT: subs r1, r3, r1 +; CHECK-NEXT: cmp r3, r1 ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r1, r2, r12 ; CHECK-NEXT: vmov lr, r12, d7 @@ -1079,7 +1079,7 @@ define arm_aapcs_vfpcc void @umin4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) { ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: vmov r2, r4, d3 -; CHECK-NEXT: subs.w r2, r2, lr +; CHECK-NEXT: cmp r2, lr ; CHECK-NEXT: sbcs.w r2, r4, r12 ; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r3, r2, #8, #8 @@ -1088,13 +1088,13 @@ define arm_aapcs_vfpcc void @umin4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) { ; CHECK-NEXT: vmov r4, r3, d0 ; CHECK-NEXT: vpsel q1, q1, q3 ; CHECK-NEXT: vstrw.32 q1, [r0, #16] -; CHECK-NEXT: subs r2, r4, r2 +; CHECK-NEXT: cmp r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: vmov r4, r3, d1 ; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r1, r2, #0, #8 ; CHECK-NEXT: vmov r2, r12, d5 -; CHECK-NEXT: subs r2, r4, r2 +; CHECK-NEXT: cmp r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r1, r2, #8, #8 diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll index 29b56639bd769..e3ea396da4c84 100644 --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ 
b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -57,13 +57,13 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: mvn r5, #-2147483648 ; CHECK-NEXT: vpsel q2, q2, q0 ; CHECK-NEXT: vmov r2, r3, d4 -; CHECK-NEXT: subs r2, r2, r5 +; CHECK-NEXT: cmp r2, r5 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: vmov r2, r4, d5 -; CHECK-NEXT: subs r2, r2, r5 +; CHECK-NEXT: cmp r2, r5 ; CHECK-NEXT: sbcs r2, r4, #0 ; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r3, r2, #8, #8 @@ -88,11 +88,11 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: ldr r4, [r8], #4 ; CHECK-NEXT: smull r2, r5, r4, r2 ; CHECK-NEXT: asrl r2, r5, #31 -; CHECK-NEXT: subs r4, r1, r2 +; CHECK-NEXT: cmp r1, r2 ; CHECK-NEXT: sbcs.w r4, r0, r5 ; CHECK-NEXT: csel r2, r2, r1, lt ; CHECK-NEXT: csel r4, r5, r0, lt -; CHECK-NEXT: subs r5, r2, r3 +; CHECK-NEXT: cmp r2, r3 ; CHECK-NEXT: sbcs r4, r4, #0 ; CHECK-NEXT: csel r2, r2, r3, lt ; CHECK-NEXT: str r2, [r10], #4 @@ -194,39 +194,38 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: beq.w .LBB1_8 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB1_3 ; CHECK-NEXT: @ %bb.2: ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: mov r9, r5 +; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: mov r11, r2 ; CHECK-NEXT: b .LBB1_6 ; CHECK-NEXT: .LBB1_3: @ %vector.ph -; CHECK-NEXT: bic r1, r3, #3 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: bic r3, r3, #3 +; CHECK-NEXT: subs r7, r3, #4 ; CHECK-NEXT: adr r4, .LCPI1_0 -; CHECK-NEXT: subs r7, r1, #4 ; CHECK-NEXT: movs r6, #1 ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: adr r4, .LCPI1_1 -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: add.w lr, r6, r7, lsr #2 
-; CHECK-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-NEXT: add.w r11, r2, r1, lsl #2 -; CHECK-NEXT: add.w r9, r5, r1, lsl #2 -; CHECK-NEXT: add.w r12, r0, r1, lsl #2 +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: add.w r11, r2, r3, lsl #2 +; CHECK-NEXT: add.w r9, r1, r3, lsl #2 +; CHECK-NEXT: add.w r12, r0, r3, lsl #2 ; CHECK-NEXT: vldrw.u32 q1, [r4] ; CHECK-NEXT: .LBB1_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q3, [r5], #16 +; CHECK-NEXT: vldrw.u32 q3, [r1], #16 ; CHECK-NEXT: vldrw.u32 q2, [r0], #16 ; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: mov.w r2, #-1 ; CHECK-NEXT: vmov.f32 s16, s10 -; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: vmov.f32 s20, s14 ; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: vmov.f32 s20, s14 +; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: vmov.f32 s18, s11 ; CHECK-NEXT: vmov.f32 s22, s15 ; CHECK-NEXT: vmullb.s32 q6, q5, q4 @@ -251,13 +250,13 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: mvn r8, #-2147483648 ; CHECK-NEXT: vpsel q4, q4, q0 ; CHECK-NEXT: vmov r3, r4, d8 -; CHECK-NEXT: subs.w r3, r3, r8 +; CHECK-NEXT: cmp r3, r8 ; CHECK-NEXT: sbcs r3, r4, #0 ; CHECK-NEXT: mov.w r4, #0 ; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #0, #8 ; CHECK-NEXT: vmov r3, r5, d9 -; CHECK-NEXT: subs.w r3, r3, r8 +; CHECK-NEXT: cmp r3, r8 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: mov.w r5, #0 ; CHECK-NEXT: csetm r3, lt @@ -282,19 +281,19 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r5, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r5 -; CHECK-NEXT: ldrd r5, r2, [sp, #8] @ 8-byte Folded Reload +; CHECK-NEXT: ldrd r1, r2, [sp, #8] @ 8-byte Folded Reload ; CHECK-NEXT: vpsel q2, q2, q0 -; CHECK-NEXT: vmov r1, r3, d4 -; CHECK-NEXT: subs.w r1, r1, r8 -; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: csetm r1, lt 
-; CHECK-NEXT: bfi r3, r1, #0, #8 -; CHECK-NEXT: vmov r1, r4, d5 -; CHECK-NEXT: subs.w r1, r1, r8 -; CHECK-NEXT: sbcs r1, r4, #0 -; CHECK-NEXT: csetm r1, lt -; CHECK-NEXT: bfi r3, r1, #8, #8 +; CHECK-NEXT: vmov r4, r3, d4 +; CHECK-NEXT: cmp r4, r8 +; CHECK-NEXT: sbcs r3, r3, #0 +; CHECK-NEXT: csetm r4, lt +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: bfi r3, r4, #0, #8 +; CHECK-NEXT: vmov r5, r4, d5 +; CHECK-NEXT: cmp r5, r8 +; CHECK-NEXT: sbcs r4, r4, #0 +; CHECK-NEXT: csetm r4, lt +; CHECK-NEXT: bfi r3, r4, #8, #8 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q2, q2, q1 ; CHECK-NEXT: vmov.f32 s9, s10 @@ -303,11 +302,11 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: vstrb.8 q2, [r2], #16 ; CHECK-NEXT: le lr, .LBB1_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block -; CHECK-NEXT: ldrd r1, r3, [sp] @ 8-byte Folded Reload -; CHECK-NEXT: cmp r1, r3 +; CHECK-NEXT: ldrd r7, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: cmp r7, r3 ; CHECK-NEXT: beq .LBB1_8 ; CHECK-NEXT: .LBB1_6: @ %for.body.preheader21 -; CHECK-NEXT: sub.w lr, r3, r1 +; CHECK-NEXT: sub.w lr, r3, r7 ; CHECK-NEXT: mov.w r0, #-1 ; CHECK-NEXT: mov.w r3, #-2147483648 ; CHECK-NEXT: mvn r2, #-2147483648 @@ -317,11 +316,11 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: ldr r4, [r9], #4 ; CHECK-NEXT: smull r4, r1, r4, r1 ; CHECK-NEXT: asrl r4, r1, #31 -; CHECK-NEXT: subs r5, r3, r4 +; CHECK-NEXT: cmp r3, r4 ; CHECK-NEXT: sbcs.w r5, r0, r1 ; CHECK-NEXT: csel r4, r4, r3, lt ; CHECK-NEXT: csel r1, r1, r0, lt -; CHECK-NEXT: subs r5, r4, r2 +; CHECK-NEXT: cmp r4, r2 ; CHECK-NEXT: sbcs r1, r1, #0 ; CHECK-NEXT: csel r1, r4, r2, lt ; CHECK-NEXT: str r1, [r11], #4 @@ -483,13 +482,13 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n ; CHECK-NEXT: vmsr p0, r4 ; CHECK-NEXT: vpsel q0, q0, q2 ; CHECK-NEXT: vmov r3, r4, d0 -; CHECK-NEXT: subs.w r3, r3, r8 +; CHECK-NEXT: cmp r3, r8 ; CHECK-NEXT: sbcs r3, r4, 
#0 ; CHECK-NEXT: mov.w r4, #0 ; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #0, #8 ; CHECK-NEXT: vmov r3, r5, d1 -; CHECK-NEXT: subs.w r3, r3, r8 +; CHECK-NEXT: cmp r3, r8 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #8, #8 @@ -517,13 +516,13 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n ; CHECK-NEXT: vmsr p0, r4 ; CHECK-NEXT: vpsel q0, q0, q2 ; CHECK-NEXT: vmov r3, r4, d0 -; CHECK-NEXT: subs.w r3, r3, r8 +; CHECK-NEXT: cmp r3, r8 ; CHECK-NEXT: sbcs r3, r4, #0 ; CHECK-NEXT: mov.w r4, #0 ; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #0, #8 ; CHECK-NEXT: vmov r3, r5, d1 -; CHECK-NEXT: subs.w r3, r3, r8 +; CHECK-NEXT: cmp r3, r8 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #8, #8 @@ -603,44 +602,40 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: beq .LBB3_8 ; CHECK-NEXT: @ %bb.1: @ %entry -; CHECK-NEXT: mov r8, r2 ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: bne .LBB3_3 ; CHECK-NEXT: @ %bb.2: -; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: mov r12, r0 ; CHECK-NEXT: mov r11, r1 -; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r7, r2 ; CHECK-NEXT: b .LBB3_6 ; CHECK-NEXT: .LBB3_3: @ %vector.ph -; CHECK-NEXT: bic r5, r3, #1 +; CHECK-NEXT: bic r8, r3, #1 ; CHECK-NEXT: movs r6, #1 -; CHECK-NEXT: subs r7, r5, #2 -; CHECK-NEXT: str r5, [sp] @ 4-byte Spill -; CHECK-NEXT: add.w r2, r8, r5, lsl #2 -; CHECK-NEXT: add.w r11, r1, r5, lsl #2 -; CHECK-NEXT: add.w lr, r6, r7, lsr #1 -; CHECK-NEXT: add.w r12, r0, r5, lsl #2 +; CHECK-NEXT: sub.w r7, r8, #2 ; CHECK-NEXT: vmov.i8 q0, #0xff +; CHECK-NEXT: add.w r11, r1, r8, lsl #2 +; CHECK-NEXT: add.w r12, r0, r8, lsl #2 +; CHECK-NEXT: 
add.w lr, r6, r7, lsr #1 +; CHECK-NEXT: add.w r7, r2, r8, lsl #2 ; CHECK-NEXT: .LBB3_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrd r4, r9, [r0], #8 +; CHECK-NEXT: movs r6, #0 ; CHECK-NEXT: ldrd r5, r10, [r1], #8 ; CHECK-NEXT: umull r4, r5, r5, r4 ; CHECK-NEXT: lsrl r4, r5, #31 -; CHECK-NEXT: subs.w r6, r4, #-1 +; CHECK-NEXT: cmp.w r4, #-1 ; CHECK-NEXT: sbcs r5, r5, #0 -; CHECK-NEXT: mov.w r6, #0 ; CHECK-NEXT: csetm r5, lo ; CHECK-NEXT: bfi r6, r5, #0, #8 ; CHECK-NEXT: umull r10, r5, r10, r9 ; CHECK-NEXT: lsrl r10, r5, #31 -; CHECK-NEXT: subs.w r7, r10, #-1 +; CHECK-NEXT: cmp.w r10, #-1 ; CHECK-NEXT: vmov q1[2], q1[0], r4, r10 ; CHECK-NEXT: sbcs r5, r5, #0 ; CHECK-NEXT: csetm r5, lo @@ -649,28 +644,26 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: vpsel q1, q1, q0 ; CHECK-NEXT: vmov r4, s6 ; CHECK-NEXT: vmov r5, s4 -; CHECK-NEXT: strd r5, r4, [r8], #8 +; CHECK-NEXT: strd r5, r4, [r2], #8 ; CHECK-NEXT: le lr, .LBB3_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block -; CHECK-NEXT: ldr r7, [sp] @ 4-byte Reload -; CHECK-NEXT: cmp r7, r3 +; CHECK-NEXT: cmp r8, r3 ; CHECK-NEXT: beq .LBB3_8 ; CHECK-NEXT: .LBB3_6: @ %for.body.preheader -; CHECK-NEXT: sub.w lr, r3, r7 +; CHECK-NEXT: sub.w lr, r3, r8 ; CHECK-NEXT: .LBB3_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r0, [r12], #4 ; CHECK-NEXT: ldr r1, [r11], #4 ; CHECK-NEXT: umull r0, r1, r1, r0 ; CHECK-NEXT: lsrl r0, r1, #31 -; CHECK-NEXT: subs.w r3, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: sbcs r1, r1, #0 ; CHECK-NEXT: it hs ; CHECK-NEXT: movhs.w r0, #-1 -; CHECK-NEXT: str r0, [r2], #4 +; CHECK-NEXT: str r0, [r7], #4 ; CHECK-NEXT: le lr, .LBB3_7 ; CHECK-NEXT: .LBB3_8: @ %for.cond.cleanup -; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: switch i32 %N, label %vector.ph [ @@ -744,10 +737,8 @@ for.body: ; preds = %for.body.preheader, define arm_aapcs_vfpcc 
void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr noalias nocapture %pDst, i32 %N) { ; CHECK-LABEL: usatmul_4_q31: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: cmp r3, #0 @@ -759,21 +750,22 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: @ %bb.2: ; CHECK-NEXT: mov r12, r0 ; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: mov r11, r2 +; CHECK-NEXT: mov r7, r2 ; CHECK-NEXT: b .LBB4_6 ; CHECK-NEXT: .LBB4_3: @ %vector.ph ; CHECK-NEXT: bic r8, r3, #3 ; CHECK-NEXT: movs r6, #1 ; CHECK-NEXT: sub.w r7, r8, #4 ; CHECK-NEXT: vmov.i64 q0, #0xffffffff -; CHECK-NEXT: add.w r11, r2, r8, lsl #2 ; CHECK-NEXT: add.w r9, r1, r8, lsl #2 -; CHECK-NEXT: add.w lr, r6, r7, lsr #2 ; CHECK-NEXT: add.w r12, r0, r8, lsl #2 +; CHECK-NEXT: add.w lr, r6, r7, lsr #2 +; CHECK-NEXT: add.w r7, r2, r8, lsl #2 ; CHECK-NEXT: .LBB4_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: vldrw.u32 q2, [r1], #16 +; CHECK-NEXT: movs r6, #0 ; CHECK-NEXT: vmov.f32 s12, s6 ; CHECK-NEXT: vmov.f32 s14, s7 ; CHECK-NEXT: vmov.f32 s16, s10 @@ -783,15 +775,14 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: vmov r10, r5, d10 ; CHECK-NEXT: lsrl r10, r5, #31 ; CHECK-NEXT: vmov.f32 s10, s9 -; CHECK-NEXT: subs.w r6, r10, #-1 +; CHECK-NEXT: cmp.w r10, #-1 ; CHECK-NEXT: sbcs r5, r5, #0 -; CHECK-NEXT: mov.w r6, #0 -; CHECK-NEXT: csetm r5, lo ; CHECK-NEXT: vmullb.u32 q4, q2, q1 +; CHECK-NEXT: csetm r5, lo ; CHECK-NEXT: bfi r6, r5, #0, #8 ; CHECK-NEXT: vmov r4, r5, d11 ; CHECK-NEXT: lsrl 
r4, r5, #31 -; CHECK-NEXT: subs.w r7, r4, #-1 +; CHECK-NEXT: cmp.w r4, #-1 ; CHECK-NEXT: vmov q3[2], q3[0], r10, r4 ; CHECK-NEXT: sbcs r5, r5, #0 ; CHECK-NEXT: csetm r5, lo @@ -799,15 +790,15 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: vmov r10, r5, d8 ; CHECK-NEXT: lsrl r10, r5, #31 ; CHECK-NEXT: vmsr p0, r6 -; CHECK-NEXT: subs.w r6, r10, #-1 -; CHECK-NEXT: vpsel q3, q3, q0 -; CHECK-NEXT: sbcs r5, r5, #0 +; CHECK-NEXT: cmp.w r10, #-1 ; CHECK-NEXT: mov.w r6, #0 +; CHECK-NEXT: sbcs r5, r5, #0 +; CHECK-NEXT: vpsel q3, q3, q0 ; CHECK-NEXT: csetm r5, lo ; CHECK-NEXT: bfi r6, r5, #0, #8 ; CHECK-NEXT: vmov r4, r5, d9 ; CHECK-NEXT: lsrl r4, r5, #31 -; CHECK-NEXT: subs.w r7, r4, #-1 +; CHECK-NEXT: cmp.w r4, #-1 ; CHECK-NEXT: vmov q1[2], q1[0], r10, r4 ; CHECK-NEXT: sbcs r5, r5, #0 ; CHECK-NEXT: csetm r5, lo @@ -830,16 +821,15 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: ldr r1, [r9], #4 ; CHECK-NEXT: umull r0, r1, r1, r0 ; CHECK-NEXT: lsrl r0, r1, #31 -; CHECK-NEXT: subs.w r2, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: sbcs r1, r1, #0 ; CHECK-NEXT: it hs ; CHECK-NEXT: movhs.w r0, #-1 -; CHECK-NEXT: str r0, [r11], #4 +; CHECK-NEXT: str r0, [r7], #4 ; CHECK-NEXT: le lr, .LBB4_7 ; CHECK-NEXT: .LBB4_8: @ %for.cond.cleanup ; CHECK-NEXT: vpop {d8, d9, d10, d11} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} entry: %cmp8 = icmp eq i32 %N, 0 br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader diff --git a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll index bbc0ff9bd1be5..e577f0db39656 100644 --- a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll +++ b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll @@ -41,7 +41,7 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) ; CHECK-NEXT: 
adds.w r12, r2, r0 ; CHECK-NEXT: vmov r0, r4, d1 ; CHECK-NEXT: adc.w lr, r3, r1 -; CHECK-NEXT: subs.w r2, r12, r2 +; CHECK-NEXT: cmp r12, r2 ; CHECK-NEXT: sbcs.w r2, lr, r3 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r1, #0 @@ -53,7 +53,7 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) ; CHECK-NEXT: vmov r1, r3, d3 ; CHECK-NEXT: adds r1, r1, r0 ; CHECK-NEXT: adc.w r5, r4, r3 -; CHECK-NEXT: subs r0, r1, r0 +; CHECK-NEXT: cmp r1, r0 ; CHECK-NEXT: sbcs.w r0, r5, r4 ; CHECK-NEXT: vmov q0[2], q0[0], r12, r1 ; CHECK-NEXT: cset r0, lt @@ -124,14 +124,14 @@ define arm_aapcs_vfpcc <2 x i64> @uadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: adds r5, r2, r0 ; CHECK-NEXT: adc.w lr, r3, r1 -; CHECK-NEXT: subs r2, r5, r2 +; CHECK-NEXT: cmp r5, r2 ; CHECK-NEXT: sbcs.w r2, lr, r3 ; CHECK-NEXT: vmov r3, r12, d2 ; CHECK-NEXT: vmov r1, r4, d0 ; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: adds r3, r3, r1 ; CHECK-NEXT: adc.w r0, r4, r12 -; CHECK-NEXT: subs r1, r3, r1 +; CHECK-NEXT: cmp r3, r1 ; CHECK-NEXT: sbcs.w r1, r0, r4 ; CHECK-NEXT: vmov q1[2], q1[0], r3, r5 ; CHECK-NEXT: csetm r1, lo @@ -186,7 +186,7 @@ define arm_aapcs_vfpcc <2 x i64> @ssub_int64_t(<2 x i64> %src1, <2 x i64> %src2) ; CHECK-NEXT: vmov r4, r5, d1 ; CHECK-NEXT: subs.w r12, r1, r2 ; CHECK-NEXT: sbc.w lr, r0, r3 -; CHECK-NEXT: subs.w r1, r12, r1 +; CHECK-NEXT: cmp r12, r1 ; CHECK-NEXT: sbcs.w r0, lr, r0 ; CHECK-NEXT: mov.w r1, #0 ; CHECK-NEXT: cset r0, lt @@ -198,7 +198,7 @@ define arm_aapcs_vfpcc <2 x i64> @ssub_int64_t(<2 x i64> %src1, <2 x i64> %src2) ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: subs r6, r4, r2 ; CHECK-NEXT: sbc.w r7, r5, r3 -; CHECK-NEXT: subs r4, r6, r4 +; CHECK-NEXT: cmp r6, r4 ; CHECK-NEXT: sbcs.w r4, r7, r5 ; CHECK-NEXT: vmov q0[2], q0[0], r12, r6 ; CHECK-NEXT: cset r4, lt @@ -271,14 +271,14 @@ define arm_aapcs_vfpcc <2 x i64> @usub_int64_t(<2 x i64> %src1, <2 x i64> %src2) ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: subs 
r5, r2, r0 ; CHECK-NEXT: sbc.w lr, r3, r1 -; CHECK-NEXT: subs r2, r2, r5 +; CHECK-NEXT: cmp r2, r5 ; CHECK-NEXT: sbcs.w r2, r3, lr ; CHECK-NEXT: vmov r3, r12, d2 ; CHECK-NEXT: vmov r1, r4, d0 ; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: subs r3, r1, r3 ; CHECK-NEXT: sbc.w r0, r4, r12 -; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: cmp r1, r3 ; CHECK-NEXT: sbcs.w r1, r4, r0 ; CHECK-NEXT: vmov q1[2], q1[0], r3, r5 ; CHECK-NEXT: csetm r1, lo diff --git a/llvm/test/CodeGen/Thumb2/mve-scmp.ll b/llvm/test/CodeGen/Thumb2/mve-scmp.ll index 23462384eca90..985eacdb661bd 100644 --- a/llvm/test/CodeGen/Thumb2/mve-scmp.ll +++ b/llvm/test/CodeGen/Thumb2/mve-scmp.ll @@ -95,43 +95,43 @@ entry: define arm_aapcs_vfpcc <2 x i32> @s_v2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: s_v2i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vmov r5, s4 ; CHECK-NEXT: adr.w r12, .LCPI5_0 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: vldrw.u32 q3, [r12] ; CHECK-NEXT: vmov r0, s6 -; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: mov.w lr, #0 ; CHECK-NEXT: vmov r4, s2 ; CHECK-NEXT: vmov.i32 q2, #0x0 ; CHECK-NEXT: vmov.i8 q1, #0xff -; CHECK-NEXT: subs r3, r2, r1 -; CHECK-NEXT: asr.w lr, r2, #31 -; CHECK-NEXT: sbcs.w r3, lr, r1, asr #31 +; CHECK-NEXT: asrs r3, r5, #31 +; CHECK-NEXT: cmp r5, r1 +; CHECK-NEXT: sbcs.w r3, r3, r1, asr #31 ; CHECK-NEXT: csetm r12, lt ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: subs r5, r0, r4 +; CHECK-NEXT: cmp r0, r4 ; CHECK-NEXT: bfi r3, r12, #0, #8 ; CHECK-NEXT: asr.w r12, r0, #31 -; CHECK-NEXT: sbcs.w r5, r12, r4, asr #31 -; CHECK-NEXT: csetm r5, lt -; CHECK-NEXT: bfi r3, r5, #8, #8 +; CHECK-NEXT: sbcs.w r2, r12, r4, asr #31 +; CHECK-NEXT: csetm r2, lt +; CHECK-NEXT: cmp r1, r5 +; CHECK-NEXT: bfi r3, r2, #8, #8 +; CHECK-NEXT: asr.w r2, r1, #31 +; CHECK-NEXT: sbcs.w r1, r2, r5, asr #31 ; CHECK-NEXT: vmsr p0, r3 -; 
CHECK-NEXT: asrs r3, r1, #31 -; CHECK-NEXT: subs r1, r1, r2 -; CHECK-NEXT: vpsel q0, q3, q2 -; CHECK-NEXT: sbcs.w r1, r3, r2, asr #31 ; CHECK-NEXT: csetm r1, lt -; CHECK-NEXT: subs r2, r4, r0 -; CHECK-NEXT: bfi r6, r1, #0, #8 +; CHECK-NEXT: cmp r4, r0 +; CHECK-NEXT: bfi lr, r1, #0, #8 ; CHECK-NEXT: asr.w r1, r4, #31 ; CHECK-NEXT: sbcs.w r0, r1, r0, asr #31 +; CHECK-NEXT: vpsel q0, q3, q2 ; CHECK-NEXT: csetm r0, lt -; CHECK-NEXT: bfi r6, r0, #8, #8 -; CHECK-NEXT: vmsr p0, r6 +; CHECK-NEXT: bfi lr, r0, #8, #8 +; CHECK-NEXT: vmsr p0, lr ; CHECK-NEXT: vpsel q0, q1, q0 -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI5_0: @@ -194,27 +194,27 @@ define arm_aapcs_vfpcc <2 x i64> @s_v2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: vmov r6, r7, d3 ; CHECK-NEXT: vmov.i32 q1, #0x0 -; CHECK-NEXT: subs.w r1, r3, lr +; CHECK-NEXT: cmp r3, lr ; CHECK-NEXT: sbcs.w r1, r8, r12 ; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r4, r1, #0, #8 ; CHECK-NEXT: vmov r1, r5, d1 -; CHECK-NEXT: subs r2, r6, r1 +; CHECK-NEXT: cmp r6, r1 ; CHECK-NEXT: sbcs.w r2, r7, r5 ; CHECK-NEXT: csetm r2, lt +; CHECK-NEXT: cmp lr, r3 ; CHECK-NEXT: bfi r4, r2, #8, #8 ; CHECK-NEXT: adr r2, .LCPI8_0 ; CHECK-NEXT: vldrw.u32 q0, [r2] -; CHECK-NEXT: subs.w r2, lr, r3 ; CHECK-NEXT: sbcs.w r2, r12, r8 -; CHECK-NEXT: vmsr p0, r4 ; CHECK-NEXT: csetm r2, lt -; CHECK-NEXT: subs r1, r1, r6 +; CHECK-NEXT: cmp r1, r6 ; CHECK-NEXT: sbcs.w r1, r5, r7 ; CHECK-NEXT: bfi r0, r2, #0, #8 ; CHECK-NEXT: csetm r1, lt -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmsr p0, r4 ; CHECK-NEXT: bfi r0, r1, #8, #8 +; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vmov.i8 q1, #0xff ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpsel q0, q1, q0 @@ -247,23 +247,23 @@ define arm_aapcs_vfpcc <4 x i64> @s_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK-NEXT: vmov r6, r7, d5 ; CHECK-NEXT: mov.w r9, #0 ; CHECK-NEXT: vmov.i8 q2, #0xff -; CHECK-NEXT: 
subs.w r1, r3, lr +; CHECK-NEXT: cmp r3, lr ; CHECK-NEXT: sbcs.w r1, r8, r12 ; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r4, r1, #0, #8 ; CHECK-NEXT: vmov r1, r5, d1 -; CHECK-NEXT: subs r2, r6, r1 +; CHECK-NEXT: cmp r6, r1 ; CHECK-NEXT: sbcs.w r2, r7, r5 ; CHECK-NEXT: csetm r2, lt +; CHECK-NEXT: cmp lr, r3 ; CHECK-NEXT: bfi r4, r2, #8, #8 ; CHECK-NEXT: adr r2, .LCPI9_0 ; CHECK-NEXT: vldrw.u32 q4, [r2] -; CHECK-NEXT: subs.w r2, lr, r3 ; CHECK-NEXT: sbcs.w r2, r12, r8 -; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: csetm r2, lt -; CHECK-NEXT: subs r1, r1, r6 +; CHECK-NEXT: cmp r1, r6 ; CHECK-NEXT: sbcs.w r1, r5, r7 +; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: vmsr p0, r4 @@ -271,29 +271,29 @@ define arm_aapcs_vfpcc <4 x i64> @s_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK-NEXT: vpsel q0, q4, q5 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vmov lr, r12, d2 -; CHECK-NEXT: vmov r3, r7, d6 ; CHECK-NEXT: movs r5, #0 -; CHECK-NEXT: vmov r2, r1, d7 +; CHECK-NEXT: vmov r3, r7, d6 ; CHECK-NEXT: vpsel q0, q2, q0 -; CHECK-NEXT: subs.w r6, r3, lr +; CHECK-NEXT: vmov r2, r1, d7 +; CHECK-NEXT: cmp r3, lr ; CHECK-NEXT: sbcs.w r6, r7, r12 ; CHECK-NEXT: csetm r6, lt ; CHECK-NEXT: bfi r5, r6, #0, #8 ; CHECK-NEXT: vmov r6, r4, d3 -; CHECK-NEXT: subs r0, r2, r6 +; CHECK-NEXT: cmp r2, r6 ; CHECK-NEXT: sbcs.w r0, r1, r4 ; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: cmp lr, r3 ; CHECK-NEXT: bfi r5, r0, #8, #8 -; CHECK-NEXT: subs.w r0, lr, r3 ; CHECK-NEXT: sbcs.w r0, r12, r7 -; CHECK-NEXT: vmsr p0, r5 ; CHECK-NEXT: csetm r0, lt -; CHECK-NEXT: vpsel q1, q4, q5 +; CHECK-NEXT: cmp r6, r2 ; CHECK-NEXT: bfi r9, r0, #0, #8 -; CHECK-NEXT: subs r0, r6, r2 ; CHECK-NEXT: sbcs.w r0, r4, r1 ; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: vmsr p0, r5 ; CHECK-NEXT: bfi r9, r0, #8, #8 +; CHECK-NEXT: vpsel q1, q4, q5 ; CHECK-NEXT: vmsr p0, r9 ; CHECK-NEXT: vpsel q1, q2, q1 ; CHECK-NEXT: vpop {d8, d9, d10, d11} diff --git a/llvm/test/CodeGen/Thumb2/mve-ucmp.ll 
b/llvm/test/CodeGen/Thumb2/mve-ucmp.ll index 92dc9a01d2116..23b6cf43b838c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-ucmp.ll +++ b/llvm/test/CodeGen/Thumb2/mve-ucmp.ll @@ -106,27 +106,27 @@ define arm_aapcs_vfpcc <2 x i32> @u_v2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: vmov r3, r8, d2 ; CHECK-NEXT: vmov r6, r7, d3 ; CHECK-NEXT: vmov.i32 q1, #0x0 -; CHECK-NEXT: subs.w r1, r3, lr +; CHECK-NEXT: cmp r3, lr ; CHECK-NEXT: sbcs.w r1, r8, r12 ; CHECK-NEXT: csetm r1, lo ; CHECK-NEXT: bfi r4, r1, #0, #8 ; CHECK-NEXT: vmov r1, r5, d1 -; CHECK-NEXT: subs r2, r6, r1 +; CHECK-NEXT: cmp r6, r1 ; CHECK-NEXT: sbcs.w r2, r7, r5 ; CHECK-NEXT: csetm r2, lo +; CHECK-NEXT: cmp lr, r3 ; CHECK-NEXT: bfi r4, r2, #8, #8 ; CHECK-NEXT: adr r2, .LCPI5_0 ; CHECK-NEXT: vldrw.u32 q0, [r2] -; CHECK-NEXT: subs.w r2, lr, r3 ; CHECK-NEXT: sbcs.w r2, r12, r8 -; CHECK-NEXT: vmsr p0, r4 ; CHECK-NEXT: csetm r2, lo -; CHECK-NEXT: subs r1, r1, r6 +; CHECK-NEXT: cmp r1, r6 ; CHECK-NEXT: sbcs.w r1, r5, r7 ; CHECK-NEXT: bfi r0, r2, #0, #8 ; CHECK-NEXT: csetm r1, lo -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmsr p0, r4 ; CHECK-NEXT: bfi r0, r1, #8, #8 +; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vmov.i8 q1, #0xff ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpsel q0, q1, q0 @@ -193,27 +193,27 @@ define arm_aapcs_vfpcc <2 x i64> @u_v2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: vmov r6, r7, d3 ; CHECK-NEXT: vmov.i32 q1, #0x0 -; CHECK-NEXT: subs.w r1, r3, lr +; CHECK-NEXT: cmp r3, lr ; CHECK-NEXT: sbcs.w r1, r8, r12 ; CHECK-NEXT: csetm r1, lo ; CHECK-NEXT: bfi r4, r1, #0, #8 ; CHECK-NEXT: vmov r1, r5, d1 -; CHECK-NEXT: subs r2, r6, r1 +; CHECK-NEXT: cmp r6, r1 ; CHECK-NEXT: sbcs.w r2, r7, r5 ; CHECK-NEXT: csetm r2, lo +; CHECK-NEXT: cmp lr, r3 ; CHECK-NEXT: bfi r4, r2, #8, #8 ; CHECK-NEXT: adr r2, .LCPI8_0 ; CHECK-NEXT: vldrw.u32 q0, [r2] -; CHECK-NEXT: subs.w r2, lr, r3 ; CHECK-NEXT: sbcs.w r2, r12, r8 -; CHECK-NEXT: vmsr p0, r4 ; CHECK-NEXT: csetm r2, lo -; CHECK-NEXT: subs 
r1, r1, r6 +; CHECK-NEXT: cmp r1, r6 ; CHECK-NEXT: sbcs.w r1, r5, r7 ; CHECK-NEXT: bfi r0, r2, #0, #8 ; CHECK-NEXT: csetm r1, lo -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmsr p0, r4 ; CHECK-NEXT: bfi r0, r1, #8, #8 +; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vmov.i8 q1, #0xff ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpsel q0, q1, q0 @@ -246,23 +246,23 @@ define arm_aapcs_vfpcc <4 x i64> @u_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK-NEXT: vmov r6, r7, d5 ; CHECK-NEXT: mov.w r9, #0 ; CHECK-NEXT: vmov.i8 q2, #0xff -; CHECK-NEXT: subs.w r1, r3, lr +; CHECK-NEXT: cmp r3, lr ; CHECK-NEXT: sbcs.w r1, r8, r12 ; CHECK-NEXT: csetm r1, lo ; CHECK-NEXT: bfi r4, r1, #0, #8 ; CHECK-NEXT: vmov r1, r5, d1 -; CHECK-NEXT: subs r2, r6, r1 +; CHECK-NEXT: cmp r6, r1 ; CHECK-NEXT: sbcs.w r2, r7, r5 ; CHECK-NEXT: csetm r2, lo +; CHECK-NEXT: cmp lr, r3 ; CHECK-NEXT: bfi r4, r2, #8, #8 ; CHECK-NEXT: adr r2, .LCPI9_0 ; CHECK-NEXT: vldrw.u32 q4, [r2] -; CHECK-NEXT: subs.w r2, lr, r3 ; CHECK-NEXT: sbcs.w r2, r12, r8 -; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: csetm r2, lo -; CHECK-NEXT: subs r1, r1, r6 +; CHECK-NEXT: cmp r1, r6 ; CHECK-NEXT: sbcs.w r1, r5, r7 +; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: csetm r1, lo ; CHECK-NEXT: vmsr p0, r4 @@ -270,29 +270,29 @@ define arm_aapcs_vfpcc <4 x i64> @u_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK-NEXT: vpsel q0, q4, q5 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vmov lr, r12, d2 -; CHECK-NEXT: vmov r3, r7, d6 ; CHECK-NEXT: movs r5, #0 -; CHECK-NEXT: vmov r2, r1, d7 +; CHECK-NEXT: vmov r3, r7, d6 ; CHECK-NEXT: vpsel q0, q2, q0 -; CHECK-NEXT: subs.w r6, r3, lr +; CHECK-NEXT: vmov r2, r1, d7 +; CHECK-NEXT: cmp r3, lr ; CHECK-NEXT: sbcs.w r6, r7, r12 ; CHECK-NEXT: csetm r6, lo ; CHECK-NEXT: bfi r5, r6, #0, #8 ; CHECK-NEXT: vmov r6, r4, d3 -; CHECK-NEXT: subs r0, r2, r6 +; CHECK-NEXT: cmp r2, r6 ; CHECK-NEXT: sbcs.w r0, r1, r4 ; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: cmp lr, r3 ; CHECK-NEXT: bfi r5, r0, #8, #8 -; 
CHECK-NEXT: subs.w r0, lr, r3 ; CHECK-NEXT: sbcs.w r0, r12, r7 -; CHECK-NEXT: vmsr p0, r5 ; CHECK-NEXT: csetm r0, lo -; CHECK-NEXT: vpsel q1, q4, q5 +; CHECK-NEXT: cmp r6, r2 ; CHECK-NEXT: bfi r9, r0, #0, #8 -; CHECK-NEXT: subs r0, r6, r2 ; CHECK-NEXT: sbcs.w r0, r4, r1 ; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: vmsr p0, r5 ; CHECK-NEXT: bfi r9, r0, #8, #8 +; CHECK-NEXT: vpsel q1, q4, q5 ; CHECK-NEXT: vmsr p0, r9 ; CHECK-NEXT: vpsel q1, q2, q1 ; CHECK-NEXT: vpop {d8, d9, d10, d11} diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmp.ll b/llvm/test/CodeGen/Thumb2/mve-vcmp.ll index 020d06a3937a3..3f7e1ea264d31 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmp.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmp.ll @@ -396,14 +396,14 @@ define arm_aapcs_vfpcc <2 x i64> @vcmp_slt_v2i64(<2 x i64> %src, <2 x i64> %srcb ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov r0, r1, d2 ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 ; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: vmov r3, r2, d1 ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d3 -; CHECK-NEXT: subs r0, r3, r0 +; CHECK-NEXT: cmp r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 ; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #8, #8 @@ -467,7 +467,7 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, < ; CHECK-NEXT: csel r12, zr, r2, eq ; CHECK-NEXT: vmov r2, s8 ; CHECK-NEXT: asrs r3, r1, #31 -; CHECK-NEXT: subs r1, r1, r2 +; CHECK-NEXT: cmp r1, r2 ; CHECK-NEXT: sbcs.w r1, r3, r2, asr #31 ; CHECK-NEXT: vmov r2, s6 ; CHECK-NEXT: csel r1, zr, r12, ge @@ -480,7 +480,7 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, < ; CHECK-NEXT: csel r12, zr, r2, eq ; CHECK-NEXT: vmov r2, s10 ; CHECK-NEXT: asrs r3, r1, #31 -; CHECK-NEXT: subs r1, r1, r2 +; CHECK-NEXT: cmp r1, r2 ; CHECK-NEXT: sbcs.w r1, r3, r2, asr #31 ; CHECK-NEXT: csel r1, zr, r12, ge ; CHECK-NEXT: rsbs r1, r1, #0 diff --git 
a/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll index 8312e91b51b69..b70ca98f1bedf 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll @@ -508,7 +508,7 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, < ; CHECK-NEXT: csel r12, zr, r2, eq ; CHECK-NEXT: vmov r2, s8 ; CHECK-NEXT: asrs r3, r1, #31 -; CHECK-NEXT: subs r1, r1, r2 +; CHECK-NEXT: cmp r1, r2 ; CHECK-NEXT: sbcs.w r1, r3, r2, asr #31 ; CHECK-NEXT: vmov r2, s6 ; CHECK-NEXT: csel r1, zr, r12, ge @@ -521,7 +521,7 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, < ; CHECK-NEXT: csel r12, zr, r2, eq ; CHECK-NEXT: vmov r2, s10 ; CHECK-NEXT: asrs r3, r1, #31 -; CHECK-NEXT: subs r1, r1, r2 +; CHECK-NEXT: cmp r1, r2 ; CHECK-NEXT: sbcs.w r1, r3, r2, asr #31 ; CHECK-NEXT: csel r1, zr, r12, ge ; CHECK-NEXT: rsbs r1, r1, #0 @@ -1049,7 +1049,7 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_r_multi_v2i32(<2 x i64> %a, <2 x i32> %b, ; CHECK-NEXT: csel r12, zr, r2, eq ; CHECK-NEXT: vmov r2, s8 ; CHECK-NEXT: asrs r3, r1, #31 -; CHECK-NEXT: subs r1, r1, r2 +; CHECK-NEXT: cmp r1, r2 ; CHECK-NEXT: sbcs.w r1, r3, r2, asr #31 ; CHECK-NEXT: vmov r2, s6 ; CHECK-NEXT: csel r1, zr, r12, ge @@ -1062,7 +1062,7 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_r_multi_v2i32(<2 x i64> %a, <2 x i32> %b, ; CHECK-NEXT: csel r12, zr, r2, eq ; CHECK-NEXT: vmov r2, s10 ; CHECK-NEXT: asrs r3, r1, #31 -; CHECK-NEXT: subs r1, r1, r2 +; CHECK-NEXT: cmp r1, r2 ; CHECK-NEXT: sbcs.w r1, r3, r2, asr #31 ; CHECK-NEXT: csel r1, zr, r12, ge ; CHECK-NEXT: rsbs r1, r1, #0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll index 35e578e425e74..4b671693dc25d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll @@ -497,11 +497,11 @@ define arm_aapcs_vfpcc i64 @uminv2i64(<2 x i64> %vec, i64 %min) { ; 
CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmov r12, lr, d1 ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: subs.w r4, r2, r12 +; CHECK-NEXT: cmp r2, r12 ; CHECK-NEXT: sbcs.w r4, r3, lr ; CHECK-NEXT: csel r2, r2, r12, lo ; CHECK-NEXT: csel r3, r3, lr, lo -; CHECK-NEXT: subs r4, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r4, r3, r1 ; CHECK-NEXT: csel r0, r2, r0, lo ; CHECK-NEXT: csel r1, r3, r1, lo @@ -519,11 +519,11 @@ define arm_aapcs_vfpcc i64 @sminv2i64(<2 x i64> %vec, i64 %min) { ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmov r12, lr, d1 ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: subs.w r4, r2, r12 +; CHECK-NEXT: cmp r2, r12 ; CHECK-NEXT: sbcs.w r4, r3, lr ; CHECK-NEXT: csel r2, r2, r12, lt ; CHECK-NEXT: csel r3, r3, lr, lt -; CHECK-NEXT: subs r4, r2, r0 +; CHECK-NEXT: cmp r2, r0 ; CHECK-NEXT: sbcs.w r4, r3, r1 ; CHECK-NEXT: csel r0, r2, r0, lt ; CHECK-NEXT: csel r1, r3, r1, lt @@ -541,11 +541,11 @@ define arm_aapcs_vfpcc i64 @umaxv2i64(<2 x i64> %vec, i64 %max) { ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmov r12, lr, d0 ; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: subs.w r4, r2, r12 +; CHECK-NEXT: cmp r2, r12 ; CHECK-NEXT: sbcs.w r4, r3, lr ; CHECK-NEXT: csel r2, r12, r2, lo ; CHECK-NEXT: csel r3, lr, r3, lo -; CHECK-NEXT: subs r4, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: sbcs.w r4, r1, r3 ; CHECK-NEXT: csel r0, r2, r0, lo ; CHECK-NEXT: csel r1, r3, r1, lo @@ -563,11 +563,11 @@ define arm_aapcs_vfpcc i64 @smaxv2i64(<2 x i64> %vec, i64 %max) { ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmov r12, lr, d0 ; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: subs.w r4, r2, r12 +; CHECK-NEXT: cmp r2, r12 ; CHECK-NEXT: sbcs.w r4, r3, lr ; CHECK-NEXT: csel r2, r12, r2, lt ; CHECK-NEXT: csel r3, lr, r3, lt -; CHECK-NEXT: subs r4, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: sbcs.w r4, r1, r3 ; CHECK-NEXT: csel r0, r2, r0, lt ; CHECK-NEXT: csel r1, r3, r1, lt diff --git a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll index 
bf0d92b5e0303..a0b36dd39b291 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll @@ -494,7 +494,7 @@ define <2 x i64> @large_i128(<2 x double> %x) { ; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: mov r5, r2 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r7, r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: mov.w r9, #1 ; CHECK-NEXT: sbcs r7, r3, #0 ; CHECK-NEXT: mov.w r4, #0 @@ -515,7 +515,7 @@ define <2 x i64> @large_i128(<2 x double> %x) { ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r5, r2, #1 +; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: sbcs r5, r3, #0 ; CHECK-NEXT: cset r5, lt ; CHECK-NEXT: cmp r5, #0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll b/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll index 75f7350fcd5b1..5ca386f20f840 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll @@ -167,13 +167,13 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_smaxmin(<2 x i64> %s0) { ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: mvn r12, #-2147483648 ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: subs.w r0, r0, r12 +; CHECK-NEXT: cmp r0, r12 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: bfi r3, r1, #0, #8 ; CHECK-NEXT: vmov r1, r2, d1 -; CHECK-NEXT: subs.w r1, r1, r12 +; CHECK-NEXT: cmp r1, r12 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r3, r1, #8, #8 @@ -239,12 +239,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_sminmax(<2 x i64> %s0) { ; CHECK-NEXT: mvn r3, #-2147483648 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vmov r1, r2, d0 -; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: cmp r1, r3 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #0, #8 ; CHECK-NEXT: vmov r1, r2, d1 -; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: cmp r1, r3 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #8, #8 @@ -278,13 +278,13 @@ 
define arm_aapcs_vfpcc <2 x i64> @vqmovni64_umaxmin(<2 x i64> %s0) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov.i64 q1, #0xffffffff -; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov.w r1, #0 ; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r2, d1 -; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: sbcs r0, r2, #0 ; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 @@ -302,13 +302,13 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_uminmax(<2 x i64> %s0) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov.i64 q1, #0xffffffff -; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov.w r1, #0 ; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r2, d1 -; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: sbcs r0, r2, #0 ; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 diff --git a/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll b/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll index f78d36222c312..8312d0d2e0537 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll @@ -182,17 +182,16 @@ define arm_aapcs_vfpcc <2 x i64> @vqshrni64_smaxmin(<2 x i64> %so) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: vmov r0, r1, d1 -; CHECK-NEXT: mvn r12, #-2147483648 ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: asrl r0, r1, #3 +; CHECK-NEXT: mvn r12, #-2147483648 +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: asrl r2, r3, #3 +; CHECK-NEXT: asrl r0, r1, #3 +; CHECK-NEXT: cmp r2, r12 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r0 -; CHECK-NEXT: subs.w r2, r2, r12 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r3, r1 ; CHECK-NEXT: csetm lr, lt -; CHECK-NEXT: subs.w r0, r0, r12 +; CHECK-NEXT: cmp r0, r12 ; 
CHECK-NEXT: mov.w r2, #0 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r2, lr, #0, #8 @@ -200,10 +199,11 @@ define arm_aapcs_vfpcc <2 x i64> @vqshrni64_smaxmin(<2 x i64> %so) { ; CHECK-NEXT: bfi r2, r0, #8, #8 ; CHECK-NEXT: adr r0, .LCPI12_0 ; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmov q0[3], q0[1], r3, r1 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: mov.w r2, #-1 -; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: rsbs.w r0, r0, #-2147483648 ; CHECK-NEXT: sbcs.w r0, r2, r1 @@ -268,12 +268,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqshrni64_sminmax(<2 x i64> %so) { ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: movs r6, #0 ; CHECK-NEXT: vmov r0, r1, d0 -; CHECK-NEXT: subs r0, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r6, r0, #0, #8 ; CHECK-NEXT: vmov r0, r1, d1 -; CHECK-NEXT: subs r0, r0, r2 +; CHECK-NEXT: cmp r0, r2 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r6, r0, #8, #8 @@ -306,21 +306,21 @@ entry: define arm_aapcs_vfpcc <2 x i64> @vqshrni64_umaxmin(<2 x i64> %so) { ; CHECK-LABEL: vqshrni64_umaxmin: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov r0, r1, d1 -; CHECK-NEXT: vmov.i64 q1, #0xffffffff ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: lsrl r0, r1, #3 +; CHECK-NEXT: vmov.i64 q1, #0xffffffff +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: lsrl r2, r3, #3 +; CHECK-NEXT: lsrl r0, r1, #3 +; CHECK-NEXT: cmp.w r2, #-1 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r0 -; CHECK-NEXT: subs.w r2, r2, #-1 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r3, r1 ; CHECK-NEXT: csetm r2, lo -; CHECK-NEXT: subs.w r0, r0, #-1 -; CHECK-NEXT: mov.w r3, #0 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: bfi r3, r2, #0, #8 +; CHECK-NEXT: vmov q0[3], q0[1], r3, r1 +; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: bfi r3, r0, 
#8, #8 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -335,21 +335,21 @@ entry: define arm_aapcs_vfpcc <2 x i64> @vqshrni64_uminmax(<2 x i64> %so) { ; CHECK-LABEL: vqshrni64_uminmax: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov r0, r1, d1 -; CHECK-NEXT: vmov.i64 q1, #0xffffffff ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: lsrl r0, r1, #3 +; CHECK-NEXT: vmov.i64 q1, #0xffffffff +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: lsrl r2, r3, #3 +; CHECK-NEXT: lsrl r0, r1, #3 +; CHECK-NEXT: cmp.w r2, #-1 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r0 -; CHECK-NEXT: subs.w r2, r2, #-1 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r3, r1 ; CHECK-NEXT: csetm r2, lo -; CHECK-NEXT: subs.w r0, r0, #-1 -; CHECK-NEXT: mov.w r3, #0 +; CHECK-NEXT: cmp.w r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: bfi r3, r2, #0, #8 +; CHECK-NEXT: vmov q0[3], q0[1], r3, r1 +; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q0, q0, q1 diff --git a/llvm/test/CodeGen/Thumb2/thumb2-cmn2.ll b/llvm/test/CodeGen/Thumb2/thumb2-cmn2.ll index 83195aeb0bc30..44dd03b3ef784 100644 --- a/llvm/test/CodeGen/Thumb2/thumb2-cmn2.ll +++ b/llvm/test/CodeGen/Thumb2/thumb2-cmn2.ll @@ -1,9 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s ; -0x000000bb = 4294967109 define i32 @f1(i32 %a) { ; CHECK-LABEL: f1: -; CHECK: adds {{r.*}}, #187 +; CHECK: @ %bb.0: +; CHECK-NEXT: cmn.w r0, #187 +; CHECK-NEXT: mov.w r0, #24 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r0, #42 +; CHECK-NEXT: bx lr %tmp = icmp ne i32 %a, 4294967109 %ret = select i1 %tmp, i32 42, i32 24 ret i32 %ret @@ -12,7 +18,13 @@ define i32 @f1(i32 %a) { ; -0x00aa00aa = 4283826006 define i32 @f2(i32 %a) { ; CHECK-LABEL: f2: -; CHECK: cmn.w {{r.*}}, #11141290 +; CHECK: @ %bb.0: 
+; CHECK-NEXT: movs r1, #24 +; CHECK-NEXT: cmn.w r0, #11141290 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq r1, #42 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bx lr %tmp = icmp eq i32 %a, 4283826006 %ret = select i1 %tmp, i32 42, i32 24 ret i32 %ret @@ -21,7 +33,13 @@ define i32 @f2(i32 %a) { ; -0xcc00cc00 = 872363008 define i32 @f3(i32 %a) { ; CHECK-LABEL: f3: -; CHECK: cmn.w {{r.*}}, #-872363008 +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #24 +; CHECK-NEXT: cmn.w r0, #-872363008 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r1, #42 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bx lr %tmp = icmp ne i32 %a, 872363008 %ret = select i1 %tmp, i32 42, i32 24 ret i32 %ret @@ -30,7 +48,13 @@ define i32 @f3(i32 %a) { ; -0x00110000 = 4293853184 define i32 @f4(i32 %a) { ; CHECK-LABEL: f4: -; CHECK: cmn.w {{r.*}}, #1114112 +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #24 +; CHECK-NEXT: cmn.w r0, #1114112 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq r1, #42 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bx lr %tmp = icmp eq i32 %a, 4293853184 %ret = select i1 %tmp, i32 42, i32 24 ret i32 %ret diff --git a/llvm/test/CodeGen/Thumb2/thumb2-sbc.ll b/llvm/test/CodeGen/Thumb2/thumb2-sbc.ll index b04dae61cef75..049bcc8356c0f 100644 --- a/llvm/test/CodeGen/Thumb2/thumb2-sbc.ll +++ b/llvm/test/CodeGen/Thumb2/thumb2-sbc.ll @@ -1,63 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s define i64 @f1(i64 %a, i64 %b) { -; CHECK: f1 -; CHECK: subs r0, r0, r2 +; CHECK-LABEL: f1: +; CHECK: @ %bb.0: +; CHECK-NEXT: subs r0, r0, r2 +; CHECK-NEXT: sbcs r1, r3 +; CHECK-NEXT: bx lr %tmp = sub i64 %a, %b ret i64 %tmp } ; 734439407618 = 0x000000ab00000002 define i64 @f2(i64 %a) { -; CHECK: f2 -; CHECK: subs r0, #2 -; CHECK: sbc r1, r1, #171 +; CHECK-LABEL: f2: +; CHECK: @ %bb.0: +; CHECK-NEXT: subs r0, #2 +; CHECK-NEXT: sbc r1, r1, #171 +; CHECK-NEXT: bx lr %tmp = sub i64 %a, 
734439407618 ret i64 %tmp } ; 5066626890203138 = 0x0012001200000002 define i64 @f3(i64 %a) { -; CHECK: f3 -; CHECK: subs r0, #2 -; CHECK: sbc r1, r1, #1179666 +; CHECK-LABEL: f3: +; CHECK: @ %bb.0: +; CHECK-NEXT: subs r0, #2 +; CHECK-NEXT: sbc r1, r1, #1179666 +; CHECK-NEXT: bx lr %tmp = sub i64 %a, 5066626890203138 ret i64 %tmp } ; 3747052064576897026 = 0x3400340000000002 define i64 @f4(i64 %a) { -; CHECK: f4 -; CHECK: subs r0, #2 -; CHECK: sbc r1, r1, #872428544 +; CHECK-LABEL: f4: +; CHECK: @ %bb.0: +; CHECK-NEXT: subs r0, #2 +; CHECK-NEXT: sbc r1, r1, #872428544 +; CHECK-NEXT: bx lr %tmp = sub i64 %a, 3747052064576897026 ret i64 %tmp } ; 6221254862626095106 = 0x5656565600000002 define i64 @f5(i64 %a) { -; CHECK: f5 -; CHECK: subs r0, #2 -; CHECK: adc r1, r1, #-1448498775 +; CHECK-LABEL: f5: +; CHECK: @ %bb.0: +; CHECK-NEXT: subs r0, #2 +; CHECK-NEXT: adc r1, r1, #-1448498775 +; CHECK-NEXT: bx lr %tmp = sub i64 %a, 6221254862626095106 ret i64 %tmp } ; 287104476244869122 = 0x03fc000000000002 define i64 @f6(i64 %a) { -; CHECK: f6 -; CHECK: subs r0, #2 -; CHECK: sbc r1, r1, #66846720 +; CHECK-LABEL: f6: +; CHECK: @ %bb.0: +; CHECK-NEXT: subs r0, #2 +; CHECK-NEXT: sbc r1, r1, #66846720 +; CHECK-NEXT: bx lr %tmp = sub i64 %a, 287104476244869122 ret i64 %tmp } ; Example from numerics code that manually computes wider-than-64 values. ; -; CHECK-LABEL: livecarry: -; CHECK: adds -; CHECK: adc define i64 @livecarry(i64 %carry, i32 %digit) nounwind { +; CHECK-LABEL: livecarry: +; CHECK: @ %bb.0: +; CHECK-NEXT: cmn r0, r2 +; CHECK-NEXT: mov.w r3, #0 +; CHECK-NEXT: adcs r0, r1, #0 +; CHECK-NEXT: adc r1, r3, #0 +; CHECK-NEXT: bx lr %ch = lshr i64 %carry, 32 %cl = and i64 %carry, 4294967295 %truncdigit = zext i32 %digit to i64