diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a75fa688d87a8..b13d380ae7c7e 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1999,6 +1999,19 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
     Value *IntValue = Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
     Args.push_back(IntValue);
+
+    // Set the zeroext/signext attributes on the parameter if needed to match
+    // the target's ABI.
+    if (TLI->shouldExtendTypeInLibCall(
+            TLI->getMemValueType(DL, SizedIntTy))) {
+      // The only atomic operations affected by signedness are min/max, and
+      // we don't have __atomic_ libcalls for them, so IsSigned is always
+      // false.
+      if (TLI->shouldSignExtendTypeInLibCall(SizedIntTy, false /*IsSigned*/))
+        Attr = Attr.addParamAttribute(Ctx, Args.size() - 1, Attribute::SExt);
+      else
+        Attr = Attr.addParamAttribute(Ctx, Args.size() - 1, Attribute::ZExt);
+    }
   } else {
     AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
     AllocaValue->setAlignment(AllocaAlignment);
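Illustration (not part of the patch): a minimal IR sketch of the effect of the AtomicExpandPass change, assuming a target whose ABI wants sub-word libcall arguments extended and the usual libatomic convention for __atomic_fetch_add_2 (memory-order argument 5 = seq_cst). The function name fetch_add16 is hypothetical.

  ; The i16 value argument now carries an extension attribute; before this
  ; patch it was passed with its upper bits unspecified.
  declare i16 @__atomic_fetch_add_2(ptr, i16 zeroext, i32)

  define i16 @fetch_add16(ptr %p, i16 %v) {
    ; zeroext here because shouldSignExtendTypeInLibCall(i16, /*IsSigned=*/false)
    ; is false on most targets; an ABI that sign-extends would get signext.
    %old = call i16 @__atomic_fetch_add_2(ptr %p, i16 zeroext %v, i32 5)
    ret i16 %old
  }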
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f56097fdbb51a..9cd4e42cfd062 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4386,23 +4386,46 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
     AtomicOrdering Order = cast<AtomicSDNode>(Node)->getMergedOrdering();
     RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, Order, VT);
     EVT RetVT = Node->getValueType(0);
+    SDValue ChainIn = Node->getOperand(0);
+    SDValue Pointer = Node->getOperand(1);
+    SDLoc dl(Node);
     SmallVector<SDValue, 4> Ops;
+
+    // Zero/sign extend small operands if required by the target's ABI.
+    SmallVector<SDValue, 4> ExtendedOps;
+    for (auto Op = Node->op_begin() + 2, E = Node->op_end(); Op != E; ++Op) {
+      if (TLI.shouldExtendTypeInLibCall(VT)) {
+        bool IsSigned =
+            Opc == ISD::ATOMIC_LOAD_MIN || Opc == ISD::ATOMIC_LOAD_MAX;
+        if (TLI.shouldSignExtendTypeInLibCall(
+                EVT(VT).getTypeForEVT(*DAG.getContext()), IsSigned))
+          ExtendedOps.push_back(DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+                                            Op->getValueType(), *Op,
+                                            DAG.getValueType(VT)));
+        else
+          ExtendedOps.push_back(DAG.getZeroExtendInReg(*Op, dl, VT));
+
+      } else {
+        ExtendedOps.push_back(*Op);
+      }
+    }
+
     if (TLI.getLibcallName(LC)) {
       // If outline atomic available, prepare its arguments and expand.
-      Ops.append(Node->op_begin() + 2, Node->op_end());
-      Ops.push_back(Node->getOperand(1));
+      Ops.append(ExtendedOps.begin(), ExtendedOps.end());
+      Ops.push_back(Pointer);
     } else {
       LC = RTLIB::getSYNC(Opc, VT);
       assert(LC != RTLIB::UNKNOWN_LIBCALL &&
              "Unexpected atomic op or value type!");
       // Arguments for expansion to sync libcall
-      Ops.append(Node->op_begin() + 1, Node->op_end());
+      Ops.push_back(Pointer);
+      Ops.append(ExtendedOps.begin(), ExtendedOps.end());
     }
-    std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
-                                                      Ops, CallOptions,
-                                                      SDLoc(Node),
-                                                      Node->getOperand(0));
+
+    std::pair<SDValue, SDValue> Tmp =
+        TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, dl, ChainIn);
     Results.push_back(Tmp.first);
     Results.push_back(Tmp.second);
     break;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 1987c892ac080..2a015f8ed67bc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1378,6 +1378,8 @@ class AArch64TargetLowering : public TargetLowering {
   bool shouldScalarizeBinop(SDValue VecOp) const override {
     return VecOp.getOpcode() == ISD::SETCC;
   }
+
+  bool shouldExtendTypeInLibCall(EVT Type) const override { return false; }
 };

 namespace AArch64 {
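On the SelectionDAG path the same ABI decision shows up as explicit extension nodes rather than IR attributes, and min/max are the only operations whose operand is extended as signed. A hand-written sketch (not taken from the test suite; the function name rmw_max8 is hypothetical) of the effect for a sub-word signed RMW:

  define i8 @rmw_max8(ptr %p, i8 %v) {
    ; atomicrmw max/min are signed, so on a target where
    ; shouldExtendTypeInLibCall(i8) is true the value operand is
    ; sign-extended in-register before the __sync_fetch_and_max_1 call
    ; (the sxtb in the ARM tests below); unsigned operations such as add
    ; get a zero-extension (uxtb) instead.
    %old = atomicrmw max ptr %p, i8 %v seq_cst
    ret i8 %old
  }

AArch64 opts out instead: under standard AAPCS64 the upper bits of sub-word arguments are unspecified, so no extension is required, and the new shouldExtendTypeInLibCall override returning false is what deletes the now-redundant and/uxth masking in the AArch64 test updates below.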
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
index 21729b9dfd101..b650040617ecd 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
@@ -58,13 +58,13 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
 ; SOFTFP-NOLSE-NEXT: bl __addsf3
 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -72,7 +72,7 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
 ; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
 ; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
@@ -146,13 +146,13 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
 ; SOFTFP-NOLSE-NEXT: bl __addsf3
 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -160,7 +160,7 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
 ; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
 ; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
@@ -711,19 +711,19 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w19
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w23, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w23
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w1, w24
 ; SOFTFP-NOLSE-NEXT: bl __addsf3
 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w21
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w25, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w22
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w1, w25
 ; SOFTFP-NOLSE-NEXT: bl __addsf3
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
index 9b5e48d2b4217..41c5afe0f64a9 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
@@ -60,13 +60,13 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
 ; SOFTFP-NOLSE-NEXT: bl fmaxf
 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -74,7 +74,7 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
 ; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
 ; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
@@ -148,13 +148,13 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
 ; SOFTFP-NOLSE-NEXT: bl fmaxf
 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -162,7 +162,7 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
 ; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
 ; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
@@ -591,19 +591,19 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; SOFTFP-NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT: // Child Loop BB6_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w19
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w23, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w23
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w1, w24
 ; SOFTFP-NOLSE-NEXT: bl fmaxf
 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w21
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w25, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w22
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w1, w25
 ; SOFTFP-NOLSE-NEXT: bl fmaxf
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
index f6c542fe7d407..a01bd182e61e6 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
@@ -60,13 +60,13 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
 ; SOFTFP-NOLSE-NEXT: bl fminf
 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -74,7 +74,7 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
 ; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
 ; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
@@ -148,13 +148,13 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
 ; SOFTFP-NOLSE-NEXT: bl fminf
 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -162,7 +162,7 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
 ; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
 ; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
@@ -591,19 +591,19 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; SOFTFP-NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT: // Child Loop BB6_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w19
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w23, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w23
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w1, w24
 ; SOFTFP-NOLSE-NEXT: bl fminf
 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w21
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w25, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w22
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w1, w25
 ; SOFTFP-NOLSE-NEXT: bl fminf
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
index 82e0f14e68e26..01beb5c50afdd 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
@@ -58,13 +58,13 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
 ; SOFTFP-NOLSE-NEXT: bl __subsf3
 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -72,7 +72,7 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
 ; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
 ; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
@@ -146,13 +146,13 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
 ; SOFTFP-NOLSE-NEXT: bl __subsf3
 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -160,7 +160,7 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
 ; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
 ; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
 ; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
 ; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
@@ -711,19 +711,19 @@ define <2 x half> @test_atomicrmw_fsub_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
 ; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w19
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w23, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w23
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w1, w24
 ; SOFTFP-NOLSE-NEXT: bl __subsf3
 ; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w21
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w25, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w22
 ; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT: mov w1, w25
 ; SOFTFP-NOLSE-NEXT: bl __subsf3
diff --git a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
index 63b8a1cee27ae..771963cfd7042 100644
--- a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
+++ b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
@@ -20,7 +20,6 @@ define void @f16_arg(half %arg, ptr %ptr) #0 {
 ; NOFP16-NEXT: .cfi_def_cfa_offset 16
 ; NOFP16-NEXT: .cfi_offset w19, -8
 ; NOFP16-NEXT: .cfi_offset w30, -16
-; NOFP16-NEXT: and w0, w0, #0xffff
 ; NOFP16-NEXT: mov x19, x1
 ; NOFP16-NEXT: bl __extendhfsf2
 ; NOFP16-NEXT: str w0, [x19]
@@ -41,12 +40,11 @@ define void @v2f16_arg(<2 x half> %arg, ptr %ptr) #0 {
 ; NOFP16-NEXT: .cfi_offset w20, -16
 ; NOFP16-NEXT: .cfi_offset w21, -24
 ; NOFP16-NEXT: .cfi_offset w30, -32
-; NOFP16-NEXT: and w0, w0, #0xffff
 ; NOFP16-NEXT: mov x19, x2
 ; NOFP16-NEXT: mov w20, w1
 ; NOFP16-NEXT: bl __extendhfsf2
 ; NOFP16-NEXT: mov w21, w0
-; NOFP16-NEXT: and w0, w20, #0xffff
+; NOFP16-NEXT: mov w0, w20
 ; NOFP16-NEXT: bl __extendhfsf2
 ; NOFP16-NEXT: stp w21, w0, [x19]
 ; NOFP16-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
@@ -70,15 +68,15 @@ define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
 ; NOFP16-NEXT: .cfi_offset w22, -32
 ; NOFP16-NEXT: .cfi_offset w30, -48
 ; NOFP16-NEXT: mov w21, w0
-; NOFP16-NEXT: and w0, w1, #0xffff
+; NOFP16-NEXT: mov w0, w1
 ; NOFP16-NEXT: mov x19, x3
 ; NOFP16-NEXT: mov w20, w2
 ; NOFP16-NEXT: bl __extendhfsf2
 ; NOFP16-NEXT: mov w22, w0
-; NOFP16-NEXT: and w0, w21, #0xffff
+; NOFP16-NEXT: mov w0, w21
 ; NOFP16-NEXT: bl __extendhfsf2
 ; NOFP16-NEXT: mov w8, w0
-; NOFP16-NEXT: and w0, w20, #0xffff
+; NOFP16-NEXT: mov w0, w20
 ; NOFP16-NEXT: orr x21, x8, x22, lsl #32
 ; NOFP16-NEXT: bl __extendhfsf2
 ; NOFP16-NEXT: str x21, [x19]
@@ -105,20 +103,19 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
 ; NOFP16-NEXT: .cfi_offset w22, -32
 ; NOFP16-NEXT: .cfi_offset w23, -40
 ; NOFP16-NEXT: .cfi_offset w30, -48
-; NOFP16-NEXT: and w0, w0, #0xffff
 ; NOFP16-NEXT: mov x19, x4
 ; NOFP16-NEXT: mov w20, w3
 ; NOFP16-NEXT: mov w21, w2
 ; NOFP16-NEXT: mov w22, w1
 ; NOFP16-NEXT: bl __extendhfsf2
 ; NOFP16-NEXT: mov w23, w0
-; NOFP16-NEXT: and w0, w22, #0xffff
+; NOFP16-NEXT: mov w0, w22
 ; NOFP16-NEXT: bl __extendhfsf2
 ; NOFP16-NEXT: mov w22, w0
-; NOFP16-NEXT: and w0, w21, #0xffff
+; NOFP16-NEXT: mov w0, w21
 ; NOFP16-NEXT: bl __extendhfsf2
 ; NOFP16-NEXT: mov w21, w0
-; NOFP16-NEXT: and w0, w20, #0xffff
+; NOFP16-NEXT: mov w0, w20
 ; NOFP16-NEXT: bl __extendhfsf2
 ; NOFP16-NEXT: stp w21, w0, [x19, #8]
 ; NOFP16-NEXT: stp w23, w22, [x19]
diff --git a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
index 4bf42d4ac9629..d231687c4f176 100644
--- a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
@@ -12,10 +12,10 @@ define zeroext i1 @test_cmpxchg_res_i8(ptr %addr, i8 %desired, i8 zeroext %new)
 ; CHECK-ARM-LABEL: test_cmpxchg_res_i8:
 ; CHECK-ARM: .save {r4, lr}
 ; CHECK-ARM-NEXT: push {r4, lr}
-; CHECK-ARM-NEXT: mov r4, r1
+; CHECK-ARM-NEXT: and r4, r1, #255
+; CHECK-ARM-NEXT: mov r1, r4
 ; CHECK-ARM-NEXT: bl __sync_val_compare_and_swap_1
-; CHECK-ARM-NEXT: and r1, r4, #255
-; CHECK-ARM-NEXT: sub r0, r0, r1
+; CHECK-ARM-NEXT: sub r0, r0, r4
 ; CHECK-ARM-NEXT: rsbs r1, r0, #0
 ; CHECK-ARM-NEXT: adc r0, r0, r1
 ; CHECK-ARM-NEXT: pop {r4, lr}
@@ -25,10 +25,11 @@ define zeroext i1 @test_cmpxchg_res_i8(ptr %addr, i8 %desired, i8 zeroext %new)
 ; CHECK-THUMB: .save {r4, lr}
 ; CHECK-THUMB-NEXT: push {r4, lr}
 ; CHECK-THUMB-NEXT: movs r4, r1
-; CHECK-THUMB-NEXT: bl __sync_val_compare_and_swap_1
 ; CHECK-THUMB-NEXT: movs r1, #255
-; CHECK-THUMB-NEXT: ands r1, r4
-; CHECK-THUMB-NEXT: subs r1, r0, r1
+; CHECK-THUMB-NEXT: ands r4, r1
+; CHECK-THUMB-NEXT: movs r1, r4
+; CHECK-THUMB-NEXT: bl __sync_val_compare_and_swap_1
+; CHECK-THUMB-NEXT: subs r1, r0, r4
 ; CHECK-THUMB-NEXT: rsbs r0, r1, #0
 ; CHECK-THUMB-NEXT: adcs r0, r1
 ; CHECK-THUMB-NEXT: pop {r4}
@@ -52,10 +53,10 @@ define zeroext i1 @test_cmpxchg_res_i8(ptr %addr, i8 %desired, i8 zeroext %new)
 ; CHECK-THUMBV6-LABEL: test_cmpxchg_res_i8:
 ; CHECK-THUMBV6: .save {r4, lr}
 ; CHECK-THUMBV6-NEXT: push {r4, lr}
-; CHECK-THUMBV6-NEXT: mov r4, r1
+; CHECK-THUMBV6-NEXT: uxtb r4, r1
+; CHECK-THUMBV6-NEXT: mov r1, r4
 ; CHECK-THUMBV6-NEXT: bl __sync_val_compare_and_swap_1
-; CHECK-THUMBV6-NEXT: uxtb r1, r4
-; CHECK-THUMBV6-NEXT: subs r1, r0, r1
+; CHECK-THUMBV6-NEXT: subs r1, r0, r4
 ; CHECK-THUMBV6-NEXT: rsbs r0, r1, #0
 ; CHECK-THUMBV6-NEXT: adcs r0, r1
 ; CHECK-THUMBV6-NEXT: pop {r4, pc}
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29..63dd68e8e607b 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -143,7 +143,7 @@ define void @test3(ptr %ptr1, ptr %ptr2) {
 ; ARMV4-NEXT: mov r4, r1
 ; ARMV4-NEXT: mov r1, #0
 ; ARMV4-NEXT: bl __atomic_load_1
-; ARMV4-NEXT: mov r1, r0
+; ARMV4-NEXT: and r1, r0, #255
 ; ARMV4-NEXT: mov r0, r4
 ; ARMV4-NEXT: mov r2, #0
 ; ARMV4-NEXT: bl __atomic_store_1
@@ -203,7 +203,7 @@ define void @test4(ptr %ptr1, ptr %ptr2) {
 ; THUMBONE-NEXT: movs r1, #0
 ; THUMBONE-NEXT: mov r2, r1
 ; THUMBONE-NEXT: bl __sync_val_compare_and_swap_1
-; THUMBONE-NEXT: mov r1, r0
+; THUMBONE-NEXT: uxtb r1, r0
 ; THUMBONE-NEXT: mov r0, r4
 ; THUMBONE-NEXT: bl __sync_lock_test_and_set_1
 ; THUMBONE-NEXT: pop {r4, pc}
@@ -214,7 +214,7 @@ define void @test4(ptr %ptr1, ptr %ptr2) {
 ; ARMV4-NEXT: mov r4, r1
 ; ARMV4-NEXT: mov r1, #5
 ; ARMV4-NEXT: bl __atomic_load_1
-; ARMV4-NEXT: mov r1, r0
+; ARMV4-NEXT: and r1, r0, #255
 ; ARMV4-NEXT: mov r0, r4
 ; ARMV4-NEXT: mov r2, #5
 ; ARMV4-NEXT: bl __atomic_store_1
@@ -324,17 +324,17 @@ define void @test_old_store_64bit(ptr %p, i64 %v) {
 ;
 ; ARMOPTNONE-LABEL: test_old_store_64bit:
 ; ARMOPTNONE: @ %bb.0:
-; ARMOPTNONE-NEXT: push {r4, r5, r7, r8, r10, r11, lr}
-; ARMOPTNONE-NEXT: add r7, sp, #20
-; ARMOPTNONE-NEXT: sub sp, sp, #24
-; ARMOPTNONE-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; ARMOPTNONE-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
-; ARMOPTNONE-NEXT: dmb ish
-; ARMOPTNONE-NEXT: ldr r1, [r0]
-; ARMOPTNONE-NEXT: ldr r0, [r0, #4]
-; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
-; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; ARMOPTNONE-NEXT: push {r4, r5, r7, r8, r10, r11, lr}
+; ARMOPTNONE-NEXT: add r7, sp, #20
+; ARMOPTNONE-NEXT: sub sp, sp, #24
+; ARMOPTNONE-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; ARMOPTNONE-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: ldr r1, [r0]
+; ARMOPTNONE-NEXT: ldr r0, [r0, #4]
+; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
 ; ARMOPTNONE-NEXT: b LBB5_1
 ; ARMOPTNONE-NEXT: LBB5_1: @ %atomicrmw.start
 ; ARMOPTNONE-NEXT: @ =>This Loop Header: Depth=1
@@ -381,7 +381,7 @@ define void @test_old_store_64bit(ptr %p, i64 %v) {
 ; ARMOPTNONE-NEXT: LBB5_5: @ %atomicrmw.end
 ; ARMOPTNONE-NEXT: dmb ish
 ; ARMOPTNONE-NEXT: sub sp, r7, #20
-; ARMOPTNONE-NEXT: pop {r4, r5, r7, r8, r10, r11, pc}
+; ARMOPTNONE-NEXT: pop {r4, r5, r7, r8, r10, r11, pc}
 ;
 ; THUMBTWO-LABEL: test_old_store_64bit:
 ; THUMBTWO: @ %bb.0:
@@ -692,12 +692,16 @@ define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) {
 ; THUMBONE-LABEL: store_atomic_f16__seq_cst:
 ; THUMBONE: @ %bb.0:
 ; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: uxth r1, r1
 ; THUMBONE-NEXT: bl __sync_lock_test_and_set_2
 ; THUMBONE-NEXT: pop {r7, pc}
 ;
 ; ARMV4-LABEL: store_atomic_f16__seq_cst:
 ; ARMV4: @ %bb.0:
 ; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: mov r2, #255
+; ARMV4-NEXT: orr r2, r2, #65280
+; ARMV4-NEXT: and r1, r1, r2
 ; ARMV4-NEXT: mov r2, #5
 ; ARMV4-NEXT: bl __atomic_store_2
 ; ARMV4-NEXT: pop {r11, lr}
@@ -753,12 +757,16 @@ define void @store_atomic_bf16__seq_cst(ptr %ptr, bfloat %val1) {
 ; THUMBONE-LABEL: store_atomic_bf16__seq_cst:
 ; THUMBONE: @ %bb.0:
 ; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: uxth r1, r1
 ; THUMBONE-NEXT: bl __sync_lock_test_and_set_2
 ; THUMBONE-NEXT: pop {r7, pc}
 ;
 ; ARMV4-LABEL: store_atomic_bf16__seq_cst:
 ; ARMV4: @ %bb.0:
 ; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: mov r2, #255
+; ARMV4-NEXT: orr r2, r2, #65280
+; ARMV4-NEXT: and r1, r1, r2
 ; ARMV4-NEXT: mov r2, #5
 ; ARMV4-NEXT: bl __atomic_store_2
 ; ARMV4-NEXT: pop {r11, lr}
@@ -862,19 +870,19 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
 ;
 ; ARMOPTNONE-LABEL: store_atomic_f64__seq_cst:
 ; ARMOPTNONE: @ %bb.0:
-; ARMOPTNONE-NEXT: push {r4, r5, r7, r8, r10, r11, lr}
-; ARMOPTNONE-NEXT: add r7, sp, #20
-; ARMOPTNONE-NEXT: sub sp, sp, #24
-; ARMOPTNONE-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; ARMOPTNONE-NEXT: vmov d16, r1, r2
-; ARMOPTNONE-NEXT: vmov r1, r2, d16
-; ARMOPTNONE-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
-; ARMOPTNONE-NEXT: dmb ish
-; ARMOPTNONE-NEXT: ldr r1, [r0]
-; ARMOPTNONE-NEXT: ldr r0, [r0, #4]
-; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
-; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; ARMOPTNONE-NEXT: push {r4, r5, r7, r8, r10, r11, lr}
+; ARMOPTNONE-NEXT: add r7, sp, #20
+; ARMOPTNONE-NEXT: sub sp, sp, #24
+; ARMOPTNONE-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; ARMOPTNONE-NEXT: vmov d16, r1, r2
+; ARMOPTNONE-NEXT: vmov r1, r2, d16
+; ARMOPTNONE-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: ldr r1, [r0]
+; ARMOPTNONE-NEXT: ldr r0, [r0, #4]
+; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
 ; ARMOPTNONE-NEXT: b LBB13_1
 ; ARMOPTNONE-NEXT: LBB13_1: @ %atomicrmw.start
 ; ARMOPTNONE-NEXT: @ =>This Loop Header: Depth=1
@@ -921,7 +929,7 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
 ; ARMOPTNONE-NEXT: LBB13_5: @ %atomicrmw.end
 ; ARMOPTNONE-NEXT: dmb ish
 ; ARMOPTNONE-NEXT: sub sp, r7, #20
-; ARMOPTNONE-NEXT: pop {r4, r5, r7, r8, r10, r11, pc}
+; ARMOPTNONE-NEXT: pop {r4, r5, r7, r8, r10, r11, pc}
 ;
 ; THUMBTWO-LABEL: store_atomic_f64__seq_cst:
 ; THUMBTWO: @ %bb.0:
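The ARMv4 store_atomic_f16/bf16 churn above shows what the new zero-extension costs on pre-ARMv6 targets: there is no uxth, and 0xffff is not a valid ARM modified-immediate, so the mask is built in two instructions. A stripped-down sketch of the pattern (hand-written; the actual test additionally pins the target and registers):

  define void @store_half(ptr %p, half %v) {
    ; The half is passed as an i16 to the libcall; on ARMv4 its
    ; zero-extension is materialized as
    ;   mov r2, #255 ; orr r2, r2, #65280 ; and r1, r1, r2
    ; before the call to __atomic_store_2.
    store atomic half %v, ptr %p seq_cst, align 2
    ret void
  }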
diff --git a/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
index db71eae97544d..9743761c08e22 100644
--- a/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
+++ b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
@@ -359,3 +359,267 @@ define i64 @cmpxchg64(ptr %p) {
   %res.0 = extractvalue { i64, i1 } %res, 0
   ret i64 %res.0
 }
+
+define void @trunc_store8(ptr %p, i32 %val) {
+; NO-ATOMIC32-LABEL: trunc_store8:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r7, lr}
+; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: uxtb r1, r1
+; NO-ATOMIC32-NEXT: movs r2, #5
+; NO-ATOMIC32-NEXT: bl __atomic_store_1
+; NO-ATOMIC32-NEXT: pop {r7, pc}
+;
+; ATOMIC32-LABEL: trunc_store8:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: strb r1, [r0]
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: bx lr
+  %trunc = trunc i32 %val to i8
+  store atomic i8 %trunc, ptr %p seq_cst, align 1
+  ret void
+}
+
+define i8 @trunc_rmw8(ptr %p, i32 %val) {
+; NO-ATOMIC32-LABEL: trunc_rmw8:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r7, lr}
+; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: uxtb r1, r1
+; NO-ATOMIC32-NEXT: movs r2, #5
+; NO-ATOMIC32-NEXT: bl __atomic_fetch_add_1
+; NO-ATOMIC32-NEXT: pop {r7, pc}
+;
+; ATOMIC32-LABEL: trunc_rmw8:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: .save {r7, lr}
+; ATOMIC32-NEXT: push {r7, lr}
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: uxtb r1, r1
+; ATOMIC32-NEXT: bl __sync_fetch_and_add_1
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: pop {r7, pc}
+  %trunc = trunc i32 %val to i8
+  %v = atomicrmw add ptr %p, i8 %trunc seq_cst, align 1
+  ret i8 %v
+}
+
+define i8 @trunc_rmw8_signed(ptr %p, i32 %val) {
+; NO-ATOMIC32-LABEL: trunc_rmw8_signed:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r4, r5, r6, lr}
+; NO-ATOMIC32-NEXT: push {r4, r5, r6, lr}
+; NO-ATOMIC32-NEXT: .pad #8
+; NO-ATOMIC32-NEXT: sub sp, #8
+; NO-ATOMIC32-NEXT: mov r4, r1
+; NO-ATOMIC32-NEXT: mov r5, r0
+; NO-ATOMIC32-NEXT: ldrb r0, [r0]
+; NO-ATOMIC32-NEXT: b .LBB18_2
+; NO-ATOMIC32-NEXT: .LBB18_1: @ %atomicrmw.start
+; NO-ATOMIC32-NEXT: @ in Loop: Header=BB18_2 Depth=1
+; NO-ATOMIC32-NEXT: uxtb r2, r0
+; NO-ATOMIC32-NEXT: mov r0, r5
+; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_1
+; NO-ATOMIC32-NEXT: mov r1, r0
+; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
+; NO-ATOMIC32-NEXT: cmp r1, #0
+; NO-ATOMIC32-NEXT: bne .LBB18_4
+; NO-ATOMIC32-NEXT: .LBB18_2: @ %atomicrmw.start
+; NO-ATOMIC32-NEXT: @ =>This Inner Loop Header: Depth=1
+; NO-ATOMIC32-NEXT: add r1, sp, #4
+; NO-ATOMIC32-NEXT: strb r0, [r1]
+; NO-ATOMIC32-NEXT: movs r3, #5
+; NO-ATOMIC32-NEXT: str r3, [sp]
+; NO-ATOMIC32-NEXT: sxtb r2, r4
+; NO-ATOMIC32-NEXT: sxtb r6, r0
+; NO-ATOMIC32-NEXT: cmp r6, r2
+; NO-ATOMIC32-NEXT: bgt .LBB18_1
+; NO-ATOMIC32-NEXT: @ %bb.3: @ %atomicrmw.start
+; NO-ATOMIC32-NEXT: @ in Loop: Header=BB18_2 Depth=1
+; NO-ATOMIC32-NEXT: mov r0, r4
+; NO-ATOMIC32-NEXT: b .LBB18_1
+; NO-ATOMIC32-NEXT: .LBB18_4: @ %atomicrmw.end
+; NO-ATOMIC32-NEXT: add sp, #8
+; NO-ATOMIC32-NEXT: pop {r4, r5, r6, pc}
+;
+; ATOMIC32-LABEL: trunc_rmw8_signed:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: .save {r7, lr}
+; ATOMIC32-NEXT: push {r7, lr}
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: sxtb r1, r1
+; ATOMIC32-NEXT: bl __sync_fetch_and_max_1
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: pop {r7, pc}
+  %trunc = trunc i32 %val to i8
+  %v = atomicrmw max ptr %p, i8 %trunc seq_cst, align 1
+  ret i8 %v
+}
+
+define i8 @trunc_cmpxchg8(ptr %p, i32 %cmp, i32 %new) {
+; NO-ATOMIC32-LABEL: trunc_cmpxchg8:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r4, lr}
+; NO-ATOMIC32-NEXT: push {r4, lr}
+; NO-ATOMIC32-NEXT: .pad #8
+; NO-ATOMIC32-NEXT: sub sp, #8
+; NO-ATOMIC32-NEXT: add r4, sp, #4
+; NO-ATOMIC32-NEXT: strb r1, [r4]
+; NO-ATOMIC32-NEXT: movs r3, #5
+; NO-ATOMIC32-NEXT: str r3, [sp]
+; NO-ATOMIC32-NEXT: uxtb r2, r2
+; NO-ATOMIC32-NEXT: mov r1, r4
+; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_1
+; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
+; NO-ATOMIC32-NEXT: add sp, #8
+; NO-ATOMIC32-NEXT: pop {r4, pc}
+;
+; ATOMIC32-LABEL: trunc_cmpxchg8:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: .save {r7, lr}
+; ATOMIC32-NEXT: push {r7, lr}
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: uxtb r1, r1
+; ATOMIC32-NEXT: uxtb r2, r2
+; ATOMIC32-NEXT: bl __sync_val_compare_and_swap_1
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: pop {r7, pc}
+  %trunc_cmp = trunc i32 %cmp to i8
+  %trunc_new = trunc i32 %new to i8
+  %res = cmpxchg ptr %p, i8 %trunc_cmp, i8 %trunc_new seq_cst seq_cst, align 1
+  %res.0 = extractvalue { i8, i1 } %res, 0
+  ret i8 %res.0
+}
+
+define void @trunc_store16(ptr %p, i32 %val) {
+; NO-ATOMIC32-LABEL: trunc_store16:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r7, lr}
+; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: uxth r1, r1
+; NO-ATOMIC32-NEXT: movs r2, #5
+; NO-ATOMIC32-NEXT: bl __atomic_store_2
+; NO-ATOMIC32-NEXT: pop {r7, pc}
+;
+; ATOMIC32-LABEL: trunc_store16:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: strh r1, [r0]
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: bx lr
+  %trunc = trunc i32 %val to i16
+  store atomic i16 %trunc, ptr %p seq_cst, align 2
+  ret void
+}
+
+define i16 @trunc_rmw16(ptr %p, i32 %val) {
+; NO-ATOMIC32-LABEL: trunc_rmw16:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r7, lr}
+; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: uxth r1, r1
+; NO-ATOMIC32-NEXT: movs r2, #5
+; NO-ATOMIC32-NEXT: bl __atomic_fetch_add_2
+; NO-ATOMIC32-NEXT: pop {r7, pc}
+;
+; ATOMIC32-LABEL: trunc_rmw16:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: .save {r7, lr}
+; ATOMIC32-NEXT: push {r7, lr}
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: uxth r1, r1
+; ATOMIC32-NEXT: bl __sync_fetch_and_add_2
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: pop {r7, pc}
+  %trunc = trunc i32 %val to i16
+  %v = atomicrmw add ptr %p, i16 %trunc seq_cst, align 2
+  ret i16 %v
+}
+
+define i16 @trunc_rmw16_signed(ptr %p, i32 %val) {
+; NO-ATOMIC32-LABEL: trunc_rmw16_signed:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r4, r5, r6, lr}
+; NO-ATOMIC32-NEXT: push {r4, r5, r6, lr}
+; NO-ATOMIC32-NEXT: .pad #8
+; NO-ATOMIC32-NEXT: sub sp, #8
+; NO-ATOMIC32-NEXT: mov r4, r1
+; NO-ATOMIC32-NEXT: mov r5, r0
+; NO-ATOMIC32-NEXT: ldrh r0, [r0]
+; NO-ATOMIC32-NEXT: b .LBB22_2
+; NO-ATOMIC32-NEXT: .LBB22_1: @ %atomicrmw.start
+; NO-ATOMIC32-NEXT: @ in Loop: Header=BB22_2 Depth=1
+; NO-ATOMIC32-NEXT: uxth r2, r0
+; NO-ATOMIC32-NEXT: mov r0, r5
+; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_2
+; NO-ATOMIC32-NEXT: mov r1, r0
+; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
+; NO-ATOMIC32-NEXT: cmp r1, #0
+; NO-ATOMIC32-NEXT: bne .LBB22_4
+; NO-ATOMIC32-NEXT: .LBB22_2: @ %atomicrmw.start
+; NO-ATOMIC32-NEXT: @ =>This Inner Loop Header: Depth=1
+; NO-ATOMIC32-NEXT: add r1, sp, #4
+; NO-ATOMIC32-NEXT: strh r0, [r1]
+; NO-ATOMIC32-NEXT: movs r3, #5
+; NO-ATOMIC32-NEXT: str r3, [sp]
+; NO-ATOMIC32-NEXT: sxth r2, r4
+; NO-ATOMIC32-NEXT: sxth r6, r0
+; NO-ATOMIC32-NEXT: cmp r6, r2
+; NO-ATOMIC32-NEXT: bgt .LBB22_1
+; NO-ATOMIC32-NEXT: @ %bb.3: @ %atomicrmw.start
+; NO-ATOMIC32-NEXT: @ in Loop: Header=BB22_2 Depth=1
+; NO-ATOMIC32-NEXT: mov r0, r4
+; NO-ATOMIC32-NEXT: b .LBB22_1
+; NO-ATOMIC32-NEXT: .LBB22_4: @ %atomicrmw.end
+; NO-ATOMIC32-NEXT: add sp, #8
+; NO-ATOMIC32-NEXT: pop {r4, r5, r6, pc}
+;
+; ATOMIC32-LABEL: trunc_rmw16_signed:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: .save {r7, lr}
+; ATOMIC32-NEXT: push {r7, lr}
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: sxth r1, r1
+; ATOMIC32-NEXT: bl __sync_fetch_and_max_2
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: pop {r7, pc}
+  %trunc = trunc i32 %val to i16
+  %v = atomicrmw max ptr %p, i16 %trunc seq_cst, align 2
+  ret i16 %v
+}
+
+define i16 @trunc_cmpxchg16(ptr %p, i32 %cmp, i32 %new) {
+; NO-ATOMIC32-LABEL: trunc_cmpxchg16:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r4, lr}
+; NO-ATOMIC32-NEXT: push {r4, lr}
+; NO-ATOMIC32-NEXT: .pad #8
+; NO-ATOMIC32-NEXT: sub sp, #8
+; NO-ATOMIC32-NEXT: add r4, sp, #4
+; NO-ATOMIC32-NEXT: strh r1, [r4]
+; NO-ATOMIC32-NEXT: movs r3, #5
+; NO-ATOMIC32-NEXT: str r3, [sp]
+; NO-ATOMIC32-NEXT: uxth r2, r2
+; NO-ATOMIC32-NEXT: mov r1, r4
+; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_2
+; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
+; NO-ATOMIC32-NEXT: add sp, #8
+; NO-ATOMIC32-NEXT: pop {r4, pc}
+;
+; ATOMIC32-LABEL: trunc_cmpxchg16:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: .save {r7, lr}
+; ATOMIC32-NEXT: push {r7, lr}
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: uxth r1, r1
+; ATOMIC32-NEXT: uxth r2, r2
+; ATOMIC32-NEXT: bl __sync_val_compare_and_swap_2
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: pop {r7, pc}
+  %trunc_cmp = trunc i32 %cmp to i16
+  %trunc_new = trunc i32 %new to i16
+  %res = cmpxchg ptr %p, i16 %trunc_cmp, i16 %trunc_new seq_cst seq_cst, align 2
+  %res.0 = extractvalue { i16, i1 } %res, 0
+  ret i16 %res.0
+}
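One more detail visible in the new thumbv6m tests: for cmpxchg both sub-word value operands are libcall arguments, so on the __sync path both get extended (the paired uxtb r1 / uxtb r2 before __sync_val_compare_and_swap_1 above); on the __atomic path the expected value travels through memory and only the replacement value is extended in a register. A minimal sketch (hand-written; the function name cmpxchg8 is hypothetical):

  define i8 @cmpxchg8(ptr %p, i8 %cmp, i8 %new) {
    ; cmpxchg has no signed variant, so IsSigned is false and the extension
    ; kind is whatever the ABI picks for unsigned values (zero-extension on
    ; ARM).
    %pair = cmpxchg ptr %p, i8 %cmp, i8 %new seq_cst seq_cst
    %old = extractvalue { i8, i1 } %pair, 0
    ret i8 %old
  }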