diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index cf88c1f4ae937..3b2dafc9e0fa8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4382,6 +4382,43 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known.Zero |= APInt::getBitsSetFrom(BitWidth, VT.getScalarSizeInBits());
     break;
   }
+  case ISD::ATOMIC_LOAD: {
+    // If we are looking at the loaded value.
+    if (Op.getResNo() == 0) {
+      auto *AT = cast<AtomicSDNode>(Op);
+      unsigned ScalarMemorySize = AT->getMemoryVT().getScalarSizeInBits();
+      KnownBits KnownScalarMemory(ScalarMemorySize);
+      if (const MDNode *MD = AT->getRanges())
+        computeKnownBitsFromRangeMetadata(*MD, KnownScalarMemory);
+
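+      // Widen the known bits of the in-memory value to the result width,
+      // following the extension kind recorded on the load; an opaque EXTLOAD
+      // can still use the extension the target promises for atomic ops.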
+      switch (AT->getExtensionType()) {
+      case ISD::ZEXTLOAD:
+        Known = KnownScalarMemory.zext(BitWidth);
+        break;
+      case ISD::SEXTLOAD:
+        Known = KnownScalarMemory.sext(BitWidth);
+        break;
+      case ISD::EXTLOAD:
+        switch (TLI->getExtendForAtomicOps()) {
+        case ISD::ZERO_EXTEND:
+          Known = KnownScalarMemory.zext(BitWidth);
+          break;
+        case ISD::SIGN_EXTEND:
+          Known = KnownScalarMemory.sext(BitWidth);
+          break;
+        default:
+          Known = KnownScalarMemory.anyext(BitWidth);
+          break;
+        }
+        break;
+      case ISD::NON_EXTLOAD:
+        Known = KnownScalarMemory;
+        break;
+      }
+      assert(Known.getBitWidth() == BitWidth);
+    }
+    break;
+  }
   case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
     if (Op.getResNo() == 1) {
       // The boolean result conforms to getBooleanContents.
@@ -4407,21 +4444,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
   case ISD::ATOMIC_LOAD_MIN:
   case ISD::ATOMIC_LOAD_MAX:
   case ISD::ATOMIC_LOAD_UMIN:
-  case ISD::ATOMIC_LOAD_UMAX:
-  case ISD::ATOMIC_LOAD: {
+  case ISD::ATOMIC_LOAD_UMAX: {
     // If we are looking at the loaded value.
     if (Op.getResNo() == 0) {
       auto *AT = cast<AtomicSDNode>(Op);
       unsigned MemBits = AT->getMemoryVT().getScalarSizeInBits();
-      // For atomic_load, prefer to use the extension type.
-      if (Op->getOpcode() == ISD::ATOMIC_LOAD) {
-        if (AT->getExtensionType() == ISD::ZEXTLOAD)
-          Known.Zero.setBitsFrom(MemBits);
-        else if (AT->getExtensionType() != ISD::SEXTLOAD &&
-                 TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
-          Known.Zero.setBitsFrom(MemBits);
-      } else if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
+      if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND)
         Known.Zero.setBitsFrom(MemBits);
     }
     break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 66bd78af2939c..4b05d89417d2c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5150,9 +5150,10 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
   auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
 
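+  // Forward any !range metadata on the IR load to the MachineMemOperand so
+  // that computeKnownBits can take it into account.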
+  const MDNode *Ranges = getRangeMetadata(I);
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
       MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
-      I.getAlign(), AAMDNodes(), nullptr, SSID, Order);
+      I.getAlign(), AAMDNodes(), Ranges, SSID, Order);
 
   InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
diff --git a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
new file mode 100644
index 0000000000000..8097179443791
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll
@@ -0,0 +1,1362 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s
+
+
+; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s
+
+; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s
+
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-TSO %s
+
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-TSO %s
+
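+; The i1 tests load an i8 annotated with !range !0 (= [0, 2)) and !noundef !1,
+; which lets computeKnownBits prove the upper bits are already zero: with +a,
+; the zeroext i1 return needs no mask after the lb.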
+define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i1_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i1_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: ret
+  %1 = load atomic i8, ptr %a unordered, align 1, !range !0, !noundef !1
+  %2 = trunc nuw i8 %1 to i1
+  ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i1_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i1_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: ret
+  %1 = load atomic i8, ptr %a monotonic, align 1, !range !0, !noundef !1
+  %2 = trunc nuw i8 %1 to i1
+  ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i1_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i1_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i1_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i1_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: ret
+  %1 = load atomic i8, ptr %a acquire, align 1, !range !0, !noundef !1
+  %2 = trunc nuw i8 %1 to i1
+  ret i1 %2
+}
+
+define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i1_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i1_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: andi a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i1_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: ret
+  %1 = load atomic i8, ptr %a seq_cst, align 1, !range !0, !noundef !1
+  %2 = trunc nuw i8 %1 to i1
+  ret i1 %2
+}
+
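+; The remaining tests carry no range metadata, so the zeroext returns below
+; still need explicit zero-extension: lb sign-extends, hence the zext.b.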
+define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i8_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i8_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: ret
+  %1 = load atomic i8, ptr %a unordered, align 1
+  ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i8_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lb a0, 0(a0)
+; RV32IA-NEXT: zext.b a0, a0
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i8_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lb a0, 0(a0)
+; RV64IA-NEXT: zext.b a0, a0
+; RV64IA-NEXT: ret
+  %1 = load atomic i8, ptr %a monotonic, align 1
+  ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i8_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: zext.b a0, a0
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i8_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i8_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: zext.b a0, a0
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i8_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-TSO-NEXT: ret
+  %1 = load atomic i8, ptr %a acquire, align 1
+  ret i8 %1
+}
+
+define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i8_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_1
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-NEXT: zext.b a0, a0
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-NEXT: zext.b a0, a0
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i8_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_1
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-NEXT: zext.b a0, a0
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-NEXT: zext.b a0, a0
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: zext.b a0, a0
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lb.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: zext.b a0, a0
+; RV64IA-ZALASR-NEXT: ret
+  %1 = load atomic i8, ptr %a seq_cst, align 1
+  ret i8 %1
+}
+
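+; i16: the base ISA has no zext.h, so the zero-extension is a slli/srli pair.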
+define zeroext i16 @atomic_load_i16_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i16_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lh a0, 0(a0)
+; RV32IA-NEXT: slli a0, a0, 16
+; RV32IA-NEXT: srli a0, a0, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i16_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i16_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lh a0, 0(a0)
+; RV64IA-NEXT: slli a0, a0, 48
+; RV64IA-NEXT: srli a0, a0, 48
+; RV64IA-NEXT: ret
+  %1 = load atomic i16, ptr %a unordered, align 2
+  ret i16 %1
+}
+
+define zeroext i16 @atomic_load_i16_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i16_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lh a0, 0(a0)
+; RV32IA-NEXT: slli a0, a0, 16
+; RV32IA-NEXT: srli a0, a0, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i16_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i16_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lh a0, 0(a0)
+; RV64IA-NEXT: slli a0, a0, 48
+; RV64IA-NEXT: srli a0, a0, 48
+; RV64IA-NEXT: ret
+  %1 = load atomic i16, ptr %a monotonic, align 2
+  ret i16 %1
+}
+
+define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i16_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: slli a0, a0, 16
+; RV32IA-WMO-NEXT: srli a0, a0, 16
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i16_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lh a0, 0(a0)
+; RV32IA-TSO-NEXT: slli a0, a0, 16
+; RV32IA-TSO-NEXT: srli a0, a0, 16
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i16_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i16_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: slli a0, a0, 48
+; RV64IA-WMO-NEXT: srli a0, a0, 48
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i16_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lh a0, 0(a0)
+; RV64IA-TSO-NEXT: slli a0, a0, 48
+; RV64IA-TSO-NEXT: srli a0, a0, 48
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lh.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: slli a0, a0, 16
+; RV32IA-ZALASR-WMO-NEXT: srli a0, a0, 16
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lh a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: slli a0, a0, 16
+; RV32IA-ZALASR-TSO-NEXT: srli a0, a0, 16
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lh.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: slli a0, a0, 48
+; RV64IA-ZALASR-WMO-NEXT: srli a0, a0, 48
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lh a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: slli a0, a0, 48
+; RV64IA-ZALASR-TSO-NEXT: srli a0, a0, 48
+; RV64IA-ZALASR-TSO-NEXT: ret
+  %1 = load atomic i16, ptr %a acquire, align 2
+  ret i16 %1
+}
+
+define zeroext i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i16_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_2
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-NEXT: slli a0, a0, 16
+; RV32IA-WMO-NEXT: srli a0, a0, 16
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lh a0, 0(a0)
+; RV32IA-TSO-NEXT: slli a0, a0, 16
+; RV32IA-TSO-NEXT: srli a0, a0, 16
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i16_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_2
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-NEXT: slli a0, a0, 48
+; RV64IA-WMO-NEXT: srli a0, a0, 48
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: lh a0, 0(a0)
+; RV64IA-TSO-NEXT: slli a0, a0, 48
+; RV64IA-TSO-NEXT: srli a0, a0, 48
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 16
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 48
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lh.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: slli a0, a0, 16
+; RV32IA-ZALASR-NEXT: srli a0, a0, 16
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lh.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: slli a0, a0, 48
+; RV64IA-ZALASR-NEXT: srli a0, a0, 48
+; RV64IA-ZALASR-NEXT: ret
+  %1 = load atomic i16, ptr %a seq_cst, align 2
+  ret i16 %1
+}
+
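+; i32: lw sign-extends on RV64, so the zeroext return needs a slli/srli pair;
+; on RV32 the value is already full-width.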
+define zeroext i32 @atomic_load_i32_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i32_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lw a0, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i32_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i32_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lw a0, 0(a0)
+; RV64IA-NEXT: slli a0, a0, 32
+; RV64IA-NEXT: srli a0, a0, 32
+; RV64IA-NEXT: ret
+  %1 = load atomic i32, ptr %a unordered, align 4
+  ret i32 %1
+}
+
+define zeroext i32 @atomic_load_i32_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i32_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lw a0, 0(a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i32_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i32_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lw a0, 0(a0)
+; RV64IA-NEXT: slli a0, a0, 32
+; RV64IA-NEXT: srli a0, a0, 32
+; RV64IA-NEXT: ret
+  %1 = load atomic i32, ptr %a monotonic, align 4
+  ret i32 %1
+}
+
+define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i32_acquire:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i32_acquire:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i32_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i32_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: slli a0, a0, 32
+; RV64IA-WMO-NEXT: srli a0, a0, 32
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i32_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-NEXT: slli a0, a0, 32
+; RV64IA-TSO-NEXT: srli a0, a0, 32
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lw.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lw a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lw.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: slli a0, a0, 32
+; RV64IA-ZALASR-WMO-NEXT: srli a0, a0, 32
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lw a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: slli a0, a0, 32
+; RV64IA-ZALASR-TSO-NEXT: srli a0, a0, 32
+; RV64IA-ZALASR-TSO-NEXT: ret
+  %1 = load atomic i32, ptr %a acquire, align 4
+  ret i32 %1
+}
+
+define zeroext i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i32_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-WMO-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-WMO: # %bb.0:
+; RV32IA-WMO-NEXT: fence rw, rw
+; RV32IA-WMO-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-NEXT: fence r, rw
+; RV32IA-WMO-NEXT: ret
+;
+; RV32IA-TSO-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-TSO: # %bb.0:
+; RV32IA-TSO-NEXT: fence rw, rw
+; RV32IA-TSO-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i32_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_4
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-NEXT: slli a0, a0, 32
+; RV64IA-WMO-NEXT: srli a0, a0, 32
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-NEXT: slli a0, a0, 32
+; RV64IA-TSO-NEXT: srli a0, a0, 32
+; RV64IA-TSO-NEXT: ret
+;
+; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV32IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 32
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lw.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lw.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: slli a0, a0, 32
+; RV64IA-ZALASR-NEXT: srli a0, a0, 32
+; RV64IA-ZALASR-NEXT: ret
+  %1 = load atomic i32, ptr %a seq_cst, align 4
+  ret i32 %1
+}
+
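+; i64: RV32 has no 8-byte atomic load and calls __atomic_load_8; on RV64, ld
+; is already full-width, so no extension is needed.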
+define zeroext i64 @atomic_load_i64_unordered(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_unordered:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i64_unordered:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 0
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i64_unordered:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i64_unordered:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: ld a0, 0(a0)
+; RV64IA-NEXT: ret
+  %1 = load atomic i64, ptr %a unordered, align 8
+  ret i64 %1
+}
+
+define zeroext i64 @atomic_load_i64_monotonic(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_monotonic:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i64_monotonic:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 0
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i64_monotonic:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomic_load_i64_monotonic:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: ld a0, 0(a0)
+; RV64IA-NEXT: ret
+  %1 = load atomic i64, ptr %a monotonic, align 8
+  ret i64 %1
+}
+
+define zeroext i64 @atomic_load_i64_acquire(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_acquire:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i64_acquire:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 2
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i64_acquire:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i64_acquire:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i64_acquire:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i64_acquire:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i64_acquire:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i64_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: ld.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i64_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: ld a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: ret
+  %1 = load atomic i64, ptr %a acquire, align 8
+  ret i64 %1
+}
+
+define zeroext i64 @atomic_load_i64_seq_cst(ptr %a) nounwind {
+; RV32I-LABEL: atomic_load_i64_seq_cst:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 5
+; RV32I-NEXT: call __atomic_load_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomic_load_i64_seq_cst:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 5
+; RV32IA-NEXT: call __atomic_load_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomic_load_i64_seq_cst:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 5
+; RV64I-NEXT: call __atomic_load_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-WMO-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-WMO: # %bb.0:
+; RV64IA-WMO-NEXT: fence rw, rw
+; RV64IA-WMO-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-NEXT: fence r, rw
+; RV64IA-WMO-NEXT: ret
+;
+; RV64IA-TSO-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-TSO: # %bb.0:
+; RV64IA-TSO-NEXT: fence rw, rw
+; RV64IA-TSO-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-NEXT: ret
+;
+; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-WMO-TRAILING-FENCE: # %bb.0:
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw
+; RV64IA-WMO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-TSO-TRAILING-FENCE: # %bb.0:
+; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
+; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: ld.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: ret
+  %1 = load atomic i64, ptr %a seq_cst, align 8
+  ret i64 %1
+}
+
+!0 = !{i8 0, i8 2}
+!1 = !{}