From 9678b8ea25c5d64291995d58fc7f4c0448d9065b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20G=C3=B3rski?= Date: Sat, 19 Apr 2025 23:28:02 +0200 Subject: [PATCH 1/6] Added pre-commit test. --- llvm/test/CodeGen/RISCV/atomic-load-zext.ll | 1386 +++++++++++++++++++ 1 file changed, 1386 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/atomic-load-zext.ll diff --git a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll new file mode 100644 index 0000000000000..ca76a9ba58614 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll @@ -0,0 +1,1386 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s +; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+a,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+no-trailing-seq-cst-fence -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s + + +; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s +; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s + +; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso 
-verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s + +; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-WMO %s +; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-TSO %s + +; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-WMO %s +; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-TSO %s + +define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i1_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_1 +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i1_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lb a0, 0(a0) +; RV32IA-NEXT: zext.b a0, a0 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i1_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_1 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i1_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lb a0, 0(a0) +; RV64IA-NEXT: zext.b a0, a0 +; RV64IA-NEXT: ret + %1 = load atomic i8, ptr %a unordered, align 1 + %2 = trunc nuw i8 %1 to i1 + ret i1 %2 +} + +define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind { +; RV32I-LABEL: 
atomic_load_i1_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_1 +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i1_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lb a0, 0(a0) +; RV32IA-NEXT: zext.b a0, a0 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i1_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_1 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i1_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lb a0, 0(a0) +; RV64IA-NEXT: zext.b a0, a0 +; RV64IA-NEXT: ret + %1 = load atomic i8, ptr %a monotonic, align 1 + %2 = trunc nuw i8 %1 to i1 + ret i1 %2 +} + +define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i1_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: call __atomic_load_1 +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_i1_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: lb a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: zext.b a0, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_i1_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lb a0, 0(a0) +; RV32IA-TSO-NEXT: zext.b a0, a0 +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_i1_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: call __atomic_load_1 
+; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i1_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lb a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: zext.b a0, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i1_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lb a0, 0(a0) +; RV64IA-TSO-NEXT: zext.b a0, a0 +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire: +; RV32IA-ZALASR-WMO: # %bb.0: +; RV32IA-ZALASR-WMO-NEXT: lb.aq a0, (a0) +; RV32IA-ZALASR-WMO-NEXT: zext.b a0, a0 +; RV32IA-ZALASR-WMO-NEXT: ret +; +; RV32IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire: +; RV32IA-ZALASR-TSO: # %bb.0: +; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0) +; RV32IA-ZALASR-TSO-NEXT: zext.b a0, a0 +; RV32IA-ZALASR-TSO-NEXT: ret +; +; RV64IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire: +; 
RV64IA-ZALASR-WMO: # %bb.0: +; RV64IA-ZALASR-WMO-NEXT: lb.aq a0, (a0) +; RV64IA-ZALASR-WMO-NEXT: zext.b a0, a0 +; RV64IA-ZALASR-WMO-NEXT: ret +; +; RV64IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire: +; RV64IA-ZALASR-TSO: # %bb.0: +; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0) +; RV64IA-ZALASR-TSO-NEXT: zext.b a0, a0 +; RV64IA-ZALASR-TSO-NEXT: ret + %1 = load atomic i8, ptr %a acquire, align 1 + %2 = trunc nuw i8 %1 to i1 + ret i1 %2 +} + +define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i1_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: call __atomic_load_1 +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_i1_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, rw +; RV32IA-WMO-NEXT: lb a0, 0(a0) +; RV32IA-WMO-NEXT: zext.b a0, a0 +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_i1_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: lb a0, 0(a0) +; RV32IA-TSO-NEXT: zext.b a0, a0 +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_i1_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __atomic_load_1 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i1_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, rw +; RV64IA-WMO-NEXT: lb a0, 0(a0) +; RV64IA-WMO-NEXT: zext.b a0, a0 +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i1_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: lb a0, 0(a0) +; RV64IA-TSO-NEXT: zext.b a0, a0 +; 
RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-ZALASR-LABEL: atomic_load_i1_seq_cst: +; RV32IA-ZALASR: # %bb.0: +; RV32IA-ZALASR-NEXT: lb.aq a0, (a0) +; RV32IA-ZALASR-NEXT: zext.b a0, a0 +; RV32IA-ZALASR-NEXT: ret +; +; RV64IA-ZALASR-LABEL: atomic_load_i1_seq_cst: +; RV64IA-ZALASR: # %bb.0: +; RV64IA-ZALASR-NEXT: lb.aq a0, (a0) +; RV64IA-ZALASR-NEXT: zext.b a0, a0 +; RV64IA-ZALASR-NEXT: ret + %1 = load atomic i8, ptr %a seq_cst, align 1 + %2 = trunc nuw i8 %1 to i1 + ret i1 %2 +} + +define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i8_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_1 +; RV32I-NEXT: 
zext.b a0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i8_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lb a0, 0(a0) +; RV32IA-NEXT: zext.b a0, a0 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i8_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_1 +; RV64I-NEXT: zext.b a0, a0 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i8_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lb a0, 0(a0) +; RV64IA-NEXT: zext.b a0, a0 +; RV64IA-NEXT: ret + %1 = load atomic i8, ptr %a unordered, align 1 + ret i8 %1 +} + +define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i8_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_1 +; RV32I-NEXT: zext.b a0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i8_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lb a0, 0(a0) +; RV32IA-NEXT: zext.b a0, a0 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i8_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_1 +; RV64I-NEXT: zext.b a0, a0 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i8_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lb a0, 0(a0) +; RV64IA-NEXT: zext.b a0, a0 +; RV64IA-NEXT: ret + %1 = load atomic i8, ptr %a monotonic, align 1 + ret i8 %1 +} + +define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind { +; RV32I-LABEL: 
atomic_load_i8_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: call __atomic_load_1 +; RV32I-NEXT: zext.b a0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_i8_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: lb a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: zext.b a0, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_i8_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lb a0, 0(a0) +; RV32IA-TSO-NEXT: zext.b a0, a0 +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_i8_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: call __atomic_load_1 +; RV64I-NEXT: zext.b a0, a0 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i8_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lb a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: zext.b a0, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i8_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lb a0, 0(a0) +; RV64IA-TSO-NEXT: zext.b a0, a0 +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; 
RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire: +; RV32IA-ZALASR-WMO: # %bb.0: +; RV32IA-ZALASR-WMO-NEXT: lb.aq a0, (a0) +; RV32IA-ZALASR-WMO-NEXT: zext.b a0, a0 +; RV32IA-ZALASR-WMO-NEXT: ret +; +; RV32IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire: +; RV32IA-ZALASR-TSO: # %bb.0: +; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0) +; RV32IA-ZALASR-TSO-NEXT: zext.b a0, a0 +; RV32IA-ZALASR-TSO-NEXT: ret +; +; RV64IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire: +; RV64IA-ZALASR-WMO: # %bb.0: +; RV64IA-ZALASR-WMO-NEXT: lb.aq a0, (a0) +; RV64IA-ZALASR-WMO-NEXT: zext.b a0, a0 +; RV64IA-ZALASR-WMO-NEXT: ret +; +; RV64IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire: +; RV64IA-ZALASR-TSO: # %bb.0: +; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0) +; RV64IA-ZALASR-TSO-NEXT: zext.b a0, a0 +; RV64IA-ZALASR-TSO-NEXT: ret + %1 = load atomic i8, ptr %a acquire, align 1 + ret i8 %1 +} + +define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i8_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: call __atomic_load_1 +; RV32I-NEXT: zext.b a0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, rw +; RV32IA-WMO-NEXT: lb a0, 0(a0) +; RV32IA-WMO-NEXT: zext.b a0, a0 +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_i8_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fence 
rw, rw +; RV32IA-TSO-NEXT: lb a0, 0(a0) +; RV32IA-TSO-NEXT: zext.b a0, a0 +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_i8_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __atomic_load_1 +; RV64I-NEXT: zext.b a0, a0 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i8_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, rw +; RV64IA-WMO-NEXT: lb a0, 0(a0) +; RV64IA-WMO-NEXT: zext.b a0, a0 +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i8_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: lb a0, 0(a0) +; RV64IA-TSO-NEXT: zext.b a0, a0 +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 
0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-ZALASR-LABEL: atomic_load_i8_seq_cst: +; RV32IA-ZALASR: # %bb.0: +; RV32IA-ZALASR-NEXT: lb.aq a0, (a0) +; RV32IA-ZALASR-NEXT: zext.b a0, a0 +; RV32IA-ZALASR-NEXT: ret +; +; RV64IA-ZALASR-LABEL: atomic_load_i8_seq_cst: +; RV64IA-ZALASR: # %bb.0: +; RV64IA-ZALASR-NEXT: lb.aq a0, (a0) +; RV64IA-ZALASR-NEXT: zext.b a0, a0 +; RV64IA-ZALASR-NEXT: ret + %1 = load atomic i8, ptr %a seq_cst, align 1 + ret i8 %1 +} + +define zeroext i16 @atomic_load_i16_unordered(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i16_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_2 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i16_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lh a0, 0(a0) +; RV32IA-NEXT: slli a0, a0, 16 +; RV32IA-NEXT: srli a0, a0, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i16_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_2 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i16_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lh a0, 0(a0) +; RV64IA-NEXT: slli a0, a0, 48 +; RV64IA-NEXT: srli a0, a0, 48 +; RV64IA-NEXT: ret + %1 = load atomic i16, ptr %a unordered, align 2 + ret i16 %1 +} + +define zeroext i16 @atomic_load_i16_monotonic(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i16_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; 
RV32I-NEXT: call __atomic_load_2 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i16_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lh a0, 0(a0) +; RV32IA-NEXT: slli a0, a0, 16 +; RV32IA-NEXT: srli a0, a0, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i16_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_2 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i16_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lh a0, 0(a0) +; RV64IA-NEXT: slli a0, a0, 48 +; RV64IA-NEXT: srli a0, a0, 48 +; RV64IA-NEXT: ret + %1 = load atomic i16, ptr %a monotonic, align 2 + ret i16 %1 +} + +define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i16_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: call __atomic_load_2 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_i16_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: lh a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: slli a0, a0, 16 +; RV32IA-WMO-NEXT: srli a0, a0, 16 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_i16_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lh a0, 0(a0) +; RV32IA-TSO-NEXT: slli a0, a0, 16 +; RV32IA-TSO-NEXT: srli a0, a0, 16 +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_i16_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte 
Folded Spill +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: call __atomic_load_2 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i16_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lh a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: slli a0, a0, 48 +; RV64IA-WMO-NEXT: srli a0, a0, 48 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i16_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lh a0, 0(a0) +; RV64IA-TSO-NEXT: slli a0, a0, 48 +; RV64IA-TSO-NEXT: srli a0, a0, 48 +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 16 +; RV32IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 16 +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 16 +; RV32IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 16 +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 48 +; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 48 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 48 +; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 48 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire: +; 
RV32IA-ZALASR-WMO: # %bb.0: +; RV32IA-ZALASR-WMO-NEXT: lh.aq a0, (a0) +; RV32IA-ZALASR-WMO-NEXT: slli a0, a0, 16 +; RV32IA-ZALASR-WMO-NEXT: srli a0, a0, 16 +; RV32IA-ZALASR-WMO-NEXT: ret +; +; RV32IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire: +; RV32IA-ZALASR-TSO: # %bb.0: +; RV32IA-ZALASR-TSO-NEXT: lh a0, 0(a0) +; RV32IA-ZALASR-TSO-NEXT: slli a0, a0, 16 +; RV32IA-ZALASR-TSO-NEXT: srli a0, a0, 16 +; RV32IA-ZALASR-TSO-NEXT: ret +; +; RV64IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire: +; RV64IA-ZALASR-WMO: # %bb.0: +; RV64IA-ZALASR-WMO-NEXT: lh.aq a0, (a0) +; RV64IA-ZALASR-WMO-NEXT: slli a0, a0, 48 +; RV64IA-ZALASR-WMO-NEXT: srli a0, a0, 48 +; RV64IA-ZALASR-WMO-NEXT: ret +; +; RV64IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire: +; RV64IA-ZALASR-TSO: # %bb.0: +; RV64IA-ZALASR-TSO-NEXT: lh a0, 0(a0) +; RV64IA-ZALASR-TSO-NEXT: slli a0, a0, 48 +; RV64IA-ZALASR-TSO-NEXT: srli a0, a0, 48 +; RV64IA-ZALASR-TSO-NEXT: ret + %1 = load atomic i16, ptr %a acquire, align 2 + ret i16 %1 +} + +define zeroext i16 @atomic_load_i16_seq_cst(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i16_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: call __atomic_load_2 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_i16_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, rw +; RV32IA-WMO-NEXT: lh a0, 0(a0) +; RV32IA-WMO-NEXT: slli a0, a0, 16 +; RV32IA-WMO-NEXT: srli a0, a0, 16 +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_i16_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: lh a0, 0(a0) +; RV32IA-TSO-NEXT: slli a0, a0, 16 +; RV32IA-TSO-NEXT: srli a0, a0, 16 +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_i16_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: 
addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __atomic_load_2 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i16_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, rw +; RV64IA-WMO-NEXT: lh a0, 0(a0) +; RV64IA-WMO-NEXT: slli a0, a0, 48 +; RV64IA-WMO-NEXT: srli a0, a0, 48 +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i16_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: lh a0, 0(a0) +; RV64IA-TSO-NEXT: slli a0, a0, 48 +; RV64IA-TSO-NEXT: srli a0, a0, 48 +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 16 +; RV32IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 16 +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 16 +; RV32IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 16 +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 48 +; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 48 +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: 
# %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 48 +; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 48 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-ZALASR-LABEL: atomic_load_i16_seq_cst: +; RV32IA-ZALASR: # %bb.0: +; RV32IA-ZALASR-NEXT: lh.aq a0, (a0) +; RV32IA-ZALASR-NEXT: slli a0, a0, 16 +; RV32IA-ZALASR-NEXT: srli a0, a0, 16 +; RV32IA-ZALASR-NEXT: ret +; +; RV64IA-ZALASR-LABEL: atomic_load_i16_seq_cst: +; RV64IA-ZALASR: # %bb.0: +; RV64IA-ZALASR-NEXT: lh.aq a0, (a0) +; RV64IA-ZALASR-NEXT: slli a0, a0, 48 +; RV64IA-ZALASR-NEXT: srli a0, a0, 48 +; RV64IA-ZALASR-NEXT: ret + %1 = load atomic i16, ptr %a seq_cst, align 2 + ret i16 %1 +} + +define zeroext i32 @atomic_load_i32_unordered(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i32_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i32_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i32_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i32_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lw a0, 0(a0) +; RV64IA-NEXT: slli a0, a0, 32 +; RV64IA-NEXT: srli a0, a0, 32 +; RV64IA-NEXT: ret + %1 = load atomic i32, ptr %a unordered, align 4 + ret i32 %1 +} + +define zeroext i32 @atomic_load_i32_monotonic(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i32_monotonic: +; RV32I: # %bb.0: +; 
RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i32_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i32_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i32_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lw a0, 0(a0) +; RV64IA-NEXT: slli a0, a0, 32 +; RV64IA-NEXT: srli a0, a0, 32 +; RV64IA-NEXT: ret + %1 = load atomic i32, ptr %a monotonic, align 4 + ret i32 %1 +} + +define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i32_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_i32_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: lw a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_i32_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lw a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_i32_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; 
RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i32_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lw a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: slli a0, a0, 32 +; RV64IA-WMO-NEXT: srli a0, a0, 32 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i32_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lw a0, 0(a0) +; RV64IA-TSO-NEXT: slli a0, a0, 32 +; RV64IA-TSO-NEXT: srli a0, a0, 32 +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 32 +; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 32 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 32 +; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 32 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire: +; RV32IA-ZALASR-WMO: # %bb.0: +; RV32IA-ZALASR-WMO-NEXT: lw.aq a0, (a0) +; RV32IA-ZALASR-WMO-NEXT: ret +; +; RV32IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire: +; RV32IA-ZALASR-TSO: # %bb.0: +; RV32IA-ZALASR-TSO-NEXT: lw a0, 0(a0) +; RV32IA-ZALASR-TSO-NEXT: ret +; +; RV64IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire: +; RV64IA-ZALASR-WMO: # %bb.0: +; RV64IA-ZALASR-WMO-NEXT: lw.aq a0, (a0) +; RV64IA-ZALASR-WMO-NEXT: slli a0, a0, 32 
+; RV64IA-ZALASR-WMO-NEXT: srli a0, a0, 32 +; RV64IA-ZALASR-WMO-NEXT: ret +; +; RV64IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire: +; RV64IA-ZALASR-TSO: # %bb.0: +; RV64IA-ZALASR-TSO-NEXT: lw a0, 0(a0) +; RV64IA-ZALASR-TSO-NEXT: slli a0, a0, 32 +; RV64IA-ZALASR-TSO-NEXT: srli a0, a0, 32 +; RV64IA-ZALASR-TSO-NEXT: ret + %1 = load atomic i32, ptr %a acquire, align 4 + ret i32 %1 +} + +define zeroext i32 @atomic_load_i32_seq_cst(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i32_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_i32_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, rw +; RV32IA-WMO-NEXT: lw a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_i32_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: lw a0, 0(a0) +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_i32_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i32_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, rw +; RV64IA-WMO-NEXT: lw a0, 0(a0) +; RV64IA-WMO-NEXT: slli a0, a0, 32 +; RV64IA-WMO-NEXT: srli a0, a0, 32 +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i32_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: lw a0, 0(a0) +; RV64IA-TSO-NEXT: slli a0, a0, 32 +; RV64IA-TSO-NEXT: srli a0, a0, 32 +; RV64IA-TSO-NEXT: ret +; +; 
RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 32 +; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 32 +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 32 +; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 32 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-ZALASR-LABEL: atomic_load_i32_seq_cst: +; RV32IA-ZALASR: # %bb.0: +; RV32IA-ZALASR-NEXT: lw.aq a0, (a0) +; RV32IA-ZALASR-NEXT: ret +; +; RV64IA-ZALASR-LABEL: atomic_load_i32_seq_cst: +; RV64IA-ZALASR: # %bb.0: +; RV64IA-ZALASR-NEXT: lw.aq a0, (a0) +; RV64IA-ZALASR-NEXT: slli a0, a0, 32 +; RV64IA-ZALASR-NEXT: srli a0, a0, 32 +; RV64IA-ZALASR-NEXT: ret + %1 = load atomic i32, ptr %a seq_cst, align 4 + ret i32 %1 +} + +define zeroext i64 @atomic_load_i64_unordered(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i64_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload 
+; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i64_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 0 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i64_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i64_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: ld a0, 0(a0) +; RV64IA-NEXT: ret + %1 = load atomic i64, ptr %a unordered, align 8 + ret i64 %1 +} + +define zeroext i64 @atomic_load_i64_monotonic(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i64_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i64_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 0 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i64_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_i64_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: ld a0, 0(a0) +; RV64IA-NEXT: ret + %1 = load atomic 
i64, ptr %a monotonic, align 8 + ret i64 %1 +} + +define zeroext i64 @atomic_load_i64_acquire(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i64_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i64_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 2 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i64_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i64_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: ld a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i64_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: ld a0, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i64_acquire: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i64_acquire: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-ZALASR-WMO-LABEL: atomic_load_i64_acquire: +; RV64IA-ZALASR-WMO: # %bb.0: +; RV64IA-ZALASR-WMO-NEXT: ld.aq a0, (a0) +; RV64IA-ZALASR-WMO-NEXT: ret +; +; RV64IA-ZALASR-TSO-LABEL: atomic_load_i64_acquire: +; RV64IA-ZALASR-TSO: # 
%bb.0: +; RV64IA-ZALASR-TSO-NEXT: ld a0, 0(a0) +; RV64IA-ZALASR-TSO-NEXT: ret + %1 = load atomic i64, ptr %a acquire, align 8 + ret i64 %1 +} + +define zeroext i64 @atomic_load_i64_seq_cst(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_i64_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_i64_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 5 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_i64_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_i64_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, rw +; RV64IA-WMO-NEXT: ld a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_i64_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ld a0, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i64_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i64_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; 
RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-ZALASR-LABEL: atomic_load_i64_seq_cst: +; RV64IA-ZALASR: # %bb.0: +; RV64IA-ZALASR-NEXT: ld.aq a0, (a0) +; RV64IA-ZALASR-NEXT: ret + %1 = load atomic i64, ptr %a seq_cst, align 8 + ret i64 %1 +} + From 22bf5ceb077c47a870b76958ef3855b91e22671c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20G=C3=B3rski?= Date: Sun, 20 Apr 2025 01:43:24 +0200 Subject: [PATCH 2/6] [RISCV] Optimized `and` with `atomic_load` into `zextload` when safe. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 44 ++++ llvm/test/CodeGen/RISCV/atomic-load-zext.ll | 264 ++++++-------------- 2 files changed, 121 insertions(+), 187 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index f7d192756fd56..3af3fea7eb161 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -15148,6 +15148,48 @@ static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); } +static SDValue reduceANDOfAtomicLoad(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + SelectionDAG &DAG = DCI.DAG; + if (N->getOpcode() != ISD::AND) + return SDValue(); + + SDValue N0 = N->getOperand(0); + if (N0.getOpcode() != ISD::ATOMIC_LOAD) + return SDValue(); + + AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode()); + if (isStrongerThanMonotonic(ALoad->getSuccessOrdering())) + return SDValue(); + + EVT LoadedVT = ALoad->getMemoryVT(); + EVT ResultVT = N->getValueType(0); + + SDValue MaskVal = N->getOperand(1); + ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(MaskVal); + if (!MaskConst) + return SDValue(); + uint64_t Mask = MaskConst->getZExtValue(); + uint64_t ExpectedMask = LoadedVT.getSizeInBits() == 8 ? 0xFF + : LoadedVT.getSizeInBits() == 16 ? 0xFFFF + : LoadedVT.getSizeInBits() == 32 ?
0xFFFFFFFF + : 0xFFFFFFFFFFFFFFFF; + if (Mask != ExpectedMask) + return SDValue(); + + SDLoc DL(N); + SDValue Chain = ALoad->getChain(); + SDValue Ptr = ALoad->getBasePtr(); + MachineMemOperand *MemOp = ALoad->getMemOperand(); + SDValue ZextLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, ResultVT, Chain, Ptr, + MemOp->getPointerInfo(), LoadedVT, + MemOp->getAlign(), MemOp->getFlags()); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), ZextLoad); + DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1)); + DCI.recursivelyDeleteUnusedNodes(N0.getNode()); + return SDValue(N, 0); +} + // Combines two comparison operation and logic operation to one selection // operation(min, max) and logic operation. Returns new constructed Node if // conditions for optimization are satisfied. @@ -15182,6 +15224,8 @@ static SDValue performANDCombine(SDNode *N, return V; if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) return V; + if (SDValue V = reduceANDOfAtomicLoad(N, DCI)) + return V; if (DCI.isAfterLegalizeDAG()) if (SDValue V = combineDeMorganOfBoolean(N, DAG)) diff --git a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll index ca76a9ba58614..1fcf5f085646d 100644 --- a/llvm/test/CodeGen/RISCV/atomic-load-zext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-load-zext.ll @@ -47,8 +47,7 @@ define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind { ; ; RV32IA-LABEL: atomic_load_i1_unordered: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lb a0, 0(a0) -; RV32IA-NEXT: zext.b a0, a0 +; RV32IA-NEXT: lbu a0, 0(a0) ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomic_load_i1_unordered: @@ -64,8 +63,7 @@ define zeroext i1 @atomic_load_i1_unordered(ptr %a) nounwind { ; ; RV64IA-LABEL: atomic_load_i1_unordered: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lb a0, 0(a0) -; RV64IA-NEXT: zext.b a0, a0 +; RV64IA-NEXT: lbu a0, 0(a0) ; RV64IA-NEXT: ret %1 = load atomic i8, ptr %a unordered, align 1 %2 = trunc nuw i8 %1 to i1 @@ -86,8 +84,7 @@ define zeroext i1 
@atomic_load_i1_monotonic(ptr %a) nounwind { ; ; RV32IA-LABEL: atomic_load_i1_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lb a0, 0(a0) -; RV32IA-NEXT: zext.b a0, a0 +; RV32IA-NEXT: lbu a0, 0(a0) ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomic_load_i1_monotonic: @@ -103,8 +100,7 @@ define zeroext i1 @atomic_load_i1_monotonic(ptr %a) nounwind { ; ; RV64IA-LABEL: atomic_load_i1_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lb a0, 0(a0) -; RV64IA-NEXT: zext.b a0, a0 +; RV64IA-NEXT: lbu a0, 0(a0) ; RV64IA-NEXT: ret %1 = load atomic i8, ptr %a monotonic, align 1 %2 = trunc nuw i8 %1 to i1 @@ -125,15 +121,13 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind { ; ; RV32IA-WMO-LABEL: atomic_load_i1_acquire: ; RV32IA-WMO: # %bb.0: -; RV32IA-WMO-NEXT: lb a0, 0(a0) +; RV32IA-WMO-NEXT: lbu a0, 0(a0) ; RV32IA-WMO-NEXT: fence r, rw -; RV32IA-WMO-NEXT: zext.b a0, a0 ; RV32IA-WMO-NEXT: ret ; ; RV32IA-TSO-LABEL: atomic_load_i1_acquire: ; RV32IA-TSO: # %bb.0: -; RV32IA-TSO-NEXT: lb a0, 0(a0) -; RV32IA-TSO-NEXT: zext.b a0, a0 +; RV32IA-TSO-NEXT: lbu a0, 0(a0) ; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomic_load_i1_acquire: @@ -149,41 +143,35 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind { ; ; RV64IA-WMO-LABEL: atomic_load_i1_acquire: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: lb a0, 0(a0) +; RV64IA-WMO-NEXT: lbu a0, 0(a0) ; RV64IA-WMO-NEXT: fence r, rw -; RV64IA-WMO-NEXT: zext.b a0, a0 ; RV64IA-WMO-NEXT: ret ; ; RV64IA-TSO-LABEL: atomic_load_i1_acquire: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: lb a0, 0(a0) -; RV64IA-TSO-NEXT: zext.b a0, a0 +; RV64IA-TSO-NEXT: lbu a0, 0(a0) ; RV64IA-TSO-NEXT: ret ; ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire: ; RV32IA-WMO-TRAILING-FENCE: # %bb.0: -; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw -; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0 ; RV32IA-WMO-TRAILING-FENCE-NEXT: ret ; ; RV32IA-TSO-TRAILING-FENCE-LABEL: 
atomic_load_i1_acquire: ; RV32IA-TSO-TRAILING-FENCE: # %bb.0: -; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) -; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV32IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire: ; RV64IA-WMO-TRAILING-FENCE: # %bb.0: -; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw -; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0 ; RV64IA-WMO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_acquire: ; RV64IA-TSO-TRAILING-FENCE: # %bb.0: -; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) -; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV32IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire: @@ -194,8 +182,7 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind { ; ; RV32IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire: ; RV32IA-ZALASR-TSO: # %bb.0: -; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0) -; RV32IA-ZALASR-TSO-NEXT: zext.b a0, a0 +; RV32IA-ZALASR-TSO-NEXT: lbu a0, 0(a0) ; RV32IA-ZALASR-TSO-NEXT: ret ; ; RV64IA-ZALASR-WMO-LABEL: atomic_load_i1_acquire: @@ -206,8 +193,7 @@ define zeroext i1 @atomic_load_i1_acquire(ptr %a) nounwind { ; ; RV64IA-ZALASR-TSO-LABEL: atomic_load_i1_acquire: ; RV64IA-ZALASR-TSO: # %bb.0: -; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0) -; RV64IA-ZALASR-TSO-NEXT: zext.b a0, a0 +; RV64IA-ZALASR-TSO-NEXT: lbu a0, 0(a0) ; RV64IA-ZALASR-TSO-NEXT: ret %1 = load atomic i8, ptr %a acquire, align 1 %2 = trunc nuw i8 %1 to i1 @@ -229,16 +215,14 @@ define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind { ; RV32IA-WMO-LABEL: atomic_load_i1_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, rw -; RV32IA-WMO-NEXT: lb a0, 0(a0) -; RV32IA-WMO-NEXT: zext.b a0, a0 +; RV32IA-WMO-NEXT: lbu a0, 0(a0) ; RV32IA-WMO-NEXT: fence r, rw ; 
RV32IA-WMO-NEXT: ret ; ; RV32IA-TSO-LABEL: atomic_load_i1_seq_cst: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: fence rw, rw -; RV32IA-TSO-NEXT: lb a0, 0(a0) -; RV32IA-TSO-NEXT: zext.b a0, a0 +; RV32IA-TSO-NEXT: lbu a0, 0(a0) ; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomic_load_i1_seq_cst: @@ -255,46 +239,40 @@ define zeroext i1 @atomic_load_i1_seq_cst(ptr %a) nounwind { ; RV64IA-WMO-LABEL: atomic_load_i1_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw -; RV64IA-WMO-NEXT: lb a0, 0(a0) -; RV64IA-WMO-NEXT: zext.b a0, a0 +; RV64IA-WMO-NEXT: lbu a0, 0(a0) ; RV64IA-WMO-NEXT: fence r, rw ; RV64IA-WMO-NEXT: ret ; ; RV64IA-TSO-LABEL: atomic_load_i1_seq_cst: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: fence rw, rw -; RV64IA-TSO-NEXT: lb a0, 0(a0) -; RV64IA-TSO-NEXT: zext.b a0, a0 +; RV64IA-TSO-NEXT: lbu a0, 0(a0) ; RV64IA-TSO-NEXT: ret ; ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst: ; RV32IA-WMO-TRAILING-FENCE: # %bb.0: ; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw -; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) -; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw ; RV32IA-WMO-TRAILING-FENCE-NEXT: ret ; ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst: ; RV32IA-TSO-TRAILING-FENCE: # %bb.0: ; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw -; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) -; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV32IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst: ; RV64IA-WMO-TRAILING-FENCE: # %bb.0: ; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw -; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) -; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw ; RV64IA-WMO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i1_seq_cst: ; 
RV64IA-TSO-TRAILING-FENCE: # %bb.0: ; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw -; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) -; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV32IA-ZALASR-LABEL: atomic_load_i1_seq_cst: @@ -327,8 +305,7 @@ define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind { ; ; RV32IA-LABEL: atomic_load_i8_unordered: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lb a0, 0(a0) -; RV32IA-NEXT: zext.b a0, a0 +; RV32IA-NEXT: lbu a0, 0(a0) ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomic_load_i8_unordered: @@ -344,8 +321,7 @@ define zeroext i8 @atomic_load_i8_unordered(ptr %a) nounwind { ; ; RV64IA-LABEL: atomic_load_i8_unordered: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lb a0, 0(a0) -; RV64IA-NEXT: zext.b a0, a0 +; RV64IA-NEXT: lbu a0, 0(a0) ; RV64IA-NEXT: ret %1 = load atomic i8, ptr %a unordered, align 1 ret i8 %1 @@ -365,8 +341,7 @@ define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind { ; ; RV32IA-LABEL: atomic_load_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lb a0, 0(a0) -; RV32IA-NEXT: zext.b a0, a0 +; RV32IA-NEXT: lbu a0, 0(a0) ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomic_load_i8_monotonic: @@ -382,8 +357,7 @@ define zeroext i8 @atomic_load_i8_monotonic(ptr %a) nounwind { ; ; RV64IA-LABEL: atomic_load_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lb a0, 0(a0) -; RV64IA-NEXT: zext.b a0, a0 +; RV64IA-NEXT: lbu a0, 0(a0) ; RV64IA-NEXT: ret %1 = load atomic i8, ptr %a monotonic, align 1 ret i8 %1 @@ -403,15 +377,13 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind { ; ; RV32IA-WMO-LABEL: atomic_load_i8_acquire: ; RV32IA-WMO: # %bb.0: -; RV32IA-WMO-NEXT: lb a0, 0(a0) +; RV32IA-WMO-NEXT: lbu a0, 0(a0) ; RV32IA-WMO-NEXT: fence r, rw -; RV32IA-WMO-NEXT: zext.b a0, a0 ; RV32IA-WMO-NEXT: ret ; ; RV32IA-TSO-LABEL: atomic_load_i8_acquire: ; RV32IA-TSO: # %bb.0: -; RV32IA-TSO-NEXT: lb a0, 0(a0) -; RV32IA-TSO-NEXT: zext.b a0, a0 +; RV32IA-TSO-NEXT: lbu 
a0, 0(a0) ; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomic_load_i8_acquire: @@ -427,41 +399,35 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind { ; ; RV64IA-WMO-LABEL: atomic_load_i8_acquire: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: lb a0, 0(a0) +; RV64IA-WMO-NEXT: lbu a0, 0(a0) ; RV64IA-WMO-NEXT: fence r, rw -; RV64IA-WMO-NEXT: zext.b a0, a0 ; RV64IA-WMO-NEXT: ret ; ; RV64IA-TSO-LABEL: atomic_load_i8_acquire: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: lb a0, 0(a0) -; RV64IA-TSO-NEXT: zext.b a0, a0 +; RV64IA-TSO-NEXT: lbu a0, 0(a0) ; RV64IA-TSO-NEXT: ret ; ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire: ; RV32IA-WMO-TRAILING-FENCE: # %bb.0: -; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw -; RV32IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0 ; RV32IA-WMO-TRAILING-FENCE-NEXT: ret ; ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire: ; RV32IA-TSO-TRAILING-FENCE: # %bb.0: -; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) -; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV32IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire: ; RV64IA-WMO-TRAILING-FENCE: # %bb.0: -; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw -; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0 ; RV64IA-WMO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_acquire: ; RV64IA-TSO-TRAILING-FENCE: # %bb.0: -; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) -; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV32IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire: @@ -472,8 +438,7 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind { ; ; RV32IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire: ; 
RV32IA-ZALASR-TSO: # %bb.0: -; RV32IA-ZALASR-TSO-NEXT: lb a0, 0(a0) -; RV32IA-ZALASR-TSO-NEXT: zext.b a0, a0 +; RV32IA-ZALASR-TSO-NEXT: lbu a0, 0(a0) ; RV32IA-ZALASR-TSO-NEXT: ret ; ; RV64IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire: @@ -484,8 +449,7 @@ define zeroext i8 @atomic_load_i8_acquire(ptr %a) nounwind { ; ; RV64IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire: ; RV64IA-ZALASR-TSO: # %bb.0: -; RV64IA-ZALASR-TSO-NEXT: lb a0, 0(a0) -; RV64IA-ZALASR-TSO-NEXT: zext.b a0, a0 +; RV64IA-ZALASR-TSO-NEXT: lbu a0, 0(a0) ; RV64IA-ZALASR-TSO-NEXT: ret %1 = load atomic i8, ptr %a acquire, align 1 ret i8 %1 @@ -506,16 +470,14 @@ define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind { ; RV32IA-WMO-LABEL: atomic_load_i8_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, rw -; RV32IA-WMO-NEXT: lb a0, 0(a0) -; RV32IA-WMO-NEXT: zext.b a0, a0 +; RV32IA-WMO-NEXT: lbu a0, 0(a0) ; RV32IA-WMO-NEXT: fence r, rw ; RV32IA-WMO-NEXT: ret ; ; RV32IA-TSO-LABEL: atomic_load_i8_seq_cst: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: fence rw, rw -; RV32IA-TSO-NEXT: lb a0, 0(a0) -; RV32IA-TSO-NEXT: zext.b a0, a0 +; RV32IA-TSO-NEXT: lbu a0, 0(a0) ; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomic_load_i8_seq_cst: @@ -532,46 +494,40 @@ define zeroext i8 @atomic_load_i8_seq_cst(ptr %a) nounwind { ; RV64IA-WMO-LABEL: atomic_load_i8_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw -; RV64IA-WMO-NEXT: lb a0, 0(a0) -; RV64IA-WMO-NEXT: zext.b a0, a0 +; RV64IA-WMO-NEXT: lbu a0, 0(a0) ; RV64IA-WMO-NEXT: fence r, rw ; RV64IA-WMO-NEXT: ret ; ; RV64IA-TSO-LABEL: atomic_load_i8_seq_cst: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: fence rw, rw -; RV64IA-TSO-NEXT: lb a0, 0(a0) -; RV64IA-TSO-NEXT: zext.b a0, a0 +; RV64IA-TSO-NEXT: lbu a0, 0(a0) ; RV64IA-TSO-NEXT: ret ; ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst: ; RV32IA-WMO-TRAILING-FENCE: # %bb.0: ; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw -; RV32IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) -; RV32IA-WMO-TRAILING-FENCE-NEXT: 
zext.b a0, a0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw ; RV32IA-WMO-TRAILING-FENCE-NEXT: ret ; ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst: ; RV32IA-TSO-TRAILING-FENCE: # %bb.0: ; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw -; RV32IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) -; RV32IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV32IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst: ; RV64IA-WMO-TRAILING-FENCE: # %bb.0: ; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw -; RV64IA-WMO-TRAILING-FENCE-NEXT: lb a0, 0(a0) -; RV64IA-WMO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw ; RV64IA-WMO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i8_seq_cst: ; RV64IA-TSO-TRAILING-FENCE: # %bb.0: ; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw -; RV64IA-TSO-TRAILING-FENCE-NEXT: lb a0, 0(a0) -; RV64IA-TSO-TRAILING-FENCE-NEXT: zext.b a0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: lbu a0, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV32IA-ZALASR-LABEL: atomic_load_i8_seq_cst: @@ -604,9 +560,7 @@ define zeroext i16 @atomic_load_i16_unordered(ptr %a) nounwind { ; ; RV32IA-LABEL: atomic_load_i16_unordered: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lh a0, 0(a0) -; RV32IA-NEXT: slli a0, a0, 16 -; RV32IA-NEXT: srli a0, a0, 16 +; RV32IA-NEXT: lhu a0, 0(a0) ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomic_load_i16_unordered: @@ -623,9 +577,7 @@ define zeroext i16 @atomic_load_i16_unordered(ptr %a) nounwind { ; ; RV64IA-LABEL: atomic_load_i16_unordered: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lh a0, 0(a0) -; RV64IA-NEXT: slli a0, a0, 48 -; RV64IA-NEXT: srli a0, a0, 48 +; RV64IA-NEXT: lhu a0, 0(a0) ; RV64IA-NEXT: ret %1 = load atomic i16, ptr %a unordered, align 2 ret i16 %1 @@ -646,9 +598,7 @@ define zeroext i16 @atomic_load_i16_monotonic(ptr %a) nounwind 
{ ; ; RV32IA-LABEL: atomic_load_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: lh a0, 0(a0) -; RV32IA-NEXT: slli a0, a0, 16 -; RV32IA-NEXT: srli a0, a0, 16 +; RV32IA-NEXT: lhu a0, 0(a0) ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomic_load_i16_monotonic: @@ -665,9 +615,7 @@ define zeroext i16 @atomic_load_i16_monotonic(ptr %a) nounwind { ; ; RV64IA-LABEL: atomic_load_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lh a0, 0(a0) -; RV64IA-NEXT: slli a0, a0, 48 -; RV64IA-NEXT: srli a0, a0, 48 +; RV64IA-NEXT: lhu a0, 0(a0) ; RV64IA-NEXT: ret %1 = load atomic i16, ptr %a monotonic, align 2 ret i16 %1 @@ -688,17 +636,13 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind { ; ; RV32IA-WMO-LABEL: atomic_load_i16_acquire: ; RV32IA-WMO: # %bb.0: -; RV32IA-WMO-NEXT: lh a0, 0(a0) +; RV32IA-WMO-NEXT: lhu a0, 0(a0) ; RV32IA-WMO-NEXT: fence r, rw -; RV32IA-WMO-NEXT: slli a0, a0, 16 -; RV32IA-WMO-NEXT: srli a0, a0, 16 ; RV32IA-WMO-NEXT: ret ; ; RV32IA-TSO-LABEL: atomic_load_i16_acquire: ; RV32IA-TSO: # %bb.0: -; RV32IA-TSO-NEXT: lh a0, 0(a0) -; RV32IA-TSO-NEXT: slli a0, a0, 16 -; RV32IA-TSO-NEXT: srli a0, a0, 16 +; RV32IA-TSO-NEXT: lhu a0, 0(a0) ; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomic_load_i16_acquire: @@ -715,47 +659,35 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind { ; ; RV64IA-WMO-LABEL: atomic_load_i16_acquire: ; RV64IA-WMO: # %bb.0: -; RV64IA-WMO-NEXT: lh a0, 0(a0) +; RV64IA-WMO-NEXT: lhu a0, 0(a0) ; RV64IA-WMO-NEXT: fence r, rw -; RV64IA-WMO-NEXT: slli a0, a0, 48 -; RV64IA-WMO-NEXT: srli a0, a0, 48 ; RV64IA-WMO-NEXT: ret ; ; RV64IA-TSO-LABEL: atomic_load_i16_acquire: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: lh a0, 0(a0) -; RV64IA-TSO-NEXT: slli a0, a0, 48 -; RV64IA-TSO-NEXT: srli a0, a0, 48 +; RV64IA-TSO-NEXT: lhu a0, 0(a0) ; RV64IA-TSO-NEXT: ret ; ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire: ; RV32IA-WMO-TRAILING-FENCE: # %bb.0: -; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: lhu a0, 
0(a0) ; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw -; RV32IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 16 -; RV32IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 16 ; RV32IA-WMO-TRAILING-FENCE-NEXT: ret ; ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire: ; RV32IA-TSO-TRAILING-FENCE: # %bb.0: -; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0) -; RV32IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 16 -; RV32IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 16 +; RV32IA-TSO-TRAILING-FENCE-NEXT: lhu a0, 0(a0) ; RV32IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire: ; RV64IA-WMO-TRAILING-FENCE: # %bb.0: -; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: lhu a0, 0(a0) ; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw -; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 48 -; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 48 ; RV64IA-WMO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_acquire: ; RV64IA-TSO-TRAILING-FENCE: # %bb.0: -; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0) -; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 48 -; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 48 +; RV64IA-TSO-TRAILING-FENCE-NEXT: lhu a0, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV32IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire: @@ -767,9 +699,7 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind { ; ; RV32IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire: ; RV32IA-ZALASR-TSO: # %bb.0: -; RV32IA-ZALASR-TSO-NEXT: lh a0, 0(a0) -; RV32IA-ZALASR-TSO-NEXT: slli a0, a0, 16 -; RV32IA-ZALASR-TSO-NEXT: srli a0, a0, 16 +; RV32IA-ZALASR-TSO-NEXT: lhu a0, 0(a0) ; RV32IA-ZALASR-TSO-NEXT: ret ; ; RV64IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire: @@ -781,9 +711,7 @@ define zeroext i16 @atomic_load_i16_acquire(ptr %a) nounwind { ; ; RV64IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire: ; RV64IA-ZALASR-TSO: # %bb.0: -; RV64IA-ZALASR-TSO-NEXT: lh a0, 0(a0) -; RV64IA-ZALASR-TSO-NEXT: slli a0, a0, 48 -; RV64IA-ZALASR-TSO-NEXT: 
srli a0, a0, 48 +; RV64IA-ZALASR-TSO-NEXT: lhu a0, 0(a0) ; RV64IA-ZALASR-TSO-NEXT: ret %1 = load atomic i16, ptr %a acquire, align 2 ret i16 %1 @@ -805,18 +733,14 @@ define zeroext i16 @atomic_load_i16_seq_cst(ptr %a) nounwind { ; RV32IA-WMO-LABEL: atomic_load_i16_seq_cst: ; RV32IA-WMO: # %bb.0: ; RV32IA-WMO-NEXT: fence rw, rw -; RV32IA-WMO-NEXT: lh a0, 0(a0) -; RV32IA-WMO-NEXT: slli a0, a0, 16 -; RV32IA-WMO-NEXT: srli a0, a0, 16 +; RV32IA-WMO-NEXT: lhu a0, 0(a0) ; RV32IA-WMO-NEXT: fence r, rw ; RV32IA-WMO-NEXT: ret ; ; RV32IA-TSO-LABEL: atomic_load_i16_seq_cst: ; RV32IA-TSO: # %bb.0: ; RV32IA-TSO-NEXT: fence rw, rw -; RV32IA-TSO-NEXT: lh a0, 0(a0) -; RV32IA-TSO-NEXT: slli a0, a0, 16 -; RV32IA-TSO-NEXT: srli a0, a0, 16 +; RV32IA-TSO-NEXT: lhu a0, 0(a0) ; RV32IA-TSO-NEXT: ret ; ; RV64I-LABEL: atomic_load_i16_seq_cst: @@ -834,52 +758,40 @@ define zeroext i16 @atomic_load_i16_seq_cst(ptr %a) nounwind { ; RV64IA-WMO-LABEL: atomic_load_i16_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw -; RV64IA-WMO-NEXT: lh a0, 0(a0) -; RV64IA-WMO-NEXT: slli a0, a0, 48 -; RV64IA-WMO-NEXT: srli a0, a0, 48 +; RV64IA-WMO-NEXT: lhu a0, 0(a0) ; RV64IA-WMO-NEXT: fence r, rw ; RV64IA-WMO-NEXT: ret ; ; RV64IA-TSO-LABEL: atomic_load_i16_seq_cst: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: fence rw, rw -; RV64IA-TSO-NEXT: lh a0, 0(a0) -; RV64IA-TSO-NEXT: slli a0, a0, 48 -; RV64IA-TSO-NEXT: srli a0, a0, 48 +; RV64IA-TSO-NEXT: lhu a0, 0(a0) ; RV64IA-TSO-NEXT: ret ; ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst: ; RV32IA-WMO-TRAILING-FENCE: # %bb.0: ; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw -; RV32IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0) -; RV32IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 16 -; RV32IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 16 +; RV32IA-WMO-TRAILING-FENCE-NEXT: lhu a0, 0(a0) ; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw ; RV32IA-WMO-TRAILING-FENCE-NEXT: ret ; ; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst: ; RV32IA-TSO-TRAILING-FENCE: # %bb.0: 
; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw -; RV32IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0) -; RV32IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 16 -; RV32IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 16 +; RV32IA-TSO-TRAILING-FENCE-NEXT: lhu a0, 0(a0) ; RV32IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst: ; RV64IA-WMO-TRAILING-FENCE: # %bb.0: ; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw -; RV64IA-WMO-TRAILING-FENCE-NEXT: lh a0, 0(a0) -; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 48 -; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 48 +; RV64IA-WMO-TRAILING-FENCE-NEXT: lhu a0, 0(a0) ; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw ; RV64IA-WMO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i16_seq_cst: ; RV64IA-TSO-TRAILING-FENCE: # %bb.0: ; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw -; RV64IA-TSO-TRAILING-FENCE-NEXT: lh a0, 0(a0) -; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 48 -; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 48 +; RV64IA-TSO-TRAILING-FENCE-NEXT: lhu a0, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV32IA-ZALASR-LABEL: atomic_load_i16_seq_cst: @@ -929,9 +841,7 @@ define zeroext i32 @atomic_load_i32_unordered(ptr %a) nounwind { ; ; RV64IA-LABEL: atomic_load_i32_unordered: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lw a0, 0(a0) -; RV64IA-NEXT: slli a0, a0, 32 -; RV64IA-NEXT: srli a0, a0, 32 +; RV64IA-NEXT: lwu a0, 0(a0) ; RV64IA-NEXT: ret %1 = load atomic i32, ptr %a unordered, align 4 ret i32 %1 @@ -967,9 +877,7 @@ define zeroext i32 @atomic_load_i32_monotonic(ptr %a) nounwind { ; ; RV64IA-LABEL: atomic_load_i32_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: lw a0, 0(a0) -; RV64IA-NEXT: slli a0, a0, 32 -; RV64IA-NEXT: srli a0, a0, 32 +; RV64IA-NEXT: lwu a0, 0(a0) ; RV64IA-NEXT: ret %1 = load atomic i32, ptr %a monotonic, align 4 ret i32 %1 @@ -1011,17 +919,13 @@ define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind { ; ; RV64IA-WMO-LABEL: atomic_load_i32_acquire: ; RV64IA-WMO: # 
%bb.0: -; RV64IA-WMO-NEXT: lw a0, 0(a0) +; RV64IA-WMO-NEXT: lwu a0, 0(a0) ; RV64IA-WMO-NEXT: fence r, rw -; RV64IA-WMO-NEXT: slli a0, a0, 32 -; RV64IA-WMO-NEXT: srli a0, a0, 32 ; RV64IA-WMO-NEXT: ret ; ; RV64IA-TSO-LABEL: atomic_load_i32_acquire: ; RV64IA-TSO: # %bb.0: -; RV64IA-TSO-NEXT: lw a0, 0(a0) -; RV64IA-TSO-NEXT: slli a0, a0, 32 -; RV64IA-TSO-NEXT: srli a0, a0, 32 +; RV64IA-TSO-NEXT: lwu a0, 0(a0) ; RV64IA-TSO-NEXT: ret ; ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire: @@ -1037,17 +941,13 @@ define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind { ; ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire: ; RV64IA-WMO-TRAILING-FENCE: # %bb.0: -; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: lwu a0, 0(a0) ; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw -; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 32 -; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 32 ; RV64IA-WMO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_acquire: ; RV64IA-TSO-TRAILING-FENCE: # %bb.0: -; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) -; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 32 -; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 32 +; RV64IA-TSO-TRAILING-FENCE-NEXT: lwu a0, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV32IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire: @@ -1069,9 +969,7 @@ define zeroext i32 @atomic_load_i32_acquire(ptr %a) nounwind { ; ; RV64IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire: ; RV64IA-ZALASR-TSO: # %bb.0: -; RV64IA-ZALASR-TSO-NEXT: lw a0, 0(a0) -; RV64IA-ZALASR-TSO-NEXT: slli a0, a0, 32 -; RV64IA-ZALASR-TSO-NEXT: srli a0, a0, 32 +; RV64IA-ZALASR-TSO-NEXT: lwu a0, 0(a0) ; RV64IA-ZALASR-TSO-NEXT: ret %1 = load atomic i32, ptr %a acquire, align 4 ret i32 %1 @@ -1116,18 +1014,14 @@ define zeroext i32 @atomic_load_i32_seq_cst(ptr %a) nounwind { ; RV64IA-WMO-LABEL: atomic_load_i32_seq_cst: ; RV64IA-WMO: # %bb.0: ; RV64IA-WMO-NEXT: fence rw, rw -; RV64IA-WMO-NEXT: lw a0, 0(a0) -; 
RV64IA-WMO-NEXT: slli a0, a0, 32 -; RV64IA-WMO-NEXT: srli a0, a0, 32 +; RV64IA-WMO-NEXT: lwu a0, 0(a0) ; RV64IA-WMO-NEXT: fence r, rw ; RV64IA-WMO-NEXT: ret ; ; RV64IA-TSO-LABEL: atomic_load_i32_seq_cst: ; RV64IA-TSO: # %bb.0: ; RV64IA-TSO-NEXT: fence rw, rw -; RV64IA-TSO-NEXT: lw a0, 0(a0) -; RV64IA-TSO-NEXT: slli a0, a0, 32 -; RV64IA-TSO-NEXT: srli a0, a0, 32 +; RV64IA-TSO-NEXT: lwu a0, 0(a0) ; RV64IA-TSO-NEXT: ret ; ; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst: @@ -1146,18 +1040,14 @@ define zeroext i32 @atomic_load_i32_seq_cst(ptr %a) nounwind { ; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst: ; RV64IA-WMO-TRAILING-FENCE: # %bb.0: ; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw -; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) -; RV64IA-WMO-TRAILING-FENCE-NEXT: slli a0, a0, 32 -; RV64IA-WMO-TRAILING-FENCE-NEXT: srli a0, a0, 32 +; RV64IA-WMO-TRAILING-FENCE-NEXT: lwu a0, 0(a0) ; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw ; RV64IA-WMO-TRAILING-FENCE-NEXT: ret ; ; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_i32_seq_cst: ; RV64IA-TSO-TRAILING-FENCE: # %bb.0: ; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw -; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) -; RV64IA-TSO-TRAILING-FENCE-NEXT: slli a0, a0, 32 -; RV64IA-TSO-TRAILING-FENCE-NEXT: srli a0, a0, 32 +; RV64IA-TSO-TRAILING-FENCE-NEXT: lwu a0, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret ; ; RV32IA-ZALASR-LABEL: atomic_load_i32_seq_cst: From 14f212efec8758c7414cb075783650bab8c521fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20G=C3=B3rski?= Date: Thu, 24 Apr 2025 15:07:14 +0200 Subject: [PATCH 3/6] fixup! [RISCV] Optimized `and` with `atomic_load` into `zextload` when safe. 
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 614b5631b3bd5..742beaaf5644b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -15283,6 +15283,8 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N, SDValue N0 = N->getOperand(0); if (N0.getOpcode() != ISD::ATOMIC_LOAD) return SDValue(); + if (!N0.hasOneUse()) + return SDValue(); AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode()); if (isStrongerThanMonotonic(ALoad->getSuccessOrdering())) @@ -15310,7 +15312,7 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N, SDValue ZextLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, ResultVT, Chain, Ptr, MemOp->getPointerInfo(), LoadedVT, MemOp->getAlign(), MemOp->getFlags()); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), ZextLoad); + DCI.CombineTo(N, ZextLoad); DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1)); DCI.recursivelyDeleteUnusedNodes(N0.getNode()); return SDValue(N, 0); From ce6f04f9163fce4dde40d96bba22d742573f2bd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20G=C3=B3rski?= Date: Thu, 24 Apr 2025 22:07:46 +0200 Subject: [PATCH 4/6] fixup! [RISCV] Optimized `and` with `atomic_load` into `zextload` when safe. 
used maskTrailingOnes --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 2f154990085a3..a6e165545367d 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -15299,11 +15299,7 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N, EVT LoadedVT = ALoad->getMemoryVT(); EVT ResultVT = N->getValueType(0); - SDValue MaskVal = N->getOperand(1); - ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(MaskVal); - if (!MaskConst) - return SDValue(); - uint64_t Mask = MaskConst->getZExtValue(); + uint64_t Mask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits()); uint64_t ExpectedMask = LoadedVT.getSizeInBits() == 8 ? 0xFF : LoadedVT.getSizeInBits() == 16 ? 0xFFFF : LoadedVT.getSizeInBits() == 32 ? 0xFFFFFFFF : 0xFFFFFFFFFFFFFFFF; From 78ecf8aaf1c3b842f4e908475431aa668a82f655 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20G=C3=B3rski?= Date: Fri, 25 Apr 2025 14:57:25 +0200 Subject: [PATCH 5/6] fixup! [RISCV] Optimized `and` with `atomic_load` into `zextload` when safe. Added pattern match for atomic_load_zext_n with replacement of getLoad with getAtomicLoad. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 12 +++--------- llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 3 +++ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index a6e165545367d..6ad6fa8255844 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -15297,8 +15297,6 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N, return SDValue(); EVT LoadedVT = ALoad->getMemoryVT(); - EVT ResultVT = N->getValueType(0); - uint64_t Mask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits()); uint64_t ExpectedMask = LoadedVT.getSizeInBits() == 8 ? 0xFF : LoadedVT.getSizeInBits() == 16 ? 
0xFFFF @@ -15307,13 +15305,9 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N, if (Mask != ExpectedMask) return SDValue(); - SDLoc DL(N); - SDValue Chain = ALoad->getChain(); - SDValue Ptr = ALoad->getBasePtr(); - MachineMemOperand *MemOp = ALoad->getMemOperand(); - SDValue ZextLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, ResultVT, Chain, Ptr, - MemOp->getPointerInfo(), LoadedVT, - MemOp->getAlign(), MemOp->getFlags()); + SDValue ZextLoad = DAG.getAtomicLoad( + ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0), + ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand()); DCI.CombineTo(N, ZextLoad); DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1)); DCI.recursivelyDeleteUnusedNodes(N0.getNode()); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index b348e774d50b8..fc649847e078c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -167,6 +167,8 @@ class seq_cst_store let Predicates = [HasAtomicLdSt] in { def : LdPat, LB>; def : LdPat, LH>; + def : LdPat, LBU>; + def : LdPat, LHU>; def : StPat, SB, GPR, XLenVT>; def : StPat, SH, GPR, XLenVT>; @@ -179,6 +181,7 @@ let Predicates = [HasAtomicLdSt, IsRV32] in { let Predicates = [HasAtomicLdSt, IsRV64] in { def : LdPat, LW>; + def : LdPat, LWU>; def : LdPat, LD, i64>; def : StPat, SD, GPR, i64>; } From 769d14fba67d5965212f647721ca543b48df53a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20G=C3=B3rski?= Date: Fri, 25 Apr 2025 19:01:03 +0200 Subject: [PATCH 6/6] fixup! fixup! [RISCV] Optimized `and` with `atomic_load` into `zextload` when safe. 
corrected wrong usage of maskTrailingOnes --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6ad6fa8255844..86938e3e32ca9 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -15297,11 +15297,11 @@ static SDValue reduceANDOfAtomicLoad(SDNode *N, return SDValue(); EVT LoadedVT = ALoad->getMemoryVT(); - uint64_t Mask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits()); - uint64_t ExpectedMask = LoadedVT.getSizeInBits() == 8 ? 0xFF - : LoadedVT.getSizeInBits() == 16 ? 0xFFFF - : LoadedVT.getSizeInBits() == 32 ? 0xFFFFFFFF - : 0xFFFFFFFFFFFFFFFF; + ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!MaskConst) + return SDValue(); + uint64_t Mask = MaskConst->getZExtValue(); + uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits()); if (Mask != ExpectedMask) return SDValue();