-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AArch64][CodeGen] Add patterns for +lsfe atomic stores #131174
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
When FEAT_LSFE is enabled, the ST{B}FADD, ST{B}FMAX and ST{B}FMIN
atomic instructions are available. This adds patterns to match an
atomicrmw fadd, fmin or fmax to these instructions when the result
is unused.
|
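@llvm/pr-subscribers-backend-aarch64 Author: Kerry McLaughlin (kmclaughlin-arm) Changes: When FEAT_LSFE is enabled, the ST{B}FADD, ST{B}FMAX and ST{B}FMIN atomic instructions are available. This adds patterns to match an atomicrmw fadd, fmin or fmax to these instructions when the result is unused. Patch is 34.39 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131174.diff 5 Files Affected: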
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 2d7a9d6f00bd0..aaf8e1bfc8f0f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -548,13 +548,13 @@ defm atomic_load_fmin : binary_atomic_op_fp<atomic_load_fmin>;
defm atomic_load_fmax : binary_atomic_op_fp<atomic_load_fmax>;
let Predicates = [HasLSFE] in {
- defm : LDFPOPregister_patterns<"LDFADD", "atomic_load_fadd">;
- defm : LDFPOPregister_patterns<"LDFMAXNM", "atomic_load_fmax">;
- defm : LDFPOPregister_patterns<"LDFMINNM", "atomic_load_fmin">;
+ defm : LDFPOPregister_patterns<"FADD", "atomic_load_fadd">;
+ defm : LDFPOPregister_patterns<"FMAXNM", "atomic_load_fmax">;
+ defm : LDFPOPregister_patterns<"FMINNM", "atomic_load_fmin">;
- defm : LDBFPOPregister_patterns<"LDBFADD", "atomic_load_fadd">;
- defm : LDBFPOPregister_patterns<"LDBFMAXNM", "atomic_load_fmax">;
- defm : LDBFPOPregister_patterns<"LDBFMINNM", "atomic_load_fmin">;
+ defm : LDBFPOPregister_patterns<"BFADD", "atomic_load_fadd">;
+ defm : LDBFPOPregister_patterns<"BFMAXNM", "atomic_load_fmax">;
+ defm : LDBFPOPregister_patterns<"BFMINNM", "atomic_load_fmin">;
}
// v8.9a/v9.4a FEAT_LRCPC patterns
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 255cd0ec5840c..99bd85a448d83 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -12493,19 +12493,39 @@ multiclass LDOPregister_patterns_mod<string inst, string op, string mod> {
(i32 (!cast<Instruction>(mod#Wrr) WZR, GPR32:$Rm))>;
}
+class AtomicStoreMonotonicFrag<SDNode atomic_op> : PatFrag<
+ (ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value),
+ [{ return SDValue(N, 0).use_empty() &&
+ cast<AtomicSDNode>(N)->getMergedOrdering() == AtomicOrdering::Monotonic; }]>;
+
+class AtomicStoreReleaseFrag<SDNode atomic_op> : PatFrag<
+ (ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value),
+ [{ return SDValue(N, 0).use_empty() &&
+ cast<AtomicSDNode>(N)->getMergedOrdering() == AtomicOrdering::Release; }]>;
+
let Predicates = [HasLSFE] in
multiclass LDFPOPregister_patterns_ord_dag<string inst, string suffix, string op,
- ValueType vt, dag data> {
+ ValueType vt, dag data> {
def : Pat<(!cast<PatFrag>(op#"_"#vt#"_monotonic") FPR64:$Rn, data),
- (!cast<Instruction>(inst # suffix) data, FPR64:$Rn)>;
+ (!cast<Instruction>("LD" # inst # suffix) data, FPR64:$Rn)>;
def : Pat<(!cast<PatFrag>(op#"_"#vt#"_acquire") FPR64:$Rn, data),
- (!cast<Instruction>(inst # "A" # suffix) data, FPR64:$Rn)>;
+ (!cast<Instruction>("LD" # inst # "A" # suffix) data, FPR64:$Rn)>;
def : Pat<(!cast<PatFrag>(op#"_"#vt#"_release") FPR64:$Rn, data),
- (!cast<Instruction>(inst # "L" # suffix) data, FPR64:$Rn)>;
+ (!cast<Instruction>("LD" # inst # "L" # suffix) data, FPR64:$Rn)>;
def : Pat<(!cast<PatFrag>(op#"_"#vt#"_acq_rel") FPR64:$Rn, data),
- (!cast<Instruction>(inst # "AL" # suffix) data, FPR64:$Rn)>;
+ (!cast<Instruction>("LD" # inst # "AL" # suffix) data, FPR64:$Rn)>;
def : Pat<(!cast<PatFrag>(op#"_"#vt#"_seq_cst") FPR64:$Rn, data),
- (!cast<Instruction>(inst # "AL" # suffix) data, FPR64:$Rn)>;
+ (!cast<Instruction>("LD" # inst # "AL" # suffix) data, FPR64:$Rn)>;
+
+ let AddedComplexity = 5 in {
+ def : Pat<(AtomicStoreMonotonicFrag<!cast<SDNode>(op)> FPR64:$Rn, data),
+ (!cast<Instruction>("ST" # inst # suffix) data, FPR64:$Rn)>;
+
+ def : Pat<(AtomicStoreReleaseFrag<!cast<SDNode>(op)> FPR64:$Rn, data),
+ (!cast<Instruction>("ST" # inst # "L" # suffix) data, FPR64:$Rn)>;
+ }
}
multiclass LDFPOPregister_patterns_ord<string inst, string suffix, string op,
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
index fc9a126f79a83..bd9b814ac533e 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
@@ -10,6 +10,13 @@ define dso_local half @atomicrmw_fadd_half_aligned_monotonic(ptr %ptr, half %val
ret half %r
}
+define dso_local void @atomicrmw_fadd_half_aligned_monotonic_to_store(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_monotonic_to_store:
+; CHECK: stfadd h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, half %value monotonic, align 2
+ ret void
+}
+
define dso_local half @atomicrmw_fadd_half_aligned_acquire(ptr %ptr, half %value) {
; CHECK-LABEL: atomicrmw_fadd_half_aligned_acquire:
; CHECK: ldfadda h0, h0, [x0]
@@ -24,6 +31,13 @@ define dso_local half @atomicrmw_fadd_half_aligned_release(ptr %ptr, half %value
ret half %r
}
+define dso_local void @atomicrmw_fadd_half_aligned_release_to_store(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_release_to_store:
+; CHECK: stfaddl h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, half %value release, align 2
+ ret void
+}
+
define dso_local half @atomicrmw_fadd_half_aligned_acq_rel(ptr %ptr, half %value) {
; CHECK-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
; CHECK: ldfaddal h0, h0, [x0]
@@ -45,6 +59,13 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_monotonic(ptr %ptr, bfloa
ret bfloat %r
}
+define dso_local void @atomicrmw_fadd_bfloat_aligned_monotonic_to_store(ptr %ptr, bfloat %value) {
+; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_monotonic_to_store:
+; CHECK: stbfadd h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, bfloat %value monotonic, align 2
+ ret void
+}
+
define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_acquire(ptr %ptr, bfloat %value) {
; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_acquire:
; CHECK: ldbfadda h0, h0, [x0]
@@ -59,6 +80,13 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_release(ptr %ptr, bfloat
ret bfloat %r
}
+define dso_local void @atomicrmw_fadd_bfloat_aligned_release_to_store(ptr %ptr, bfloat %value) {
+; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_release_to_store:
+; CHECK: stbfaddl h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, bfloat %value release, align 2
+ ret void
+}
+
define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_acq_rel(ptr %ptr, bfloat %value) {
; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_acq_rel:
; CHECK: ldbfaddal h0, h0, [x0]
@@ -80,6 +108,13 @@ define dso_local float @atomicrmw_fadd_float_aligned_monotonic(ptr %ptr, float %
ret float %r
}
+define dso_local void @atomicrmw_fadd_float_aligned_monotonic_to_store(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_monotonic_to_store:
+; CHECK: stfadd s0, [x0]
+ %r = atomicrmw fadd ptr %ptr, float %value monotonic, align 4
+ ret void
+}
+
define dso_local float @atomicrmw_fadd_float_aligned_acquire(ptr %ptr, float %value) {
; CHECK-LABEL: atomicrmw_fadd_float_aligned_acquire:
; CHECK: ldfadda s0, s0, [x0]
@@ -94,6 +129,13 @@ define dso_local float @atomicrmw_fadd_float_aligned_release(ptr %ptr, float %va
ret float %r
}
+define dso_local void @atomicrmw_fadd_float_aligned_release_to_store(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fadd_float_aligned_release_to_store:
+; CHECK: stfaddl s0, [x0]
+ %r = atomicrmw fadd ptr %ptr, float %value release, align 4
+ ret void
+}
+
define dso_local float @atomicrmw_fadd_float_aligned_acq_rel(ptr %ptr, float %value) {
; CHECK-LABEL: atomicrmw_fadd_float_aligned_acq_rel:
; CHECK: ldfaddal s0, s0, [x0]
@@ -115,6 +157,13 @@ define dso_local double @atomicrmw_fadd_double_aligned_monotonic(ptr %ptr, doubl
ret double %r
}
+define dso_local void @atomicrmw_fadd_double_aligned_monotonic_to_store(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_monotonic_to_store:
+; CHECK: stfadd d0, [x0]
+ %r = atomicrmw fadd ptr %ptr, double %value monotonic, align 8
+ ret void
+}
+
define dso_local double @atomicrmw_fadd_double_aligned_acquire(ptr %ptr, double %value) {
; CHECK-LABEL: atomicrmw_fadd_double_aligned_acquire:
; CHECK: ldfadda d0, d0, [x0]
@@ -129,6 +178,13 @@ define dso_local double @atomicrmw_fadd_double_aligned_release(ptr %ptr, double
ret double %r
}
+define dso_local void @atomicrmw_fadd_double_aligned_release_to_store(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fadd_double_aligned_release_to_store:
+; CHECK: stfaddl d0, [x0]
+ %r = atomicrmw fadd ptr %ptr, double %value release, align 8
+ ret void
+}
+
define dso_local double @atomicrmw_fadd_double_aligned_acq_rel(ptr %ptr, double %value) {
; CHECK-LABEL: atomicrmw_fadd_double_aligned_acq_rel:
; CHECK: ldfaddal d0, d0, [x0]
@@ -805,6 +861,13 @@ define dso_local half @atomicrmw_fmax_half_aligned_monotonic(ptr %ptr, half %val
ret half %r
}
+define dso_local void @atomicrmw_fmax_half_aligned_monotonic_to_store(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_monotonic_to_store:
+; CHECK: stfmaxnm h0, [x0]
+ %r = atomicrmw fmax ptr %ptr, half %value monotonic, align 2
+ ret void
+}
+
define dso_local half @atomicrmw_fmax_half_aligned_acquire(ptr %ptr, half %value) {
; CHECK-LABEL: atomicrmw_fmax_half_aligned_acquire:
; CHECK: ldfmaxnma h0, h0, [x0]
@@ -819,6 +882,13 @@ define dso_local half @atomicrmw_fmax_half_aligned_release(ptr %ptr, half %value
ret half %r
}
+define dso_local void @atomicrmw_fmax_half_aligned_release_to_store(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmax_half_aligned_release_to_store:
+; CHECK: stfmaxnml h0, [x0]
+ %r = atomicrmw fmax ptr %ptr, half %value release, align 2
+ ret void
+}
+
define dso_local half @atomicrmw_fmax_half_aligned_acq_rel(ptr %ptr, half %value) {
; CHECK-LABEL: atomicrmw_fmax_half_aligned_acq_rel:
; CHECK: ldfmaxnmal h0, h0, [x0]
@@ -840,6 +910,13 @@ define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_monotonic(ptr %ptr, bfloa
ret bfloat %r
}
+define dso_local void @atomicrmw_fmax_bfloat_aligned_monotonic_to_store(ptr %ptr, bfloat %value) {
+; CHECK-LABEL: atomicrmw_fmax_bfloat_aligned_monotonic_to_store:
+; CHECK: stbfmaxnm h0, [x0]
+ %r = atomicrmw fmax ptr %ptr, bfloat %value monotonic, align 2
+ ret void
+}
+
define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_acquire(ptr %ptr, bfloat %value) {
; CHECK-LABEL: atomicrmw_fmax_bfloat_aligned_acquire:
; CHECK: ldbfmaxnma h0, h0, [x0]
@@ -854,6 +931,13 @@ define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_release(ptr %ptr, bfloat
ret bfloat %r
}
+define dso_local void @atomicrmw_fmax_bfloat_aligned_release_to_store(ptr %ptr, bfloat %value) {
+; CHECK-LABEL: atomicrmw_fmax_bfloat_aligned_release_to_store:
+; CHECK: stbfmaxnml h0, [x0]
+ %r = atomicrmw fmax ptr %ptr, bfloat %value release, align 2
+ ret void
+}
+
define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_acq_rel(ptr %ptr, bfloat %value) {
; CHECK-LABEL: atomicrmw_fmax_bfloat_aligned_acq_rel:
; CHECK: ldbfmaxnmal h0, h0, [x0]
@@ -875,6 +959,13 @@ define dso_local float @atomicrmw_fmax_float_aligned_monotonic(ptr %ptr, float %
ret float %r
}
+define dso_local void @atomicrmw_fmax_float_aligned_monotonic_to_store(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_monotonic_to_store:
+; CHECK: stfmaxnm s0, [x0]
+ %r = atomicrmw fmax ptr %ptr, float %value monotonic, align 4
+ ret void
+}
+
define dso_local float @atomicrmw_fmax_float_aligned_acquire(ptr %ptr, float %value) {
; CHECK-LABEL: atomicrmw_fmax_float_aligned_acquire:
; CHECK: ldfmaxnma s0, s0, [x0]
@@ -889,6 +980,13 @@ define dso_local float @atomicrmw_fmax_float_aligned_release(ptr %ptr, float %va
ret float %r
}
+define dso_local void @atomicrmw_fmax_float_aligned_release_to_store(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmax_float_aligned_release_to_store:
+; CHECK: stfmaxnml s0, [x0]
+ %r = atomicrmw fmax ptr %ptr, float %value release, align 4
+ ret void
+}
+
define dso_local float @atomicrmw_fmax_float_aligned_acq_rel(ptr %ptr, float %value) {
; CHECK-LABEL: atomicrmw_fmax_float_aligned_acq_rel:
; CHECK: ldfmaxnmal s0, s0, [x0]
@@ -910,6 +1008,13 @@ define dso_local double @atomicrmw_fmax_double_aligned_monotonic(ptr %ptr, doubl
ret double %r
}
+define dso_local void @atomicrmw_fmax_double_aligned_monotonic_to_store(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_monotonic_to_store:
+; CHECK: stfmaxnm d0, [x0]
+ %r = atomicrmw fmax ptr %ptr, double %value monotonic, align 8
+ ret void
+}
+
define dso_local double @atomicrmw_fmax_double_aligned_acquire(ptr %ptr, double %value) {
; CHECK-LABEL: atomicrmw_fmax_double_aligned_acquire:
; CHECK: ldfmaxnma d0, d0, [x0]
@@ -924,6 +1029,13 @@ define dso_local double @atomicrmw_fmax_double_aligned_release(ptr %ptr, double
ret double %r
}
+define dso_local void @atomicrmw_fmax_double_aligned_release_to_store(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmax_double_aligned_release_to_store:
+; CHECK: stfmaxnml d0, [x0]
+ %r = atomicrmw fmax ptr %ptr, double %value release, align 8
+ ret void
+}
+
define dso_local double @atomicrmw_fmax_double_aligned_acq_rel(ptr %ptr, double %value) {
; CHECK-LABEL: atomicrmw_fmax_double_aligned_acq_rel:
; CHECK: ldfmaxnmal d0, d0, [x0]
@@ -1120,6 +1232,13 @@ define dso_local half @atomicrmw_fmin_half_aligned_monotonic(ptr %ptr, half %val
ret half %r
}
+define dso_local void @atomicrmw_fmin_half_aligned_monotonic_to_store(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_monotonic_to_store:
+; CHECK: stfminnm h0, [x0]
+ %r = atomicrmw fmin ptr %ptr, half %value monotonic, align 2
+ ret void
+}
+
define dso_local half @atomicrmw_fmin_half_aligned_acquire(ptr %ptr, half %value) {
; CHECK-LABEL: atomicrmw_fmin_half_aligned_acquire:
; CHECK: ldfminnma h0, h0, [x0]
@@ -1134,6 +1253,13 @@ define dso_local half @atomicrmw_fmin_half_aligned_release(ptr %ptr, half %value
ret half %r
}
+define dso_local void @atomicrmw_fmin_half_aligned_release_to_store(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fmin_half_aligned_release_to_store:
+; CHECK: stfminnml h0, [x0]
+ %r = atomicrmw fmin ptr %ptr, half %value release, align 2
+ ret void
+}
+
define dso_local half @atomicrmw_fmin_half_aligned_acq_rel(ptr %ptr, half %value) {
; CHECK-LABEL: atomicrmw_fmin_half_aligned_acq_rel:
; CHECK: ldfminnmal h0, h0, [x0]
@@ -1155,6 +1281,13 @@ define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_monotonic(ptr %ptr, bfloa
ret bfloat %r
}
+define dso_local void @atomicrmw_fmin_bfloat_aligned_monotonic_to_store(ptr %ptr, bfloat %value) {
+; CHECK-LABEL: atomicrmw_fmin_bfloat_aligned_monotonic_to_store:
+; CHECK: stbfminnm h0, [x0]
+ %r = atomicrmw fmin ptr %ptr, bfloat %value monotonic, align 2
+ ret void
+}
+
define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_acquire(ptr %ptr, bfloat %value) {
; CHECK-LABEL: atomicrmw_fmin_bfloat_aligned_acquire:
; CHECK: ldbfminnma h0, h0, [x0]
@@ -1169,6 +1302,13 @@ define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_release(ptr %ptr, bfloat
ret bfloat %r
}
+define dso_local void @atomicrmw_fmin_bfloat_aligned_release_to_store(ptr %ptr, bfloat %value) {
+; CHECK-LABEL: atomicrmw_fmin_bfloat_aligned_release_to_store:
+; CHECK: stbfminnml h0, [x0]
+ %r = atomicrmw fmin ptr %ptr, bfloat %value release, align 2
+ ret void
+}
+
define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_acq_rel(ptr %ptr, bfloat %value) {
; CHECK-LABEL: atomicrmw_fmin_bfloat_aligned_acq_rel:
; CHECK: ldbfminnmal h0, h0, [x0]
@@ -1190,6 +1330,13 @@ define dso_local float @atomicrmw_fmin_float_aligned_monotonic(ptr %ptr, float %
ret float %r
}
+define dso_local void @atomicrmw_fmin_float_aligned_monotonic_to_store(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_monotonic_to_store:
+; CHECK: stfminnm s0, [x0]
+ %r = atomicrmw fmin ptr %ptr, float %value monotonic, align 4
+ ret void
+}
+
define dso_local float @atomicrmw_fmin_float_aligned_acquire(ptr %ptr, float %value) {
; CHECK-LABEL: atomicrmw_fmin_float_aligned_acquire:
; CHECK: ldfminnma s0, s0, [x0]
@@ -1204,6 +1351,13 @@ define dso_local float @atomicrmw_fmin_float_aligned_release(ptr %ptr, float %va
ret float %r
}
+define dso_local void @atomicrmw_fmin_float_aligned_release_to_store(ptr %ptr, float %value) {
+; CHECK-LABEL: atomicrmw_fmin_float_aligned_release_to_store:
+; CHECK: stfminnml s0, [x0]
+ %r = atomicrmw fmin ptr %ptr, float %value release, align 4
+ ret void
+}
+
define dso_local float @atomicrmw_fmin_float_aligned_acq_rel(ptr %ptr, float %value) {
; CHECK-LABEL: atomicrmw_fmin_float_aligned_acq_rel:
; CHECK: ldfminnmal s0, s0, [x0]
@@ -1225,6 +1379,13 @@ define dso_local double @atomicrmw_fmin_double_aligned_monotonic(ptr %ptr, doubl
ret double %r
}
+define dso_local void @atomicrmw_fmin_double_aligned_monotonic_to_store(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_monotonic_to_store:
+; CHECK: stfminnm d0, [x0]
+ %r = atomicrmw fmin ptr %ptr, double %value monotonic, align 8
+ ret void
+}
+
define dso_local double @atomicrmw_fmin_double_aligned_acquire(ptr %ptr, double %value) {
; CHECK-LABEL: atomicrmw_fmin_double_aligned_acquire:
; CHECK: ldfminnma d0, d0, [x0]
@@ -1239,6 +1400,13 @@ define dso_local double @atomicrmw_fmin_double_aligned_release(ptr %ptr, double
ret double %r
}
+define dso_local void @atomicrmw_fmin_double_aligned_release_to_store(ptr %ptr, double %value) {
+; CHECK-LABEL: atomicrmw_fmin_double_aligned_release_to_store:
+; CHECK: stfminnml d0, [x0]
+ %r = atomicrmw fmin ptr %ptr, double %value release, align 8
+ ret void
+}
+
define dso_local double @atomicrmw_fmin_double_aligned_acq_rel(ptr %ptr, double %value) {
; CHECK-LABEL: atomicrmw_fmin_double_aligned_acq_rel:
; CHECK: ldfminnmal d0, d0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll
index a22cc5806d86d..67a5565f31d94 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll
@@ -10,6 +10,13 @@ define dso_local half @atomicrmw_fadd_half_aligned_monotonic(ptr %ptr, half %val
ret half %r
}
+define dso_local void @atomicrmw_fadd_half_aligned_monotonic_to_store(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_monotonic_to_store:
+; CHECK: stfadd h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, half %value monotonic, align 2
+ ret void
+}
+
define dso_local half @atomicrmw_fadd_half_aligned_acquire(ptr %ptr, half %value) {
; CHECK-LABEL: atomicrmw_fadd_half_aligned_acquire:
; CHECK: ldfadda h0, h0, [x0]
@@ -24,6 +31,13 @@ define dso_local half @atomicrmw_fadd_half_aligned_release(ptr %ptr, half %value
ret half %r
}
+define dso_local void @atomicrmw_fadd_half_aligned_release_to_store(ptr %ptr, half %value) {
+; CHECK-LABEL: atomicrmw_fadd_half_aligned_release_to_store:
+; CHECK: stfaddl h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, half %value release, align 2
+ ret void
+}
+
define dso_local half @atomicrmw_fadd_half_aligned_acq_rel(ptr %ptr, half %value) {
; CHECK-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
; CHECK: ldfaddal h0, h0, [x0]
@@ -45,6 +59,13 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_monotonic(ptr %ptr, bfloa
ret bfloat %r
}
+define dso_local void @atomicrmw_fadd_bfloat_aligned_monotonic_to_store(ptr %ptr, bfloat %value) {
+; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_monotonic_to_store:
+; CHECK: stbfadd h0, [x0]
+ %r = atomicrmw fadd ptr %ptr, bfloat %value monotonic, align 2
+ ret void
+}
+
define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_acquire(ptr %ptr, bfloat %value) {
; CHECK-LABEL: atomicrmw_fadd_bfloat_aligned_acquire:
; CHECK: ldbfadda h0, h0, [x0]
@@ -59,6 +80,13 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_release(ptr %ptr, bfloat
ret ...
[truncated]
|
|
✅ With the latest revision this PR passed the Python code formatter. |
jthackray
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, nice and compact code.
|
Hi - I'm not an atomics expert by a long shot, but I don't believe we generate stadd (for example) from codegen and I would expect these new intrinsics to act the same. There was a thread on this in #72887, which links to a couple of threads on the GCC mailing lists. It looks like this is implementing relaxed and release semantics, not acquire, which would be the last thread. The important part being: […] Should we be treating these intrinsics the same as the existing ldadd etc, or do you know if they can be treated differently from a memory model perspective? Or can we do this for all intrinsics now? Thanks. |
Hi @davemgreen, thank you for pointing out the other patch & mailing list threads. I wasn't aware this had already been discussed and after reading through these I'm going to close this PR as I don't believe these intrinsics can be treated differently to the existing ones. |
|
For future reference, here is a proposal to add "atomic reduction operations" that would be able to make use of these store-form instructions. I wonder whether we should add the tests anyway, to check that they don't lower to the ST[F]ADD forms. |
Yeah they might be useful to keep. The last few times this has come up there has been a suggestion to add a comment somewhere to explain why we don't generate them too. |
Following the discussion on llvm#131174, update the generate-tests.py script to emit atomicrmw tests where the result is unused and add a note to explain why these do not use ST[F]ADD.
|
@tmatheson-arm @davemgreen I've moved the tests & changes to the generate-tests script into #132022, adding a reference to this PR. |
When FEAT_LSFE is enabled, the ST{B}FADD, ST{B}FMAX and ST{B}FMIN
atomic instructions are available. This patch adds patterns to match an
atomicrmw fadd, fmin or fmax to these instructions when the result is unused.