-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[AArch64] Mark aarch64_set_fpmr as IntrWriteMem #146353
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
llvm.aarch64.set.fpmr only writes to inaccessible memory. Tag it with the IntrWriteMem and IntrInaccessibleMemOnly properties so the optimiser can treat it as a pure write. The original patch did not add this property, causing the intrinsic to be conservatively treated as readwrite. This commit fixes that.
|
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-clang Author: None (CarolineConcatto) Changesllvm.aarch64.set.fpmr only writes to inaccessible memory. Tag it with the IntrWriteMem and IntrInaccessibleMemOnly properties so the optimiser can treat it as a pure write. The original patch did not add this property, causing the intrinsic to be conservatively treated as readwrite. This commit fixes that. Full diff: https://github.com/llvm/llvm-project/pull/146353.diff 11 Files Affected:
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
index 9913c54567719..f7030b62c0f8e 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
@@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn, float16x4_t vm, fpm_t fpm) {
mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm) {
return vcvtq_mf8_f16_fpm(vn, vm, fpm);
}
+
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
index 44db59df6c1c4..c9c4e277440ae 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
@@ -83,6 +83,8 @@ float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, f
return vdot_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <4 x half> @test_vdot_laneq_f16(
// CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -268,3 +270,5 @@ float32x4_t test_vdotq_lane_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x8_t vm,
float32x4_t test_vdotq_laneq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpmr) {
return vdotq_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr);
}
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
index d4f074a92b05b..222a8e7272745 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c
@@ -154,6 +154,8 @@ float16x8_t test_vmlalb_lane(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm, fp
return vmlalbq_lane_f16_mf8_fpm(vd, vn, vm, 0, fpm);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <8 x half> @test_vmlalb_laneq(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -379,3 +381,5 @@ float32x4_t test_vmlalltt_lane(float32x4_t vd, mfloat8x16_t vn, mfloat8x8_t vm,
float32x4_t test_vmlalltt_laneq(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpm) {
return vmlallttq_laneq_f32_mf8_fpm(vd, vn, vm, 15, fpm);
}
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
index ae2e780f84cfe..3dbeb0aab6e25 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
@@ -32,6 +32,8 @@ svmfloat8_t test_cvt_f16_x2(svfloat16x2_t zn, fpm_t fpmr) __arm_streaming {
return SVE_ACLE_FUNC(svcvt_mf8,_f16_x2,_fpm)(zn, fpmr);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: @test_cvt_f32_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
@@ -207,3 +209,5 @@ svbfloat16x2_t test_cvtl1_bf16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming {
svbfloat16x2_t test_cvtl2_bf16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming {
return SVE_ACLE_FUNC(svcvtl2_bf16,_mf8,_x2_fpm)(zn, fpmr);
}
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c
index 95d6383ab30ef..c11baf60c6b48 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c
@@ -35,6 +35,8 @@ void test_svmopa_za16_mf8_m(svbool_t pn, svbool_t pm, svmfloat8_t zn,
SVE_ACLE_FUNC(svmopa_za16,_mf8,_m_fpm)(1, pn, pm, zn, zm, fpmr);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+//
// CHECK-LABEL: define dso_local void @test_svmopa_za32_mf8_m(
// CHECK-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -53,3 +55,6 @@ void test_svmopa_za32_mf8_m(svbool_t pn, svbool_t pm, svmfloat8_t zn,
svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
SVE_ACLE_FUNC(svmopa_za32,_mf8,_m_fpm)(3, pn, pm, zn, zm, fpmr);
}
+
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c
index d603045edf282..ef8319ffa6372 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c
@@ -35,6 +35,8 @@ void test_svmla_lane_za16_vg2x1(uint32_t slice, svmfloat8_t zn, svmfloat8_t zm,
SME_ACLE_FUNC(svmla_lane_za16,_mf8,_vg2x1_fpm,,)(slice, zn, zm, 0, fpm);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local void @test_svmla_lane_za16_vg2x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -314,3 +316,6 @@ void test_svmla_multi_za32_vg4x2(uint32_t slice, svmfloat8x2_t zn, svmfloat8x2_t
void test_svmla_multi_za32_vg4x4(uint32_t slice, svmfloat8x4_t zn, svmfloat8x4_t zm, fpm_t fpm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmla_za32,_mf8,_vg4x4_fpm,,)(slice, zn, zm, fpm);
}
+
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
index c026b8aa216f3..7187831f2a0a4 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
@@ -46,6 +46,8 @@ svbfloat16_t test_svcvt1_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svcvt1_bf16,_mf8,_fpm)(zn, fpm);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svcvt2_bf16_mf8(
// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -171,3 +173,6 @@ svfloat16_t test_svcvtlt1_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
svfloat16_t test_svcvtlt2_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svcvtlt2_f16,_mf8,_fpm)(zn, fpm);
}
+
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c
index ed5b0ce02af4b..0ffad95720954 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c
@@ -46,6 +46,8 @@ svmfloat8_t test_svcvtn_f8_bf16(svbfloat16x2_t zn_zm, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svcvtn_mf8,_bf16_x2,_fpm)(zn_zm, fpm);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svcvtn_f8_f16(
// CHECK-SAME: <vscale x 8 x half> [[ZN_ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -99,3 +101,6 @@ svmfloat8_t test_svcvtnb_f8_f32(svfloat32x2_t zn_zm, fpm_t fpm) STREAMING {
svmfloat8_t test_svcvtnt_f8_f32(svmfloat8_t zd, svfloat32x2_t zn_zm, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svcvtnt_mf8,_f32_x2,_fpm)(zd, zn_zm, fpm);
}
+
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c
index 2f3994df03784..e166c130e3a72 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c
@@ -46,6 +46,8 @@ svfloat32_t test_svdot_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm,
return SVE_ACLE_FUNC(svdot,_f32_mf8,_fpm)(zda, zn, zm, fpm);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svdot_n_f32_mf8(
// CHECK-SAME: <vscale x 4 x float> [[ZDA:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -147,3 +149,6 @@ svfloat32_t test_svdot_lane_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t
svfloat16_t test_svdot_lane_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svdot_lane,_f16_mf8,_fpm)(zda, zn, zm, 7, fpm);
}
+
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c
index 425e6a57ffe3c..de5fffc0116d9 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c
@@ -46,6 +46,8 @@ svfloat16_t test_svmlalb_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm
return SVE_ACLE_FUNC(svmlalb,_f16_mf8,_fpm)(zda, zn, zm, fpm);
}
+// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
+
// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svmlalb_n_f16_mf8(
// CHECK-SAME: <vscale x 8 x half> [[ZDA:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -387,3 +389,6 @@ svfloat32_t test_svmlalltb_lane_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloa
svfloat32_t test_svmlalltt_lane_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svmlalltt_lane,_f32_mf8,_fpm)(zda, zn, zm, 7, fpm);
}
+
+
+// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 0ec5f5163118e..31d05bc163fa3 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -761,7 +761,7 @@ let TargetPrefix = "aarch64" in {
class RNDR_Intrinsic
: DefaultAttrsIntrinsic<[llvm_i64_ty, llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects]>;
class FPMR_Set_Intrinsic
- : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleMemOnly]>;
+ : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>;
}
// FP environment registers.
|
| // CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]] | ||
| // CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The test changes look manual? Which is going to make it awkward in the future. Perhaps add a dedicated test file for this specific purpose?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've tried to find similar test in Sema, but I think what I want is just when we have the llvm-ir/codegen.
Let me know if the test I added is fine.
It is also now checking other fp8 functions memory attributes, besides set.fpmr.
3917109 to
9a29cb4
Compare
llvm.aarch64.set.fpmr only writes to inaccessible memory. Tag it with the IntrWriteMem and IntrInaccessibleMemOnly properties so the optimiser can treat it as a pure write.
The original patch did not add this property, causing the intrinsic to be conservatively treated as readwrite. This commit fixes that.