Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c
Original file line number Diff line number Diff line change
Expand Up @@ -330,3 +330,6 @@ mfloat8x8_t test_vcvt_mf8_f16_fpm(float16x4_t vn, float16x4_t vm, fpm_t fpm) {
mfloat8x16_t test_vcvtq_mf8_f16_fpm(float16x8_t vn, float16x8_t vm, fpm_t fpm) {
return vcvtq_mf8_f16_fpm(vn, vm, fpm);
}

// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test changes look manual? Which is going to make it awkward in the future. Perhaps add a dedicated test file for this specific purpose?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've tried to find similar test in Sema, but I think what I want is just when we have the llvm-ir/codegen.
Let me know if the test I added is fine.
It is also now checking other fp8 functions memory attributes, besides set.fpmr.

Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, f
return vdot_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr);
}

// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]

// CHECK-LABEL: define dso_local <4 x half> @test_vdot_laneq_f16(
// CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
Expand Down Expand Up @@ -268,3 +270,5 @@ float32x4_t test_vdotq_lane_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x8_t vm,
float32x4_t test_vdotq_laneq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpmr) {
return vdotq_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr);
}

// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ float16x8_t test_vmlalb_lane(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm, fp
return vmlalbq_lane_f16_mf8_fpm(vd, vn, vm, 0, fpm);
}

// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]

// CHECK-LABEL: define dso_local <8 x half> @test_vmlalb_laneq(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
Expand Down Expand Up @@ -379,3 +381,5 @@ float32x4_t test_vmlalltt_lane(float32x4_t vd, mfloat8x16_t vn, mfloat8x8_t vm,
float32x4_t test_vmlalltt_laneq(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpm) {
return vmlallttq_laneq_f32_mf8_fpm(vd, vn, vm, 15, fpm);
}

// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
4 changes: 4 additions & 0 deletions clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ svmfloat8_t test_cvt_f16_x2(svfloat16x2_t zn, fpm_t fpmr) __arm_streaming {
return SVE_ACLE_FUNC(svcvt_mf8,_f16_x2,_fpm)(zn, fpmr);
}

// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]

// CHECK-LABEL: @test_cvt_f32_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
Expand Down Expand Up @@ -207,3 +209,5 @@ svbfloat16x2_t test_cvtl1_bf16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming {
svbfloat16x2_t test_cvtl2_bf16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming {
return SVE_ACLE_FUNC(svcvtl2_bf16,_mf8,_x2_fpm)(zn, fpmr);
}

// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ void test_svmopa_za16_mf8_m(svbool_t pn, svbool_t pm, svmfloat8_t zn,
SVE_ACLE_FUNC(svmopa_za16,_mf8,_m_fpm)(1, pn, pm, zn, zm, fpmr);
}

// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]
//
// CHECK-LABEL: define dso_local void @test_svmopa_za32_mf8_m(
// CHECK-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
Expand All @@ -53,3 +55,6 @@ void test_svmopa_za32_mf8_m(svbool_t pn, svbool_t pm, svmfloat8_t zn,
svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
SVE_ACLE_FUNC(svmopa_za32,_mf8,_m_fpm)(3, pn, pm, zn, zm, fpmr);
}


// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
5 changes: 5 additions & 0 deletions clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_mla.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ void test_svmla_lane_za16_vg2x1(uint32_t slice, svmfloat8_t zn, svmfloat8_t zm,
SME_ACLE_FUNC(svmla_lane_za16,_mf8,_vg2x1_fpm,,)(slice, zn, zm, 0, fpm);
}

// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]

// CHECK-LABEL: define dso_local void @test_svmla_lane_za16_vg2x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
Expand Down Expand Up @@ -314,3 +316,6 @@ void test_svmla_multi_za32_vg4x2(uint32_t slice, svmfloat8x2_t zn, svmfloat8x2_t
void test_svmla_multi_za32_vg4x4(uint32_t slice, svmfloat8x4_t zn, svmfloat8x4_t zm, fpm_t fpm) __arm_streaming __arm_inout("za") {
SME_ACLE_FUNC(svmla_za32,_mf8,_vg4x4_fpm,,)(slice, zn, zm, fpm);
}


// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
5 changes: 5 additions & 0 deletions clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ svbfloat16_t test_svcvt1_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svcvt1_bf16,_mf8,_fpm)(zn, fpm);
}

// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]

// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svcvt2_bf16_mf8(
// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
Expand Down Expand Up @@ -171,3 +173,6 @@ svfloat16_t test_svcvtlt1_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
svfloat16_t test_svcvtlt2_f16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svcvtlt2_f16,_mf8,_fpm)(zn, fpm);
}


// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ svmfloat8_t test_svcvtn_f8_bf16(svbfloat16x2_t zn_zm, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svcvtn_mf8,_bf16_x2,_fpm)(zn_zm, fpm);
}

// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]

// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svcvtn_f8_f16(
// CHECK-SAME: <vscale x 8 x half> [[ZN_ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
Expand Down Expand Up @@ -99,3 +101,6 @@ svmfloat8_t test_svcvtnb_f8_f32(svfloat32x2_t zn_zm, fpm_t fpm) STREAMING {
svmfloat8_t test_svcvtnt_f8_f32(svmfloat8_t zd, svfloat32x2_t zn_zm, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svcvtnt_mf8,_f32_x2,_fpm)(zd, zn_zm, fpm);
}


// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ svfloat32_t test_svdot_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm,
return SVE_ACLE_FUNC(svdot,_f32_mf8,_fpm)(zda, zn, zm, fpm);
}

// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]

// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svdot_n_f32_mf8(
// CHECK-SAME: <vscale x 4 x float> [[ZDA:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
Expand Down Expand Up @@ -147,3 +149,6 @@ svfloat32_t test_svdot_lane_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t
svfloat16_t test_svdot_lane_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svdot_lane,_f16_mf8,_fpm)(zda, zn, zm, 7, fpm);
}


// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ svfloat16_t test_svmlalb_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm
return SVE_ACLE_FUNC(svmlalb,_f16_mf8,_fpm)(zda, zn, zm, fpm);
}

// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR1:#.*]]

// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svmlalb_n_f16_mf8(
// CHECK-SAME: <vscale x 8 x half> [[ZDA:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
Expand Down Expand Up @@ -387,3 +389,6 @@ svfloat32_t test_svmlalltb_lane_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloa
svfloat32_t test_svmlalltt_lane_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svmlalltt_lane,_f32_mf8,_fpm)(zda, zn, zm, 7, fpm);
}


// CHECK: attributes [[ATTR1]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
2 changes: 1 addition & 1 deletion llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ let TargetPrefix = "aarch64" in {
class RNDR_Intrinsic
: DefaultAttrsIntrinsic<[llvm_i64_ty, llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects]>;
class FPMR_Set_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleMemOnly]>;
: DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>;
}

// FP environment registers.
Expand Down
Loading