Added new fp8 intrinsic in IntrinsicsAArch64.td, updated tests accordingly

Amichaxx · Amichaxx · commit 308d3229a14c · 2025-11-13T11:40:08.000Z
Formatting
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
@@ -1201,11 +1201,11 @@ let SVETargetGuard = "sve-f16f32mm", SMETargetGuard = InvalidMode in {
 }
 
 let SVETargetGuard = "sve2,f8f32mm", SMETargetGuard = InvalidMode in {
-  def SVMLLA_F32_MF8 : SInst<"svmmla[_f32_mf8]", "dd~~>", "f", MergeNone, "aarch64_sve_fmmla", [IsOverloadCvt]>;
+  def SVMLLA_F32_MF8 : SInst<"svmmla[_f32_mf8]", "dd~~>", "f", MergeNone, "aarch64_sve_fp8_fmmla", [IsOverloadCvt]>;
 }
 
 let SVETargetGuard = "sve2,f8f16mm", SMETargetGuard = InvalidMode in {
-  def SVMLLA_F16_MF8 : SInst<"svmmla[_f16_mf8]", "dd~~>", "h", MergeNone, "aarch64_sve_fmmla", [IsOverloadCvt]>;
+  def SVMLLA_F16_MF8 : SInst<"svmmla[_f16_mf8]", "dd~~>", "h", MergeNone, "aarch64_sve_fp8_fmmla", [IsOverloadCvt]>;
 }
 
 def SVTRN1Q : SInst<"svtrn1q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_trn1q">;
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_fmmla-f16mf8.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_fmmla-f16mf8.c
@@ -20,14 +20,14 @@
 // CHECK-SAME: <vscale x 8 x half> [[ACC:%.*]], <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmmla.nxv8f16.nxv16i8(<vscale x 8 x half> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.fmmla.nxv8f16.nxv16i8(<vscale x 8 x half> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z11test_f16mf8u13__SVFloat16_tu13__SVMfloat8_tS0_m(
 // CPP-CHECK-SAME: <vscale x 8 x half> [[ACC:%.*]], <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT:  [[ENTRY:.*:]]
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmmla.nxv8f16.nxv16i8(<vscale x 8 x half> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.fmmla.nxv8f16.nxv16i8(<vscale x 8 x half> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 svfloat16_t test_f16mf8(svfloat16_t acc, svmfloat8_t a, svmfloat8_t b, fpm_t fpmr) {
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_fmmla-f32mf8.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_fmmla-f32mf8.c
@@ -21,14 +21,14 @@
 // CHECK-SAME: <vscale x 4 x float> [[ACC:%.*]], <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv16i8(<vscale x 4 x float> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fp8.fmmla.nxv4f32.nxv16i8(<vscale x 4 x float> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z11test_f32mf8u13__SVFloat32_tu13__SVMfloat8_tS0_m(
 // CPP-CHECK-SAME: <vscale x 4 x float> [[ACC:%.*]], <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT:  [[ENTRY:.*:]]
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv16i8(<vscale x 4 x float> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fp8.fmmla.nxv4f32.nxv16i8(<vscale x 4 x float> [[ACC]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_f32mf8(svfloat32_t acc, svmfloat8_t a, svmfloat8_t b, fpm_t fpmr) {
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2807,9 +2807,14 @@ def int_aarch64_sve_sudot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
 //
 
 def int_aarch64_sve_fmmla
-  : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
-                          [ LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1> ],
-                          [ IntrNoMem ]>;
+  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                          [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
+                          [IntrNoMem]>;
+
+def int_aarch64_sve_fp8_fmmla
+  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                          [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
+                          [IntrReadMem, IntrInaccessibleMemOnly]>;
 
 //
 // SVE ACLE: 7.2. BFloat16 extensions
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -11145,7 +11145,7 @@ class sve2_fp8_mmla<bit opc, ZPRRegOp dst_ty, string mnemonic>
 
 multiclass sve2_fp8_fmmla<bits<1> opc, ZPRRegOp zprty, string mnemonic, ValueType ResVT> {
   def NAME : sve2_fp8_mmla<opc, zprty, mnemonic>;
-  def : Pat<(ResVT (int_aarch64_sve_fmmla ResVT:$acc, nxv16i8:$zn, nxv16i8:$zm)),
+  def : Pat<(ResVT (int_aarch64_sve_fp8_fmmla ResVT:$acc, nxv16i8:$zn, nxv16i8:$zm)),
             (!cast<Instruction>(NAME) $acc, $zn, $zm)>;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/sve2-fmmla-f16mf8.ll b/llvm/test/CodeGen/AArch64/sve2-fmmla-f16mf8.ll
@@ -7,6 +7,6 @@ define <vscale x 8 x half> @fmmla_f16mf8(<vscale x 8 x half> %acc, <vscale x 16
 ; CHECK-NEXT:    fmmla z0.h, z1.b, z2.b
 ; CHECK-NEXT:    ret
 entry:
-  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmmla.mf8f16(<vscale x 8 x half> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fp8.fmmla.nxv8f16.nxv16i8(<vscale x 8 x half> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 8 x half> %out
 }
diff --git a/llvm/test/CodeGen/AArch64/sve2-fmmla-f32mf8.ll b/llvm/test/CodeGen/AArch64/sve2-fmmla-f32mf8.ll
@@ -7,6 +7,6 @@ define dso_local <vscale x 4 x float> @fmmla_f32mf8(<vscale x 4 x float> %acc, <
 ; CHECK-NEXT:    fmmla z0.s, z1.b, z2.b
 ; CHECK-NEXT:    ret
 entry:
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.mf8f32(<vscale x 4 x float> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fp8.fmmla.nxv4f32.nxv16i82(<vscale x 4 x float> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
   ret <vscale x 4 x float> %out
 }

Original file line number	Diff line number	Diff line change
`@@ -1201,11 +1201,11 @@ let SVETargetGuard = "sve-f16f32mm", SMETargetGuard = InvalidMode in {`
`1201`	`1201`	`}`
`1202`	`1202`
`1203`	`1203`	`let SVETargetGuard = "sve2,f8f32mm", SMETargetGuard = InvalidMode in {`
`1204`		`- def SVMLLA_F32_MF8 : SInst<"svmmla[_f32_mf8]", "dd~~>", "f", MergeNone, "aarch64_sve_fmmla", [IsOverloadCvt]>;`
	`1204`	`+ def SVMLLA_F32_MF8 : SInst<"svmmla[_f32_mf8]", "dd~~>", "f", MergeNone, "aarch64_sve_fp8_fmmla", [IsOverloadCvt]>;`
`1205`	`1205`	`}`
`1206`	`1206`
`1207`	`1207`	`let SVETargetGuard = "sve2,f8f16mm", SMETargetGuard = InvalidMode in {`
`1208`		`- def SVMLLA_F16_MF8 : SInst<"svmmla[_f16_mf8]", "dd~~>", "h", MergeNone, "aarch64_sve_fmmla", [IsOverloadCvt]>;`
	`1208`	`+ def SVMLLA_F16_MF8 : SInst<"svmmla[_f16_mf8]", "dd~~>", "h", MergeNone, "aarch64_sve_fp8_fmmla", [IsOverloadCvt]>;`
`1209`	`1209`	`}`
`1210`	`1210`
`1211`	`1211`	`def SVTRN1Q : SInst<"svtrn1q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_trn1q">;`
Original file line number	Diff line number	Diff line change
`@@ -11145,7 +11145,7 @@ class sve2_fp8_mmla<bit opc, ZPRRegOp dst_ty, string mnemonic>`
`11145`	`11145`
`11146`	`11146`	`multiclass sve2_fp8_fmmla<bits<1> opc, ZPRRegOp zprty, string mnemonic, ValueType ResVT> {`
`11147`	`11147`	`def NAME : sve2_fp8_mmla<opc, zprty, mnemonic>;`
`11148`		`- def : Pat<(ResVT (int_aarch64_sve_fmmla ResVT:$acc, nxv16i8:$zn, nxv16i8:$zm)),`
	`11148`	`+ def : Pat<(ResVT (int_aarch64_sve_fp8_fmmla ResVT:$acc, nxv16i8:$zn, nxv16i8:$zm)),`
`11149`	`11149`	`(!cast<Instruction>(NAME) $acc, $zn, $zm)>;`
`11150`	`11150`	`}`
`11151`	`11151`
Original file line number	Diff line number	Diff line change
`@@ -7,6 +7,6 @@ define <vscale x 8 x half> @fmmla_f16mf8(<vscale x 8 x half> %acc, <vscale x 16`
`7`	`7`	`; CHECK-NEXT: fmmla z0.h, z1.b, z2.b`
`8`	`8`	`; CHECK-NEXT: ret`
`9`	`9`	`entry:`
`10`		`- %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmmla.mf8f16(<vscale x 8 x half> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)`
	`10`	`+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fp8.fmmla.nxv8f16.nxv16i8(<vscale x 8 x half> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)`
`11`	`11`	`ret <vscale x 8 x half> %out`
`12`	`12`	`}`
Original file line number	Diff line number	Diff line change
`@@ -7,6 +7,6 @@ define dso_local <vscale x 4 x float> @fmmla_f32mf8(<vscale x 4 x float> %acc, <`
`7`	`7`	`; CHECK-NEXT: fmmla z0.s, z1.b, z2.b`
`8`	`8`	`; CHECK-NEXT: ret`
`9`	`9`	`entry:`
`10`		`- %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.mf8f32(<vscale x 4 x float> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)`
	`10`	`+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fp8.fmmla.nxv4f32.nxv16i82(<vscale x 4 x float> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)`
`11`	`11`	`ret <vscale x 4 x float> %out`
`12`	`12`	`}`