fixup! [AArch64][llvm] Add support for vmmlaq_[f16,f32]_mf8 intrinsics

jthackray · jthackray · commit 52b1c7604901 · 2025-11-07T11:52:07.000Z
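Split the f16 and f32 vmmlaq test cases into separate files so that each RUN line enables only the matrix-multiply feature it exercises (+f8f16mm or +f8f32mm).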
diff --git a/clang/test/CodeGen/AArch64/v9.6a-neon-f16-intrinsics.c b/clang/test/CodeGen/AArch64/v9.6a-neon-f16-intrinsics.c
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +v9.6a -target-feature +f8f16mm -target-feature +f8f32mm -target-feature +fp8 \
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +v9.6a -target-feature +f8f16mm -target-feature +fp8 \
 // RUN: -disable-O0-optnone -emit-llvm -o - %s \
 // RUN: | opt -S -passes=mem2reg,sroa \
 // RUN: | FileCheck %s
@@ -23,17 +23,3 @@
 float16x8_t test_vmmlaq_f16_mf8(float16x8_t p0, mfloat8x16_t p1, mfloat8x16_t p2, fpm_t p3) {
   return vmmlaq_f16_mf8_fpm(p0, p1, p2, p3);
 }
-
-// CHECK-LABEL: define dso_local <4 x float> @test_vmmlaq_f32_mf8(
-// CHECK-SAME: <4 x float> noundef [[P0:%.*]], <16 x i8> [[P1:%.*]], <16 x i8> [[P2:%.*]], i64 noundef [[P3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    call void @llvm.aarch64.set.fpmr(i64 [[P3]])
-// CHECK-NEXT:    [[FMMLA_I:%.*]] = bitcast <16 x i8> [[P1]] to <4 x float>
-// CHECK-NEXT:    [[FMMLA1_I:%.*]] = bitcast <16 x i8> [[P2]] to <4 x float>
-// CHECK-NEXT:    [[FMMLA2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmmla.v4f32.v4f32(<4 x float> [[P0]], <4 x float> [[FMMLA_I]], <4 x float> [[FMMLA1_I]])
-// CHECK-NEXT:    ret <4 x float> [[FMMLA2_I]]
-//
-float32x4_t test_vmmlaq_f32_mf8(float32x4_t p0, mfloat8x16_t p1, mfloat8x16_t p2, fpm_t p3) {
-  return vmmlaq_f32_mf8_fpm(p0, p1, p2, p3);
-}
-
diff --git a/clang/test/CodeGen/AArch64/v9.6a-neon-f32-intrinsics.c b/clang/test/CodeGen/AArch64/v9.6a-neon-f32-intrinsics.c
@@ -0,0 +1,23 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +v9.6a -target-feature +f8f32mm -target-feature +fp8 \
+// RUN: -disable-O0-optnone -emit-llvm -o - %s \
+// RUN: | opt -S -passes=mem2reg,sroa \
+// RUN: | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: define dso_local <4 x float> @test_vmmlaq_f32_mf8(
+// CHECK-SAME: <4 x float> noundef [[P0:%.*]], <16 x i8> [[P1:%.*]], <16 x i8> [[P2:%.*]], i64 noundef [[P3:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    call void @llvm.aarch64.set.fpmr(i64 [[P3]])
+// CHECK-NEXT:    [[FMMLA_I:%.*]] = bitcast <16 x i8> [[P1]] to <4 x float>
+// CHECK-NEXT:    [[FMMLA1_I:%.*]] = bitcast <16 x i8> [[P2]] to <4 x float>
+// CHECK-NEXT:    [[FMMLA2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmmla.v4f32.v4f32(<4 x float> [[P0]], <4 x float> [[FMMLA_I]], <4 x float> [[FMMLA1_I]])
+// CHECK-NEXT:    ret <4 x float> [[FMMLA2_I]]
+//
+float32x4_t test_vmmlaq_f32_mf8(float32x4_t p0, mfloat8x16_t p1, mfloat8x16_t p2, fpm_t p3) {
+  return vmmlaq_f32_mf8_fpm(p0, p1, p2, p3); // Expected IR per the assertions above: set.fpmr(p3), then neon.fmmla on p0 and the bitcast p1/p2.
+}
+