11// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2- // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
3- // RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix CHECK-CXX
2+ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s
3+ // RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s -check-prefix CHECK-CXX
44
55// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -S -o /dev/null %s
66
7- // REQUIES : aarch64-registered-target
7+ // REQUIRES: aarch64-registered-target
88
99#include <arm_neon.h>
1010
1111// CHECK-LABEL: define dso_local <8 x half> @test_vmlalb(
1212// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
1313// CHECK-NEXT: [[ENTRY:.*:]]
14- // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
15- // CHECK-NEXT: [[VMLAL1_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
14+ // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
15+ // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
16+ // CHECK-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
1617// CHECK-NEXT: ret <8 x half> [[VMLAL1_I]]
1718//
1819// CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z11test_vmlalb13__Float16x8_tu14__MFloat8x16_tu14__MFloat8x16_tm(
1920// CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
2021// CHECK-CXX-NEXT: [[ENTRY:.*:]]
21- // CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
22- // CHECK-CXX-NEXT: [[VMLAL1_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
22+ // CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
23+ // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
24+ // CHECK-CXX-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
2325// CHECK-CXX-NEXT: ret <8 x half> [[VMLAL1_I]]
2426//
2527float16x8_t test_vmlalb (float16x8_t vd , mfloat8x16_t vn , mfloat8x16_t vm , fpm_t fpm ) {
@@ -29,15 +31,17 @@ float16x8_t test_vmlalb(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t
2931// CHECK-LABEL: define dso_local <8 x half> @test_vmlalt(
3032// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
3133// CHECK-NEXT: [[ENTRY:.*:]]
32- // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
33- // CHECK-NEXT: [[VMLAL1_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
34+ // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
35+ // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
36+ // CHECK-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
3437// CHECK-NEXT: ret <8 x half> [[VMLAL1_I]]
3538//
3639// CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z11test_vmlalt13__Float16x8_tu14__MFloat8x16_tu14__MFloat8x16_tm(
3740// CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
3841// CHECK-CXX-NEXT: [[ENTRY:.*:]]
39- // CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
40- // CHECK-CXX-NEXT: [[VMLAL1_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
42+ // CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
43+ // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
44+ // CHECK-CXX-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
4145// CHECK-CXX-NEXT: ret <8 x half> [[VMLAL1_I]]
4246//
4347float16x8_t test_vmlalt (float16x8_t vd , mfloat8x16_t vn , mfloat8x16_t vm , fpm_t fpm ) {
@@ -47,15 +51,15 @@ float16x8_t test_vmlalt(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t
4751// CHECK-LABEL: define dso_local <4 x float> @test_vmlallbb(
4852// CHECK-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
4953// CHECK-NEXT: [[ENTRY:.*:]]
50- // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
51- // CHECK-NEXT: [[VMLALL_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.fp8.fmlallbb.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
54+ // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
55+ // CHECK-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlallbb.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
5256// CHECK-NEXT: ret <4 x float> [[VMLALL_I]]
5357//
5458// CHECK-CXX-LABEL: define dso_local noundef <4 x float> @_Z13test_vmlallbb13__Float32x4_tu14__MFloat8x16_tu14__MFloat8x16_tm(
5559// CHECK-CXX-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
5660// CHECK-CXX-NEXT: [[ENTRY:.*:]]
57- // CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
58- // CHECK-CXX-NEXT: [[VMLALL_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.fp8.fmlallbb.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
61+ // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
62+ // CHECK-CXX-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlallbb.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
5963// CHECK-CXX-NEXT: ret <4 x float> [[VMLALL_I]]
6064//
6165float32x4_t test_vmlallbb (float32x4_t vd , mfloat8x16_t vn , mfloat8x16_t vm , fpm_t fpm ) {
@@ -65,15 +69,15 @@ float32x4_t test_vmlallbb(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_
6569// CHECK-LABEL: define dso_local <4 x float> @test_vmlallbt(
6670// CHECK-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
6771// CHECK-NEXT: [[ENTRY:.*:]]
68- // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
69- // CHECK-NEXT: [[VMLALL_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.fp8.fmlallbt.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
72+ // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
73+ // CHECK-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlallbt.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
7074// CHECK-NEXT: ret <4 x float> [[VMLALL_I]]
7175//
7276// CHECK-CXX-LABEL: define dso_local noundef <4 x float> @_Z13test_vmlallbt13__Float32x4_tu14__MFloat8x16_tu14__MFloat8x16_tm(
7377// CHECK-CXX-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
7478// CHECK-CXX-NEXT: [[ENTRY:.*:]]
75- // CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
76- // CHECK-CXX-NEXT: [[VMLALL_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.fp8.fmlallbt.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
79+ // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
80+ // CHECK-CXX-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlallbt.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
7781// CHECK-CXX-NEXT: ret <4 x float> [[VMLALL_I]]
7882//
7983float32x4_t test_vmlallbt (float32x4_t vd , mfloat8x16_t vn , mfloat8x16_t vm , fpm_t fpm ) {
@@ -83,15 +87,15 @@ float32x4_t test_vmlallbt(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_
8387// CHECK-LABEL: define dso_local <4 x float> @test_vmlalltb(
8488// CHECK-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
8589// CHECK-NEXT: [[ENTRY:.*:]]
86- // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
87- // CHECK-NEXT: [[VMLALL_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.fp8.fmlalltb.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
90+ // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
91+ // CHECK-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlalltb.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
8892// CHECK-NEXT: ret <4 x float> [[VMLALL_I]]
8993//
9094// CHECK-CXX-LABEL: define dso_local noundef <4 x float> @_Z13test_vmlalltb13__Float32x4_tu14__MFloat8x16_tu14__MFloat8x16_tm(
9195// CHECK-CXX-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
9296// CHECK-CXX-NEXT: [[ENTRY:.*:]]
93- // CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
94- // CHECK-CXX-NEXT: [[VMLALL_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.fp8.fmlalltb.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
97+ // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
98+ // CHECK-CXX-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlalltb.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
9599// CHECK-CXX-NEXT: ret <4 x float> [[VMLALL_I]]
96100//
97101float32x4_t test_vmlalltb (float32x4_t vd , mfloat8x16_t vn , mfloat8x16_t vm , fpm_t fpm ) {
@@ -101,15 +105,15 @@ float32x4_t test_vmlalltb(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_
101105// CHECK-LABEL: define dso_local <4 x float> @test_vmlalltt(
102106// CHECK-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
103107// CHECK-NEXT: [[ENTRY:.*:]]
104- // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
105- // CHECK-NEXT: [[VMLALL_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.fp8.fmlalltt.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
108+ // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
109+ // CHECK-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlalltt.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
106110// CHECK-NEXT: ret <4 x float> [[VMLALL_I]]
107111//
108112// CHECK-CXX-LABEL: define dso_local noundef <4 x float> @_Z13test_vmlalltt13__Float32x4_tu14__MFloat8x16_tu14__MFloat8x16_tm(
109113// CHECK-CXX-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
110114// CHECK-CXX-NEXT: [[ENTRY:.*:]]
111- // CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
112- // CHECK-CXX-NEXT: [[VMLALL_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.fp8.fmlalltt.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
115+ // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
116+ // CHECK-CXX-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlalltt.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
113117// CHECK-CXX-NEXT: ret <4 x float> [[VMLALL_I]]
114118//
115119float32x4_t test_vmlalltt (float32x4_t vd , mfloat8x16_t vn , mfloat8x16_t vm , fpm_t fpm ) {
0 commit comments