// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s
// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s -check-prefix CHECK-CXX

// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -S -o /dev/null %s

// REQUIRES: aarch64-registered-target

#include <arm_neon.h>
// CHECK-LABEL: define dso_local <8 x half> @test_vmlalb(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-NEXT: ret <8 x half> [[VMLAL1_I]]
//
// CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z11test_vmlalb13__Float16x8_t14__Mfloat8x16_tS0_m(
// CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-CXX-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalb.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-CXX-NEXT: ret <8 x half> [[VMLAL1_I]]
//
// vmlalbq_f16_mf8_fpm: per the CHECK lines above, codegen sets FPMR via
// @llvm.aarch64.set.fpmr and lowers to @llvm.aarch64.neon.fp8.fmlalb.v8f16
// in both the C and C++ (mangled) compilations.
float16x8_t test_vmlalb(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpm) {
  return vmlalbq_f16_mf8_fpm(vd, vn, vm, fpm);
}
| 30 | + |
// CHECK-LABEL: define dso_local <8 x half> @test_vmlalt(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-NEXT: ret <8 x half> [[VMLAL1_I]]
//
// CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z11test_vmlalt13__Float16x8_t14__Mfloat8x16_tS0_m(
// CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8>
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-CXX-NEXT: [[VMLAL1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fmlalt.v8f16(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-CXX-NEXT: ret <8 x half> [[VMLAL1_I]]
//
// vmlaltq_f16_mf8_fpm: same shape as test_vmlalb but, per the CHECK lines,
// lowers to the .fmlalt.v8f16 intrinsic instead of .fmlalb.
float16x8_t test_vmlalt(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpm) {
  return vmlaltq_f16_mf8_fpm(vd, vn, vm, fpm);
}
| 50 | + |
// CHECK-LABEL: define dso_local <4 x float> @test_vmlallbb(
// CHECK-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlallbb.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-NEXT: ret <4 x float> [[VMLALL_I]]
//
// CHECK-CXX-LABEL: define dso_local noundef <4 x float> @_Z13test_vmlallbb13__Float32x4_t14__Mfloat8x16_tS0_m(
// CHECK-CXX-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-CXX-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlallbb.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-CXX-NEXT: ret <4 x float> [[VMLALL_I]]
//
// vmlallbbq_f32_mf8_fpm: float32 accumulator variant; per the CHECK lines it
// lowers to @llvm.aarch64.neon.fp8.fmlallbb.v4f32 after setting FPMR.
float32x4_t test_vmlallbb(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpm) {
  return vmlallbbq_f32_mf8_fpm(vd, vn, vm, fpm);
}
| 68 | + |
// CHECK-LABEL: define dso_local <4 x float> @test_vmlallbt(
// CHECK-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlallbt.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-NEXT: ret <4 x float> [[VMLALL_I]]
//
// CHECK-CXX-LABEL: define dso_local noundef <4 x float> @_Z13test_vmlallbt13__Float32x4_t14__Mfloat8x16_tS0_m(
// CHECK-CXX-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-CXX-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlallbt.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-CXX-NEXT: ret <4 x float> [[VMLALL_I]]
//
// vmlallbtq_f32_mf8_fpm: per the CHECK lines, lowers to the .fmlallbt.v4f32
// intrinsic after setting FPMR.
float32x4_t test_vmlallbt(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpm) {
  return vmlallbtq_f32_mf8_fpm(vd, vn, vm, fpm);
}
| 86 | + |
// CHECK-LABEL: define dso_local <4 x float> @test_vmlalltb(
// CHECK-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlalltb.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-NEXT: ret <4 x float> [[VMLALL_I]]
//
// CHECK-CXX-LABEL: define dso_local noundef <4 x float> @_Z13test_vmlalltb13__Float32x4_t14__Mfloat8x16_tS0_m(
// CHECK-CXX-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-CXX-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlalltb.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-CXX-NEXT: ret <4 x float> [[VMLALL_I]]
//
// vmlalltbq_f32_mf8_fpm: per the CHECK lines, lowers to the .fmlalltb.v4f32
// intrinsic after setting FPMR.
float32x4_t test_vmlalltb(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpm) {
  return vmlalltbq_f32_mf8_fpm(vd, vn, vm, fpm);
}
| 104 | + |
// CHECK-LABEL: define dso_local <4 x float> @test_vmlalltt(
// CHECK-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlalltt.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-NEXT: ret <4 x float> [[VMLALL_I]]
//
// CHECK-CXX-LABEL: define dso_local noundef <4 x float> @_Z13test_vmlalltt13__Float32x4_t14__Mfloat8x16_tS0_m(
// CHECK-CXX-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-CXX-NEXT: [[VMLALL_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fmlalltt.v4f32(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-CXX-NEXT: ret <4 x float> [[VMLALL_I]]
//
// vmlallttq_f32_mf8_fpm: per the CHECK lines, lowers to the .fmlalltt.v4f32
// intrinsic after setting FPMR.
float32x4_t test_vmlalltt(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpm) {
  return vmlallttq_f32_mf8_fpm(vd, vn, vm, fpm);
}