// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5

// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8dot2 -target-feature +fp8dot4 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s

// REQUIRES: aarch64-registered-target

#include <arm_neon.h>
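
// The tests below cover the FP8 dot product intrinsics from <arm_neon.h>:
// the 2-way forms (vdot[q]_f16_mf8_fpm and their lane variants) accumulate
// into float16 vectors, and the 4-way forms accumulate into float32 vectors.
// Each intrinsic takes an fpm_t argument, which the checks expect to be
// lowered to a call to @llvm.aarch64.set.fpmr ahead of the dot-product
// intrinsic.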

// CHECK-LABEL: define dso_local <4 x half> @test_vdot_f16(
// CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: [[FDOT21_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> [[VD]], <8 x i8> [[VN]], <8 x i8> [[VM]])
// CHECK-NEXT: ret <4 x half> [[FDOT21_I]]
//
float16x4_t test_vdot_f16(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, fpm_t fpmr) {
  return vdot_f16_mf8_fpm(vd, vn, vm, fpmr);
}

// CHECK-LABEL: define dso_local <8 x half> @test_vdotq_f16(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: [[FDOT21_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.fp8.fdot2.v8f16.v16i8(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-NEXT: ret <8 x half> [[FDOT21_I]]
//
float16x8_t test_vdotq_f16(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpmr) {
  return vdotq_f16_mf8_fpm(vd, vn, vm, fpmr);
}

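// Lane variants: when the vm operand is a 64-bit vector, the checks expect it
// to be widened to 128 bits (upper half poison) with a shufflevector before
// the .lane intrinsic is called; the immediate lane index is passed through
// unchanged.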
// CHECK-LABEL: define dso_local <4 x half> @test_vdot_lane_f16(
// CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VM]], <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: [[FDOT2_LANE1:%.*]] = tail call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[VD]], <8 x i8> [[VN]], <16 x i8> [[TMP0]], i32 3)
// CHECK-NEXT: ret <4 x half> [[FDOT2_LANE1]]
//
float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, fpm_t fpmr) {
  return vdot_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr);
}

// CHECK-LABEL: define dso_local <4 x half> @test_vdot_laneq_f16(
// CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: [[FDOT2_LANE1:%.*]] = tail call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[VD]], <8 x i8> [[VN]], <16 x i8> [[VM]], i32 7)
// CHECK-NEXT: ret <4 x half> [[FDOT2_LANE1]]
//
float16x4_t test_vdot_laneq_f16(float16x4_t vd, mfloat8x8_t vn, mfloat8x16_t vm, fpm_t fpmr) {
  return vdot_laneq_f16_mf8_fpm(vd, vn, vm, 7, fpmr);
}

// CHECK-LABEL: define dso_local <8 x half> @test_vdotq_lane_f16(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VM]], <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: [[FDOT2_LANE1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v8f16.v16i8(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[TMP0]], i32 3)
// CHECK-NEXT: ret <8 x half> [[FDOT2_LANE1]]
//
float16x8_t test_vdotq_lane_f16(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm, fpm_t fpmr) {
  return vdotq_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr);
}

// CHECK-LABEL: define dso_local <8 x half> @test_vdotq_laneq_f16(
// CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: [[FDOT2_LANE1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v8f16.v16i8(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]], i32 7)
// CHECK-NEXT: ret <8 x half> [[FDOT2_LANE1]]
//
float16x8_t test_vdotq_laneq_f16(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpmr) {
  return vdotq_laneq_f16_mf8_fpm(vd, vn, vm, 7, fpmr);
}

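// 4-way dot products accumulating into float32, lowered to the
// @llvm.aarch64.neon.fp8.fdot4 intrinsics.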
// CHECK-LABEL: define dso_local <2 x float> @test_vdot_f32(
// CHECK-SAME: <2 x float> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: [[FDOT4_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.fp8.fdot4.v2f32.v8i8(<2 x float> [[VD]], <8 x i8> [[VN]], <8 x i8> [[VM]])
// CHECK-NEXT: ret <2 x float> [[FDOT4_I]]
//
float32x2_t test_vdot_f32(float32x2_t vd, mfloat8x8_t vn, mfloat8x8_t vm, fpm_t fpmr) {
  return vdot_f32_mf8_fpm(vd, vn, vm, fpmr);
}

// CHECK-LABEL: define dso_local <4 x float> @test_vdotq_f32(
// CHECK-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: [[FDOT4_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.fp8.fdot4.v4f32.v16i8(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]])
// CHECK-NEXT: ret <4 x float> [[FDOT4_I]]
//
float32x4_t test_vdotq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpmr) {
  return vdotq_f32_mf8_fpm(vd, vn, vm, fpmr);
}

// CHECK-LABEL: define dso_local <2 x float> @test_vdot_lane_f32(
// CHECK-SAME: <2 x float> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VM]], <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: [[FDOT4_LANE:%.*]] = tail call <2 x float> @llvm.aarch64.neon.fp8.fdot4.lane.v2f32.v8i8(<2 x float> [[VD]], <8 x i8> [[VN]], <16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT: ret <2 x float> [[FDOT4_LANE]]
//
float32x2_t test_vdot_lane_f32(float32x2_t vd, mfloat8x8_t vn, mfloat8x8_t vm, fpm_t fpmr) {
  return vdot_lane_f32_mf8_fpm(vd, vn, vm, 1, fpmr);
}

// CHECK-LABEL: define dso_local <2 x float> @test_vdot_laneq_f32(
// CHECK-SAME: <2 x float> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: [[FDOT4_LANE:%.*]] = tail call <2 x float> @llvm.aarch64.neon.fp8.fdot4.lane.v2f32.v8i8(<2 x float> [[VD]], <8 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
// CHECK-NEXT: ret <2 x float> [[FDOT4_LANE]]
//
float32x2_t test_vdot_laneq_f32(float32x2_t vd, mfloat8x8_t vn, mfloat8x16_t vm, fpm_t fpmr) {
  return vdot_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr);
}

// CHECK-LABEL: define dso_local <4 x float> @test_vdotq_lane_f32(
// CHECK-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[VM]], <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: [[FDOT4_LANE:%.*]] = tail call <4 x float> @llvm.aarch64.neon.fp8.fdot4.lane.v4f32.v16i8(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT: ret <4 x float> [[FDOT4_LANE]]
//
float32x4_t test_vdotq_lane_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x8_t vm, fpm_t fpmr) {
  return vdotq_lane_f32_mf8_fpm(vd, vn, vm, 1, fpmr);
}

// CHECK-LABEL: define dso_local <4 x float> @test_vdotq_laneq_f32(
// CHECK-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: [[FDOT4_LANE:%.*]] = tail call <4 x float> @llvm.aarch64.neon.fp8.fdot4.lane.v4f32.v16i8(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
// CHECK-NEXT: ret <4 x float> [[FDOT4_LANE]]
//
float32x4_t test_vdotq_laneq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpmr) {
  return vdotq_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr);
}