11// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
22// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 \
3- // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
3+ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg,instcombine | FileCheck %s
44
55// REQUIRES: aarch64-registered-target || arm-registered-target
66
@@ -17,10 +17,8 @@ bfloat16x4_t test_vcreate_bf16(uint64_t a) {
1717
1818// CHECK-LABEL: @test_vdup_n_bf16(
1919// CHECK-NEXT: entry:
20- // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x bfloat> poison, bfloat [[V:%.*]], i32 0
21- // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x bfloat> [[VECINIT_I]], bfloat [[V]], i32 1
22- // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x bfloat> [[VECINIT1_I]], bfloat [[V]], i32 2
23- // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x bfloat> [[VECINIT2_I]], bfloat [[V]], i32 3
20+ // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x bfloat> poison, bfloat [[V:%.*]], i64 0
21+ // CHECK-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x bfloat> [[VECINIT_I]], <4 x bfloat> poison, <4 x i32> zeroinitializer
2422// CHECK-NEXT: ret <4 x bfloat> [[VECINIT3_I]]
2523//
2624bfloat16x4_t test_vdup_n_bf16 (bfloat16_t v ) {
@@ -29,14 +27,8 @@ bfloat16x4_t test_vdup_n_bf16(bfloat16_t v) {
2927
3028// CHECK-LABEL: @test_vdupq_n_bf16(
3129// CHECK-NEXT: entry:
32- // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x bfloat> poison, bfloat [[V:%.*]], i32 0
33- // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x bfloat> [[VECINIT_I]], bfloat [[V]], i32 1
34- // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x bfloat> [[VECINIT1_I]], bfloat [[V]], i32 2
35- // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x bfloat> [[VECINIT2_I]], bfloat [[V]], i32 3
36- // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x bfloat> [[VECINIT3_I]], bfloat [[V]], i32 4
37- // CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x bfloat> [[VECINIT4_I]], bfloat [[V]], i32 5
38- // CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x bfloat> [[VECINIT5_I]], bfloat [[V]], i32 6
39- // CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x bfloat> [[VECINIT6_I]], bfloat [[V]], i32 7
30+ // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x bfloat> poison, bfloat [[V:%.*]], i64 0
31+ // CHECK-NEXT: [[VECINIT7_I:%.*]] = shufflevector <8 x bfloat> [[VECINIT_I]], <8 x bfloat> poison, <8 x i32> zeroinitializer
4032// CHECK-NEXT: ret <8 x bfloat> [[VECINIT7_I]]
4133//
4234bfloat16x8_t test_vdupq_n_bf16 (bfloat16_t v ) {
@@ -45,9 +37,7 @@ bfloat16x8_t test_vdupq_n_bf16(bfloat16_t v) {
4537
4638// CHECK-LABEL: @test_vdup_lane_bf16(
4739// CHECK-NEXT: entry:
48- // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8>
49- // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat>
50- // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
40+ // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[V:%.*]], <4 x bfloat> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
5141// CHECK-NEXT: ret <4 x bfloat> [[LANE]]
5242//
5343bfloat16x4_t test_vdup_lane_bf16 (bfloat16x4_t v ) {
@@ -56,9 +46,7 @@ bfloat16x4_t test_vdup_lane_bf16(bfloat16x4_t v) {
5646
5747// CHECK-LABEL: @test_vdupq_lane_bf16(
5848// CHECK-NEXT: entry:
59- // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8>
60- // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat>
61- // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
49+ // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[V:%.*]], <4 x bfloat> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
6250// CHECK-NEXT: ret <8 x bfloat> [[LANE]]
6351//
6452bfloat16x8_t test_vdupq_lane_bf16 (bfloat16x4_t v ) {
@@ -67,9 +55,7 @@ bfloat16x8_t test_vdupq_lane_bf16(bfloat16x4_t v) {
6755
6856// CHECK-LABEL: @test_vdup_laneq_bf16(
6957// CHECK-NEXT: entry:
70- // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8>
71- // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat>
72- // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
58+ // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[V:%.*]], <8 x bfloat> poison, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
7359// CHECK-NEXT: ret <4 x bfloat> [[LANE]]
7460//
7561bfloat16x4_t test_vdup_laneq_bf16 (bfloat16x8_t v ) {
@@ -78,9 +64,7 @@ bfloat16x4_t test_vdup_laneq_bf16(bfloat16x8_t v) {
7864
7965// CHECK-LABEL: @test_vdupq_laneq_bf16(
8066// CHECK-NEXT: entry:
81- // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8>
82- // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat>
83- // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
67+ // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[V:%.*]], <8 x bfloat> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
8468// CHECK-NEXT: ret <8 x bfloat> [[LANE]]
8569//
8670bfloat16x8_t test_vdupq_laneq_bf16 (bfloat16x8_t v ) {
@@ -98,7 +82,7 @@ bfloat16x8_t test_vcombine_bf16(bfloat16x4_t low, bfloat16x4_t high) {
9882
9983// CHECK-LABEL: @test_vget_high_bf16(
10084// CHECK-NEXT: entry:
101- // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
85+ // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
10286// CHECK-NEXT: ret <4 x bfloat> [[SHUFFLE_I]]
10387//
10488bfloat16x4_t test_vget_high_bf16 (bfloat16x8_t a ) {
@@ -107,7 +91,7 @@ bfloat16x4_t test_vget_high_bf16(bfloat16x8_t a) {
10791
10892// CHECK-LABEL: @test_vget_low_bf16(
10993// CHECK-NEXT: entry:
110- // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
94+ // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
11195// CHECK-NEXT: ret <4 x bfloat> [[SHUFFLE_I]]
11296//
11397bfloat16x4_t test_vget_low_bf16 (bfloat16x8_t a ) {
@@ -116,7 +100,7 @@ bfloat16x4_t test_vget_low_bf16(bfloat16x8_t a) {
116100
117101// CHECK-LABEL: @test_vget_lane_bf16(
118102// CHECK-NEXT: entry:
119- // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], i32 1
103+ // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], i64 1
120104// CHECK-NEXT: ret bfloat [[VGET_LANE]]
121105//
122106bfloat16_t test_vget_lane_bf16 (bfloat16x4_t v ) {
@@ -125,7 +109,7 @@ bfloat16_t test_vget_lane_bf16(bfloat16x4_t v) {
125109
126110// CHECK-LABEL: @test_vgetq_lane_bf16(
127111// CHECK-NEXT: entry:
128- // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], i32 7
112+ // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], i64 7
129113// CHECK-NEXT: ret bfloat [[VGETQ_LANE]]
130114//
131115bfloat16_t test_vgetq_lane_bf16 (bfloat16x8_t v ) {
@@ -134,7 +118,7 @@ bfloat16_t test_vgetq_lane_bf16(bfloat16x8_t v) {
134118
135119// CHECK-LABEL: @test_vset_lane_bf16(
136120// CHECK-NEXT: entry:
137- // CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i32 1
121+ // CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i64 1
138122// CHECK-NEXT: ret <4 x bfloat> [[VSET_LANE]]
139123//
140124bfloat16x4_t test_vset_lane_bf16 (bfloat16_t a , bfloat16x4_t v ) {
@@ -143,7 +127,7 @@ bfloat16x4_t test_vset_lane_bf16(bfloat16_t a, bfloat16x4_t v) {
143127
144128// CHECK-LABEL: @test_vsetq_lane_bf16(
145129// CHECK-NEXT: entry:
146- // CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i32 7
130+ // CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i64 7
147131// CHECK-NEXT: ret <8 x bfloat> [[VSET_LANE]]
148132//
149133bfloat16x8_t test_vsetq_lane_bf16 (bfloat16_t a , bfloat16x8_t v ) {
@@ -152,7 +136,7 @@ bfloat16x8_t test_vsetq_lane_bf16(bfloat16_t a, bfloat16x8_t v) {
152136
153137// CHECK-LABEL: @test_vduph_lane_bf16(
154138// CHECK-NEXT: entry:
155- // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], i32 1
139+ // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], i64 1
156140// CHECK-NEXT: ret bfloat [[VGET_LANE]]
157141//
158142bfloat16_t test_vduph_lane_bf16 (bfloat16x4_t v ) {
@@ -161,7 +145,7 @@ bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) {
161145
162146// CHECK-LABEL: @test_vduph_laneq_bf16(
163147// CHECK-NEXT: entry:
164- // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], i32 7
148+ // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], i64 7
165149// CHECK-NEXT: ret bfloat [[VGETQ_LANE]]
166150//
167151bfloat16_t test_vduph_laneq_bf16 (bfloat16x8_t v ) {
0 commit comments