@@ -66,6 +66,35 @@ define <16 x i8> @test_insert_v16i8_insert_2_undef_base(i8 %a) {
6666 %v.6 = insertelement <16 x i8 > %v.4 , i8 %a , i32 6
6767 %v.7 = insertelement <16 x i8 > %v.6 , i8 %a , i32 7
6868 %v.8 = insertelement <16 x i8 > %v.7 , i8 %a , i32 8
69+ %v.10 = insertelement <16 x i8 > %v.8 , i8 %a , i32 10
70+ %v.11 = insertelement <16 x i8 > %v.10 , i8 %a , i32 11
71+ %v.12 = insertelement <16 x i8 > %v.11 , i8 %a , i32 12
72+ %v.13 = insertelement <16 x i8 > %v.12 , i8 %a , i32 13
73+ %v.14 = insertelement <16 x i8 > %v.13 , i8 %a , i32 14
74+ %v.15 = insertelement <16 x i8 > %v.14 , i8 %a , i32 15
75+ ret <16 x i8 > %v.15
76+ }
77+
78+ ; Similar to the test above, but element 8 is left undef. The interesting part
79+ ; is that %a may be poison (it is derived from an 'lshr exact'), so filling
80+ ; index 8 with %a as well would make the result vector more poisonous.
81+ define <16 x i8 > @test_insert_v16i8_insert_2_undef_base_skip8 (i32 %a0 ) {
82+ ; CHECK-LABEL: test_insert_v16i8_insert_2_undef_base_skip8:
83+ ; CHECK: // %bb.0:
84+ ; CHECK-NEXT: lsr w8, w0, #5
85+ ; CHECK-NEXT: dup.16b v0, w8
86+ ; CHECK-NEXT: mov.b v0[5], wzr
87+ ; CHECK-NEXT: mov.b v0[9], wzr
88+ ; CHECK-NEXT: ret
89+ %a1 = lshr exact i32 %a0 , 5
90+ %a = trunc i32 %a1 to i8
91+ %v.0 = insertelement <16 x i8 > <i8 undef , i8 undef , i8 undef , i8 undef , i8 undef , i8 0 , i8 undef , i8 undef , i8 undef , i8 0 , i8 undef , i8 undef , i8 undef , i8 undef , i8 undef , i8 undef > , i8 %a , i32 0
92+ %v.1 = insertelement <16 x i8 > %v.0 , i8 %a , i32 1
93+ %v.2 = insertelement <16 x i8 > %v.1 , i8 %a , i32 2
94+ %v.3 = insertelement <16 x i8 > %v.2 , i8 %a , i32 3
95+ %v.4 = insertelement <16 x i8 > %v.3 , i8 %a , i32 4
96+ %v.6 = insertelement <16 x i8 > %v.4 , i8 %a , i32 6
97+ %v.7 = insertelement <16 x i8 > %v.6 , i8 %a , i32 7
6998 %v.10 = insertelement <16 x i8 > %v.7 , i8 %a , i32 10
7099 %v.11 = insertelement <16 x i8 > %v.10 , i8 %a , i32 11
71100 %v.12 = insertelement <16 x i8 > %v.11 , i8 %a , i32 12
@@ -75,8 +104,8 @@ define <16 x i8> @test_insert_v16i8_insert_2_undef_base(i8 %a) {
75104 ret <16 x i8 > %v.15
76105}
77106
78- define <16 x i8 > @test_insert_v16i8_insert_2_undef_base_different_valeus (i8 %a , i8 %b ) {
79- ; CHECK-LABEL: test_insert_v16i8_insert_2_undef_base_different_valeus :
107+ define <16 x i8 > @test_insert_v16i8_insert_2_undef_base_different_values (i8 %a , i8 %b ) {
108+ ; CHECK-LABEL: test_insert_v16i8_insert_2_undef_base_different_values :
80109; CHECK: // %bb.0:
81110; CHECK-NEXT: dup.16b v0, w0
82111; CHECK-NEXT: mov.b v0[2], w1
@@ -94,6 +123,42 @@ define <16 x i8> @test_insert_v16i8_insert_2_undef_base_different_valeus(i8 %a,
94123 %v.6 = insertelement <16 x i8 > %v.4 , i8 %a , i32 6
95124 %v.7 = insertelement <16 x i8 > %v.6 , i8 %b , i32 7
96125 %v.8 = insertelement <16 x i8 > %v.7 , i8 %a , i32 8
126+ %v.10 = insertelement <16 x i8 > %v.8 , i8 %a , i32 10
127+ %v.11 = insertelement <16 x i8 > %v.10 , i8 %a , i32 11
128+ %v.12 = insertelement <16 x i8 > %v.11 , i8 %b , i32 12
129+ %v.13 = insertelement <16 x i8 > %v.12 , i8 %a , i32 13
130+ %v.14 = insertelement <16 x i8 > %v.13 , i8 %a , i32 14
131+ %v.15 = insertelement <16 x i8 > %v.14 , i8 %b , i32 15
132+ ret <16 x i8 > %v.15
133+ }
134+
135+ ; Similar to the test above, but element 8 is left undef. The interesting part
136+ ; is that %a and %b may be poison (both are derived from an 'lshr exact'), so
137+ ; filling index 8 with either %a or %b would make the result vector more poisonous.
138+ define <16 x i8 > @test_insert_v16i8_insert_2_undef_base_different_values_skip8 (i32 %a0 , i32 %b0 ) {
139+ ; CHECK-LABEL: test_insert_v16i8_insert_2_undef_base_different_values_skip8:
140+ ; CHECK: // %bb.0:
141+ ; CHECK-NEXT: lsr w8, w0, #5
142+ ; CHECK-NEXT: dup.16b v0, w8
143+ ; CHECK-NEXT: lsr w8, w1, #5
144+ ; CHECK-NEXT: mov.b v0[2], w8
145+ ; CHECK-NEXT: mov.b v0[5], wzr
146+ ; CHECK-NEXT: mov.b v0[7], w8
147+ ; CHECK-NEXT: mov.b v0[9], wzr
148+ ; CHECK-NEXT: mov.b v0[12], w8
149+ ; CHECK-NEXT: mov.b v0[15], w8
150+ ; CHECK-NEXT: ret
151+ %a1 = lshr exact i32 %a0 , 5
152+ %a = trunc i32 %a1 to i8
153+ %b1 = lshr exact i32 %b0 , 5
154+ %b = trunc i32 %b1 to i8
155+ %v.0 = insertelement <16 x i8 > <i8 undef , i8 undef , i8 undef , i8 undef , i8 undef , i8 0 , i8 undef , i8 undef , i8 undef , i8 0 , i8 undef , i8 undef , i8 undef , i8 undef , i8 undef , i8 undef > , i8 %a , i32 0
156+ %v.1 = insertelement <16 x i8 > %v.0 , i8 %a , i32 1
157+ %v.2 = insertelement <16 x i8 > %v.1 , i8 %b , i32 2
158+ %v.3 = insertelement <16 x i8 > %v.2 , i8 %a , i32 3
159+ %v.4 = insertelement <16 x i8 > %v.3 , i8 %a , i32 4
160+ %v.6 = insertelement <16 x i8 > %v.4 , i8 %a , i32 6
161+ %v.7 = insertelement <16 x i8 > %v.6 , i8 %b , i32 7
97162 %v.10 = insertelement <16 x i8 > %v.7 , i8 %a , i32 10
98163 %v.11 = insertelement <16 x i8 > %v.10 , i8 %a , i32 11
99164 %v.12 = insertelement <16 x i8 > %v.11 , i8 %b , i32 12
0 commit comments