@@ -137,35 +137,27 @@ define void @test_store_i8x2_unaligned(ptr %ptr, <2 x i8> %a) {
137137; O0-LABEL: test_store_i8x2_unaligned(
138138; O0: {
139139; O0-NEXT: .reg .b16 %rs<3>;
140- ; O0-NEXT: .reg .b32 %r<3 >;
140+ ; O0-NEXT: .reg .b32 %r<2 >;
141141; O0-NEXT: .reg .b64 %rd<2>;
142142; O0-EMPTY:
143143; O0-NEXT: // %bb.0:
144144; O0-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_param_0];
145145; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_store_i8x2_unaligned_param_1];
146146; O0-NEXT: mov.b32 %r1, {%rs1, %rs2};
147- ; O0-NEXT: st.b8 [%rd1], %r1;
148- ; O0-NEXT: shr.u32 %r2, %r1, 16;
149- ; O0-NEXT: st.b8 [%rd1+2], %r2;
150- ; O0-NEXT: st.b8 [%rd1+1], 0;
151- ; O0-NEXT: st.b8 [%rd1+3], 0;
147+ ; O0-NEXT: st.b8 [%rd1+1], %rs2;
148+ ; O0-NEXT: st.b8 [%rd1], %rs1;
152149; O0-NEXT: ret;
153150;
154151; O3-LABEL: test_store_i8x2_unaligned(
155152; O3: {
156153; O3-NEXT: .reg .b16 %rs<3>;
157- ; O3-NEXT: .reg .b32 %r<3>;
158154; O3-NEXT: .reg .b64 %rd<2>;
159155; O3-EMPTY:
160156; O3-NEXT: // %bb.0:
161157; O3-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_param_0];
162158; O3-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_store_i8x2_unaligned_param_1];
163- ; O3-NEXT: mov.b32 %r1, {%rs1, %rs2};
164- ; O3-NEXT: st.b8 [%rd1], %r1;
165- ; O3-NEXT: shr.u32 %r2, %r1, 16;
166- ; O3-NEXT: st.b8 [%rd1+2], %r2;
167- ; O3-NEXT: st.b8 [%rd1+3], 0;
168- ; O3-NEXT: st.b8 [%rd1+1], 0;
159+ ; O3-NEXT: st.b8 [%rd1+1], %rs2;
160+ ; O3-NEXT: st.b8 [%rd1], %rs1;
169161; O3-NEXT: ret;
170162 store <2 x i8 > %a , ptr %ptr , align 1
171163 ret void
@@ -178,9 +170,7 @@ define void @test_store_i8x2_unaligned_immediate(ptr %ptr) {
178170; O0-EMPTY:
179171; O0-NEXT: // %bb.0:
180172; O0-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_immediate_param_0];
181- ; O0-NEXT: st.b8 [%rd1+3], 0;
182- ; O0-NEXT: st.b8 [%rd1+2], 2;
183- ; O0-NEXT: st.b8 [%rd1+1], 0;
173+ ; O0-NEXT: st.b8 [%rd1+1], 2;
184174; O0-NEXT: st.b8 [%rd1], 1;
185175; O0-NEXT: ret;
186176;
@@ -190,9 +180,7 @@ define void @test_store_i8x2_unaligned_immediate(ptr %ptr) {
190180; O3-EMPTY:
191181; O3-NEXT: // %bb.0:
192182; O3-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_immediate_param_0];
193- ; O3-NEXT: st.b8 [%rd1+3], 0;
194- ; O3-NEXT: st.b8 [%rd1+2], 2;
195- ; O3-NEXT: st.b8 [%rd1+1], 0;
183+ ; O3-NEXT: st.b8 [%rd1+1], 2;
196184; O3-NEXT: st.b8 [%rd1], 1;
197185; O3-NEXT: ret;
198186 store <2 x i8 > <i8 1 , i8 2 >, ptr %ptr , align 1
@@ -202,46 +190,26 @@ define void @test_store_i8x2_unaligned_immediate(ptr %ptr) {
202190define i32 @test_zext_load_i8x2_unaligned (ptr %ptr ) {
203191; O0-LABEL: test_zext_load_i8x2_unaligned(
204192; O0: {
205- ; O0-NEXT: .local .align 2 .b8 __local_depot6[2];
206- ; O0-NEXT: .reg .b64 %SP;
207- ; O0-NEXT: .reg .b64 %SPL;
208- ; O0-NEXT: .reg .b16 %rs<5>;
209- ; O0-NEXT: .reg .b32 %r<2>;
193+ ; O0-NEXT: .reg .b16 %rs<3>;
210194; O0-NEXT: .reg .b64 %rd<2>;
211195; O0-EMPTY:
212196; O0-NEXT: // %bb.0:
213- ; O0-NEXT: mov.b64 %SPL, __local_depot6;
214- ; O0-NEXT: cvta.local.u64 %SP, %SPL;
215197; O0-NEXT: ld.param.b64 %rd1, [test_zext_load_i8x2_unaligned_param_0];
216- ; O0-NEXT: ld.b8 %rs1, [%rd1];
217- ; O0-NEXT: ld.b8 %rs2, [%rd1+1];
218- ; O0-NEXT: shl.b16 %rs3, %rs2, 8;
219- ; O0-NEXT: or.b16 %rs4, %rs3, %rs1;
220- ; O0-NEXT: st.b16 [%SP], %rs4;
221- ; O0-NEXT: ld.b16 %r1, [%SP];
222- ; O0-NEXT: st.param.b32 [func_retval0], %r1;
198+ ; O0-NEXT: ld.b8 %rs1, [%rd1+1];
199+ ; O0-NEXT: ld.b8 %rs2, [%rd1];
200+ ; O0-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1};
223201; O0-NEXT: ret;
224202;
225203; O3-LABEL: test_zext_load_i8x2_unaligned(
226204; O3: {
227- ; O3-NEXT: .local .align 2 .b8 __local_depot6[2];
228- ; O3-NEXT: .reg .b64 %SP;
229- ; O3-NEXT: .reg .b64 %SPL;
230- ; O3-NEXT: .reg .b16 %rs<5>;
231- ; O3-NEXT: .reg .b32 %r<2>;
205+ ; O3-NEXT: .reg .b16 %rs<3>;
232206; O3-NEXT: .reg .b64 %rd<2>;
233207; O3-EMPTY:
234208; O3-NEXT: // %bb.0:
235- ; O3-NEXT: mov.b64 %SPL, __local_depot6;
236- ; O3-NEXT: cvta.local.u64 %SP, %SPL;
237209; O3-NEXT: ld.param.b64 %rd1, [test_zext_load_i8x2_unaligned_param_0];
238- ; O3-NEXT: ld.b8 %rs1, [%rd1];
239- ; O3-NEXT: ld.b8 %rs2, [%rd1+1];
240- ; O3-NEXT: shl.b16 %rs3, %rs2, 8;
241- ; O3-NEXT: or.b16 %rs4, %rs3, %rs1;
242- ; O3-NEXT: st.b16 [%SP], %rs4;
243- ; O3-NEXT: ld.b16 %r1, [%SP];
244- ; O3-NEXT: st.param.b32 [func_retval0], %r1;
210+ ; O3-NEXT: ld.b8 %rs1, [%rd1+1];
211+ ; O3-NEXT: ld.b8 %rs2, [%rd1];
212+ ; O3-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1};
245213; O3-NEXT: ret;
246214 %a = load <2 x i8 >, ptr %ptr , align 1
247215 %b = zext <2 x i8 > %a to <2 x i16 >
@@ -252,46 +220,26 @@ define i32 @test_zext_load_i8x2_unaligned(ptr %ptr) {
252220define i32 @test_sext_load_i8x2_unaligned (ptr %ptr ) {
253221; O0-LABEL: test_sext_load_i8x2_unaligned(
254222; O0: {
255- ; O0-NEXT: .local .align 2 .b8 __local_depot7[2];
256- ; O0-NEXT: .reg .b64 %SP;
257- ; O0-NEXT: .reg .b64 %SPL;
258- ; O0-NEXT: .reg .b16 %rs<5>;
259- ; O0-NEXT: .reg .b32 %r<2>;
223+ ; O0-NEXT: .reg .b16 %rs<3>;
260224; O0-NEXT: .reg .b64 %rd<2>;
261225; O0-EMPTY:
262226; O0-NEXT: // %bb.0:
263- ; O0-NEXT: mov.b64 %SPL, __local_depot7;
264- ; O0-NEXT: cvta.local.u64 %SP, %SPL;
265227; O0-NEXT: ld.param.b64 %rd1, [test_sext_load_i8x2_unaligned_param_0];
266- ; O0-NEXT: ld.b8 %rs1, [%rd1];
267- ; O0-NEXT: ld.b8 %rs2, [%rd1+1];
268- ; O0-NEXT: shl.b16 %rs3, %rs2, 8;
269- ; O0-NEXT: or.b16 %rs4, %rs3, %rs1;
270- ; O0-NEXT: st.b16 [%SP], %rs4;
271- ; O0-NEXT: ld.s16 %r1, [%SP];
272- ; O0-NEXT: st.param.b32 [func_retval0], %r1;
228+ ; O0-NEXT: ld.s8 %rs1, [%rd1+1];
229+ ; O0-NEXT: ld.s8 %rs2, [%rd1];
230+ ; O0-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1};
273231; O0-NEXT: ret;
274232;
275233; O3-LABEL: test_sext_load_i8x2_unaligned(
276234; O3: {
277- ; O3-NEXT: .local .align 2 .b8 __local_depot7[2];
278- ; O3-NEXT: .reg .b64 %SP;
279- ; O3-NEXT: .reg .b64 %SPL;
280- ; O3-NEXT: .reg .b16 %rs<5>;
281- ; O3-NEXT: .reg .b32 %r<2>;
235+ ; O3-NEXT: .reg .b16 %rs<3>;
282236; O3-NEXT: .reg .b64 %rd<2>;
283237; O3-EMPTY:
284238; O3-NEXT: // %bb.0:
285- ; O3-NEXT: mov.b64 %SPL, __local_depot7;
286- ; O3-NEXT: cvta.local.u64 %SP, %SPL;
287239; O3-NEXT: ld.param.b64 %rd1, [test_sext_load_i8x2_unaligned_param_0];
288- ; O3-NEXT: ld.b8 %rs1, [%rd1];
289- ; O3-NEXT: ld.b8 %rs2, [%rd1+1];
290- ; O3-NEXT: shl.b16 %rs3, %rs2, 8;
291- ; O3-NEXT: or.b16 %rs4, %rs3, %rs1;
292- ; O3-NEXT: st.b16 [%SP], %rs4;
293- ; O3-NEXT: ld.s16 %r1, [%SP];
294- ; O3-NEXT: st.param.b32 [func_retval0], %r1;
240+ ; O3-NEXT: ld.s8 %rs1, [%rd1+1];
241+ ; O3-NEXT: ld.s8 %rs2, [%rd1];
242+ ; O3-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1};
295243; O3-NEXT: ret;
296244 %a = load <2 x i8 >, ptr %ptr , align 1
297245 %b = sext <2 x i8 > %a to <2 x i16 >
0 commit comments