@@ -210,8 +210,8 @@ define <4 x i32> @all_dynamic(i32 %idx0, i32 %idx1, i32 %idx2, i32 %idx3) {
210210
211211; Test a single chain that mixes constant-index and dynamic-index insertelts.
212212; Should lower all insertelts to stores.
213- define <4 x i32 > @mix_high_dynamic_ratio (i32 %idx0 , i32 %idx1 ) {
214- ; CHECK-LABEL: mix_high_dynamic_ratio (
213+ define <4 x i32 > @mix_dynamic_constant (i32 %idx0 , i32 %idx1 ) {
214+ ; CHECK-LABEL: mix_dynamic_constant (
215215; CHECK: {
216216; CHECK-NEXT: .local .align 4 .b8 __local_depot6[16];
217217; CHECK-NEXT: .reg .b64 %SP;
@@ -222,13 +222,13 @@ define <4 x i32> @mix_high_dynamic_ratio(i32 %idx0, i32 %idx1) {
222222; CHECK-NEXT: // %bb.0:
223223; CHECK-NEXT: mov.b64 %SPL, __local_depot6;
224224; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
225- ; CHECK-NEXT: ld.param.b32 %rd1, [mix_high_dynamic_ratio_param_0 ];
225+ ; CHECK-NEXT: ld.param.b32 %rd1, [mix_dynamic_constant_param_0 ];
226226; CHECK-NEXT: and.b64 %rd2, %rd1, 3;
227227; CHECK-NEXT: shl.b64 %rd3, %rd2, 2;
228228; CHECK-NEXT: add.u64 %rd4, %SP, 0;
229229; CHECK-NEXT: add.s64 %rd5, %rd4, %rd3;
230230; CHECK-NEXT: st.b32 [%rd5], 10;
231- ; CHECK-NEXT: ld.param.b32 %rd6, [mix_high_dynamic_ratio_param_1 ];
231+ ; CHECK-NEXT: ld.param.b32 %rd6, [mix_dynamic_constant_param_1 ];
232232; CHECK-NEXT: and.b64 %rd7, %rd6, 3;
233233; CHECK-NEXT: shl.b64 %rd8, %rd7, 2;
234234; CHECK-NEXT: add.s64 %rd9, %rd4, %rd8;
@@ -246,50 +246,18 @@ define <4 x i32> @mix_high_dynamic_ratio(i32 %idx0, i32 %idx1) {
246246 ret <4 x i32 > %v2
247247}
248248
249- ; Test mixed constant and dynamic insertelts with low ratio of dynamic ones.
250- ; Should handle dynamic insertelt individually.
251- define <4 x i32 > @mix_low_dynamic_ratio (i32 %idx ) {
252- ; CHECK-LABEL: mix_low_dynamic_ratio(
253- ; CHECK: {
254- ; CHECK-NEXT: .local .align 4 .b8 __local_depot7[16];
255- ; CHECK-NEXT: .reg .b64 %SP;
256- ; CHECK-NEXT: .reg .b64 %SPL;
257- ; CHECK-NEXT: .reg .b32 %r<3>;
258- ; CHECK-NEXT: .reg .b64 %rd<6>;
259- ; CHECK-EMPTY:
260- ; CHECK-NEXT: // %bb.0:
261- ; CHECK-NEXT: mov.b64 %SPL, __local_depot7;
262- ; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
263- ; CHECK-NEXT: ld.param.b32 %rd1, [mix_low_dynamic_ratio_param_0];
264- ; CHECK-NEXT: and.b64 %rd2, %rd1, 3;
265- ; CHECK-NEXT: shl.b64 %rd3, %rd2, 2;
266- ; CHECK-NEXT: add.u64 %rd4, %SP, 0;
267- ; CHECK-NEXT: add.s64 %rd5, %rd4, %rd3;
268- ; CHECK-NEXT: st.b32 [%SP], 10;
269- ; CHECK-NEXT: st.b32 [%rd5], 20;
270- ; CHECK-NEXT: ld.b32 %r1, [%SP+4];
271- ; CHECK-NEXT: ld.b32 %r2, [%SP];
272- ; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r2, %r1, 30, 40};
273- ; CHECK-NEXT: ret;
274- %v0 = insertelement <4 x i32 > poison, i32 10 , i32 0
275- %v1 = insertelement <4 x i32 > %v0 , i32 20 , i32 %idx
276- %v2 = insertelement <4 x i32 > %v1 , i32 30 , i32 2
277- %v3 = insertelement <4 x i32 > %v2 , i32 40 , i32 3
278- ret <4 x i32 > %v3
279- }
280-
281249; Test two separate chains that don't interfere
282250define void @two_separate_chains (i32 %idx0 , i32 %idx1 , ptr %out0 , ptr %out1 ) {
283251; CHECK-LABEL: two_separate_chains(
284252; CHECK: {
285- ; CHECK-NEXT: .local .align 4 .b8 __local_depot8 [32];
253+ ; CHECK-NEXT: .local .align 4 .b8 __local_depot7 [32];
286254; CHECK-NEXT: .reg .b64 %SP;
287255; CHECK-NEXT: .reg .b64 %SPL;
288256; CHECK-NEXT: .reg .b32 %r<7>;
289257; CHECK-NEXT: .reg .b64 %rd<13>;
290258; CHECK-EMPTY:
291259; CHECK-NEXT: // %bb.0:
292- ; CHECK-NEXT: mov.b64 %SPL, __local_depot8 ;
260+ ; CHECK-NEXT: mov.b64 %SPL, __local_depot7 ;
293261; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
294262; CHECK-NEXT: ld.param.b32 %rd1, [two_separate_chains_param_0];
295263; CHECK-NEXT: and.b64 %rd2, %rd1, 3;
@@ -331,14 +299,14 @@ define void @two_separate_chains(i32 %idx0, i32 %idx1, ptr %out0, ptr %out1) {
331299define void @overlapping_chains (i32 %idx0 , i32 %idx1 , ptr %out0 , ptr %out1 ) {
332300; CHECK-LABEL: overlapping_chains(
333301; CHECK: {
334- ; CHECK-NEXT: .local .align 4 .b8 __local_depot9 [32];
302+ ; CHECK-NEXT: .local .align 4 .b8 __local_depot8 [32];
335303; CHECK-NEXT: .reg .b64 %SP;
336304; CHECK-NEXT: .reg .b64 %SPL;
337305; CHECK-NEXT: .reg .b32 %r<7>;
338306; CHECK-NEXT: .reg .b64 %rd<14>;
339307; CHECK-EMPTY:
340308; CHECK-NEXT: // %bb.0:
341- ; CHECK-NEXT: mov.b64 %SPL, __local_depot9 ;
309+ ; CHECK-NEXT: mov.b64 %SPL, __local_depot8 ;
342310; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
343311; CHECK-NEXT: ld.param.b32 %rd1, [overlapping_chains_param_0];
344312; CHECK-NEXT: and.b64 %rd2, %rd1, 3;
0 commit comments