@@ -1352,14 +1352,13 @@ define <vscale x 4 x i32> @insert_nxv1i32_nxv4i32_1(<vscale x 4 x i32> %vec, <vs
13521352; CHECK-NEXT: addvl sp, sp, #-1
13531353; CHECK-NEXT: rdvl x8, #1
13541354; CHECK-NEXT: mov x9, sp
1355- ; CHECK-NEXT: ptrue p1.b
1356- ; CHECK-NEXT: lsr x8, x8, #4
13571355; CHECK-NEXT: str z0, [sp]
1356+ ; CHECK-NEXT: lsr x8, x8, #4
13581357; CHECK-NEXT: whilelo p0.s, xzr, x8
13591358; CHECK-NEXT: cntw x8
1360- ; CHECK-NEXT: add x10 , x9, x8
1361- ; CHECK-NEXT: st1w { z1.s }, p0, [x10 ]
1362- ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9, x8 ]
1359+ ; CHECK-NEXT: add x8 , x9, x8
1360+ ; CHECK-NEXT: st1w { z1.s }, p0, [x8 ]
1361+ ; CHECK-NEXT: ldr z0, [sp ]
13631362; CHECK-NEXT: addvl sp, sp, #1
13641363; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
13651364; CHECK-NEXT: ret
@@ -1377,14 +1376,13 @@ define <vscale x 4 x i32> @insert_nxv1i32_nxv4i32_2(<vscale x 4 x i32> %vec, <vs
13771376; CHECK-NEXT: addvl sp, sp, #-1
13781377; CHECK-NEXT: rdvl x8, #1
13791378; CHECK-NEXT: mov x9, sp
1380- ; CHECK-NEXT: ptrue p1.b
1381- ; CHECK-NEXT: lsr x8, x8, #4
13821379; CHECK-NEXT: str z0, [sp]
1380+ ; CHECK-NEXT: lsr x8, x8, #4
13831381; CHECK-NEXT: whilelo p0.s, xzr, x8
13841382; CHECK-NEXT: cnth x8
1385- ; CHECK-NEXT: add x10 , x9, x8
1386- ; CHECK-NEXT: st1w { z1.s }, p0, [x10 ]
1387- ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9, x8 ]
1383+ ; CHECK-NEXT: add x8 , x9, x8
1384+ ; CHECK-NEXT: st1w { z1.s }, p0, [x8 ]
1385+ ; CHECK-NEXT: ldr z0, [sp ]
13881386; CHECK-NEXT: addvl sp, sp, #1
13891387; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
13901388; CHECK-NEXT: ret
@@ -1402,14 +1400,13 @@ define <vscale x 4 x i32> @insert_nxv1i32_nxv4i32_3(<vscale x 4 x i32> %vec, <vs
14021400; CHECK-NEXT: addvl sp, sp, #-1
14031401; CHECK-NEXT: rdvl x8, #1
14041402; CHECK-NEXT: mov x9, sp
1405- ; CHECK-NEXT: ptrue p1.b
1406- ; CHECK-NEXT: lsr x8, x8, #4
14071403; CHECK-NEXT: str z0, [sp]
1404+ ; CHECK-NEXT: lsr x8, x8, #4
14081405; CHECK-NEXT: whilelo p0.s, xzr, x8
14091406; CHECK-NEXT: cntw x8, all, mul #3
1410- ; CHECK-NEXT: add x10 , x9, x8
1411- ; CHECK-NEXT: st1w { z1.s }, p0, [x10 ]
1412- ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9, x8 ]
1407+ ; CHECK-NEXT: add x8 , x9, x8
1408+ ; CHECK-NEXT: st1w { z1.s }, p0, [x8 ]
1409+ ; CHECK-NEXT: ldr z0, [sp ]
14131410; CHECK-NEXT: addvl sp, sp, #1
14141411; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
14151412; CHECK-NEXT: ret
@@ -1456,7 +1453,7 @@ define <vscale x 2 x float> @insert_nxv1f32_nxv2f32_1(<vscale x 2 x float> %vec,
14561453; CHECK-NEXT: cntw x8
14571454; CHECK-NEXT: add x8, x9, x8
14581455; CHECK-NEXT: st1w { z1.d }, p1, [x8]
1459- ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x8 ]
1456+ ; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl ]
14601457; CHECK-NEXT: addvl sp, sp, #1
14611458; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
14621459; CHECK-NEXT: ret
@@ -1498,13 +1495,12 @@ define <vscale x 4 x float> @insert_nxv1f32_nxv4f32_1(<vscale x 4 x float> %vec,
14981495; CHECK-NEXT: uunpklo z1.d, z1.s
14991496; CHECK-NEXT: mov x9, sp
15001497; CHECK-NEXT: lsr x8, x8, #4
1501- ; CHECK-NEXT: ptrue p1.b
15021498; CHECK-NEXT: str z0, [sp]
15031499; CHECK-NEXT: whilelo p0.d, xzr, x8
15041500; CHECK-NEXT: cntw x8
1505- ; CHECK-NEXT: add x10 , x9, x8
1506- ; CHECK-NEXT: st1w { z1.d }, p0, [x10 ]
1507- ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9, x8 ]
1501+ ; CHECK-NEXT: add x8 , x9, x8
1502+ ; CHECK-NEXT: st1w { z1.d }, p0, [x8 ]
1503+ ; CHECK-NEXT: ldr z0, [sp ]
15081504; CHECK-NEXT: addvl sp, sp, #1
15091505; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
15101506; CHECK-NEXT: ret
@@ -1522,14 +1518,11 @@ define <vscale x 4 x float> @insert_nxv1f32_nxv4f32_2(<vscale x 4 x float> %vec,
15221518; CHECK-NEXT: addvl sp, sp, #-1
15231519; CHECK-NEXT: rdvl x8, #1
15241520; CHECK-NEXT: uunpklo z1.d, z1.s
1525- ; CHECK-NEXT: ptrue p1.b
1526- ; CHECK-NEXT: lsr x8, x8, #4
15271521; CHECK-NEXT: str z0, [sp]
1528- ; CHECK-NEXT: mov x9, sp
1522+ ; CHECK-NEXT: lsr x8, x8, #4
15291523; CHECK-NEXT: whilelo p0.d, xzr, x8
1530- ; CHECK-NEXT: cnth x8
15311524; CHECK-NEXT: st1w { z1.d }, p0, [sp, #1, mul vl]
1532- ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9, x8 ]
1525+ ; CHECK-NEXT: ldr z0, [sp ]
15331526; CHECK-NEXT: addvl sp, sp, #1
15341527; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
15351528; CHECK-NEXT: ret
@@ -1549,13 +1542,12 @@ define <vscale x 4 x float> @insert_nxv1f32_nxv4f32_3(<vscale x 4 x float> %vec,
15491542; CHECK-NEXT: uunpklo z1.d, z1.s
15501543; CHECK-NEXT: mov x9, sp
15511544; CHECK-NEXT: lsr x8, x8, #4
1552- ; CHECK-NEXT: ptrue p1.b
15531545; CHECK-NEXT: str z0, [sp]
15541546; CHECK-NEXT: whilelo p0.d, xzr, x8
15551547; CHECK-NEXT: cntw x8, all, mul #3
1556- ; CHECK-NEXT: add x10 , x9, x8
1557- ; CHECK-NEXT: st1w { z1.d }, p0, [x10 ]
1558- ; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9, x8 ]
1548+ ; CHECK-NEXT: add x8 , x9, x8
1549+ ; CHECK-NEXT: st1w { z1.d }, p0, [x8 ]
1550+ ; CHECK-NEXT: ldr z0, [sp ]
15591551; CHECK-NEXT: addvl sp, sp, #1
15601552; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
15611553; CHECK-NEXT: ret
0 commit comments