Skip to content

Commit 9a12fd4

Browse files
Fix address calculation when loading the result of an expanded vector.insert.
1 parent 2d9f32a commit 9a12fd4

File tree

2 files changed

+24
-32
lines changed

2 files changed

+24
-32
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7545,10 +7545,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
75457545
DAG.getMaskFromElementCount(DL, SubVT, OrigVT.getVectorElementCount());
75467546

75477547
// Overwrite the sub-vector at the required offset.
7548-
StackPtr =
7548+
SDValue SubVecPtr =
75497549
TLI.getVectorSubVecPointer(DAG, StackPtr, VT, OrigVT, N->getOperand(2));
7550-
Ch = DAG.getMaskedStore(Ch, DL, SubVec, StackPtr,
7551-
DAG.getUNDEF(StackPtr.getValueType()), Mask, VT,
7550+
Ch = DAG.getMaskedStore(Ch, DL, SubVec, SubVecPtr,
7551+
DAG.getUNDEF(SubVecPtr.getValueType()), Mask, VT,
75527552
StoreMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD);
75537553

75547554
// Read back the result.

llvm/test/CodeGen/AArch64/sve-insert-vector.ll

Lines changed: 21 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1352,14 +1352,13 @@ define <vscale x 4 x i32> @insert_nxv1i32_nxv4i32_1(<vscale x 4 x i32> %vec, <vs
13521352
; CHECK-NEXT: addvl sp, sp, #-1
13531353
; CHECK-NEXT: rdvl x8, #1
13541354
; CHECK-NEXT: mov x9, sp
1355-
; CHECK-NEXT: ptrue p1.b
1356-
; CHECK-NEXT: lsr x8, x8, #4
13571355
; CHECK-NEXT: str z0, [sp]
1356+
; CHECK-NEXT: lsr x8, x8, #4
13581357
; CHECK-NEXT: whilelo p0.s, xzr, x8
13591358
; CHECK-NEXT: cntw x8
1360-
; CHECK-NEXT: add x10, x9, x8
1361-
; CHECK-NEXT: st1w { z1.s }, p0, [x10]
1362-
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9, x8]
1359+
; CHECK-NEXT: add x8, x9, x8
1360+
; CHECK-NEXT: st1w { z1.s }, p0, [x8]
1361+
; CHECK-NEXT: ldr z0, [sp]
13631362
; CHECK-NEXT: addvl sp, sp, #1
13641363
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
13651364
; CHECK-NEXT: ret
@@ -1377,14 +1376,13 @@ define <vscale x 4 x i32> @insert_nxv1i32_nxv4i32_2(<vscale x 4 x i32> %vec, <vs
13771376
; CHECK-NEXT: addvl sp, sp, #-1
13781377
; CHECK-NEXT: rdvl x8, #1
13791378
; CHECK-NEXT: mov x9, sp
1380-
; CHECK-NEXT: ptrue p1.b
1381-
; CHECK-NEXT: lsr x8, x8, #4
13821379
; CHECK-NEXT: str z0, [sp]
1380+
; CHECK-NEXT: lsr x8, x8, #4
13831381
; CHECK-NEXT: whilelo p0.s, xzr, x8
13841382
; CHECK-NEXT: cnth x8
1385-
; CHECK-NEXT: add x10, x9, x8
1386-
; CHECK-NEXT: st1w { z1.s }, p0, [x10]
1387-
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9, x8]
1383+
; CHECK-NEXT: add x8, x9, x8
1384+
; CHECK-NEXT: st1w { z1.s }, p0, [x8]
1385+
; CHECK-NEXT: ldr z0, [sp]
13881386
; CHECK-NEXT: addvl sp, sp, #1
13891387
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
13901388
; CHECK-NEXT: ret
@@ -1402,14 +1400,13 @@ define <vscale x 4 x i32> @insert_nxv1i32_nxv4i32_3(<vscale x 4 x i32> %vec, <vs
14021400
; CHECK-NEXT: addvl sp, sp, #-1
14031401
; CHECK-NEXT: rdvl x8, #1
14041402
; CHECK-NEXT: mov x9, sp
1405-
; CHECK-NEXT: ptrue p1.b
1406-
; CHECK-NEXT: lsr x8, x8, #4
14071403
; CHECK-NEXT: str z0, [sp]
1404+
; CHECK-NEXT: lsr x8, x8, #4
14081405
; CHECK-NEXT: whilelo p0.s, xzr, x8
14091406
; CHECK-NEXT: cntw x8, all, mul #3
1410-
; CHECK-NEXT: add x10, x9, x8
1411-
; CHECK-NEXT: st1w { z1.s }, p0, [x10]
1412-
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9, x8]
1407+
; CHECK-NEXT: add x8, x9, x8
1408+
; CHECK-NEXT: st1w { z1.s }, p0, [x8]
1409+
; CHECK-NEXT: ldr z0, [sp]
14131410
; CHECK-NEXT: addvl sp, sp, #1
14141411
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
14151412
; CHECK-NEXT: ret
@@ -1456,7 +1453,7 @@ define <vscale x 2 x float> @insert_nxv1f32_nxv2f32_1(<vscale x 2 x float> %vec,
14561453
; CHECK-NEXT: cntw x8
14571454
; CHECK-NEXT: add x8, x9, x8
14581455
; CHECK-NEXT: st1w { z1.d }, p1, [x8]
1459-
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x8]
1456+
; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl]
14601457
; CHECK-NEXT: addvl sp, sp, #1
14611458
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
14621459
; CHECK-NEXT: ret
@@ -1498,13 +1495,12 @@ define <vscale x 4 x float> @insert_nxv1f32_nxv4f32_1(<vscale x 4 x float> %vec,
14981495
; CHECK-NEXT: uunpklo z1.d, z1.s
14991496
; CHECK-NEXT: mov x9, sp
15001497
; CHECK-NEXT: lsr x8, x8, #4
1501-
; CHECK-NEXT: ptrue p1.b
15021498
; CHECK-NEXT: str z0, [sp]
15031499
; CHECK-NEXT: whilelo p0.d, xzr, x8
15041500
; CHECK-NEXT: cntw x8
1505-
; CHECK-NEXT: add x10, x9, x8
1506-
; CHECK-NEXT: st1w { z1.d }, p0, [x10]
1507-
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9, x8]
1501+
; CHECK-NEXT: add x8, x9, x8
1502+
; CHECK-NEXT: st1w { z1.d }, p0, [x8]
1503+
; CHECK-NEXT: ldr z0, [sp]
15081504
; CHECK-NEXT: addvl sp, sp, #1
15091505
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
15101506
; CHECK-NEXT: ret
@@ -1522,14 +1518,11 @@ define <vscale x 4 x float> @insert_nxv1f32_nxv4f32_2(<vscale x 4 x float> %vec,
15221518
; CHECK-NEXT: addvl sp, sp, #-1
15231519
; CHECK-NEXT: rdvl x8, #1
15241520
; CHECK-NEXT: uunpklo z1.d, z1.s
1525-
; CHECK-NEXT: ptrue p1.b
1526-
; CHECK-NEXT: lsr x8, x8, #4
15271521
; CHECK-NEXT: str z0, [sp]
1528-
; CHECK-NEXT: mov x9, sp
1522+
; CHECK-NEXT: lsr x8, x8, #4
15291523
; CHECK-NEXT: whilelo p0.d, xzr, x8
1530-
; CHECK-NEXT: cnth x8
15311524
; CHECK-NEXT: st1w { z1.d }, p0, [sp, #1, mul vl]
1532-
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9, x8]
1525+
; CHECK-NEXT: ldr z0, [sp]
15331526
; CHECK-NEXT: addvl sp, sp, #1
15341527
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
15351528
; CHECK-NEXT: ret
@@ -1549,13 +1542,12 @@ define <vscale x 4 x float> @insert_nxv1f32_nxv4f32_3(<vscale x 4 x float> %vec,
15491542
; CHECK-NEXT: uunpklo z1.d, z1.s
15501543
; CHECK-NEXT: mov x9, sp
15511544
; CHECK-NEXT: lsr x8, x8, #4
1552-
; CHECK-NEXT: ptrue p1.b
15531545
; CHECK-NEXT: str z0, [sp]
15541546
; CHECK-NEXT: whilelo p0.d, xzr, x8
15551547
; CHECK-NEXT: cntw x8, all, mul #3
1556-
; CHECK-NEXT: add x10, x9, x8
1557-
; CHECK-NEXT: st1w { z1.d }, p0, [x10]
1558-
; CHECK-NEXT: ld1b { z0.b }, p1/z, [x9, x8]
1548+
; CHECK-NEXT: add x8, x9, x8
1549+
; CHECK-NEXT: st1w { z1.d }, p0, [x8]
1550+
; CHECK-NEXT: ldr z0, [sp]
15591551
; CHECK-NEXT: addvl sp, sp, #1
15601552
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
15611553
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)