Skip to content

Commit 0c0d26a

Browse files
committed
fixup! Clamp EVL1 when calculating StackPtr2.
1 parent f67d8dd commit 0c0d26a

File tree

2 files changed

+90
-78
lines changed

2 files changed

+90
-78
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3245,11 +3245,7 @@ void DAGTypeLegalizer::SplitVecRes_VP_SPLICE(SDNode *N, SDValue &Lo,
32453245
PtrInfo, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(),
32463246
Alignment);
32473247

3248-
unsigned EltWidth = VT.getScalarSizeInBits() / 8;
3249-
SDValue OffsetToV2 =
3250-
DAG.getNode(ISD::MUL, DL, PtrVT, DAG.getZExtOrTrunc(EVL1, DL, PtrVT),
3251-
DAG.getConstant(EltWidth, DL, PtrVT));
3252-
SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
3248+
SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, EVL1);
32533249

32543250
SDValue TrueMask = DAG.getBoolConstant(true, DL, Mask.getValueType(), VT);
32553251
SDValue StoreV1 = DAG.getStoreVP(DAG.getEntryNode(), DL, V1, StackPtr,
@@ -3266,9 +3262,11 @@ void DAGTypeLegalizer::SplitVecRes_VP_SPLICE(SDNode *N, SDValue &Lo,
32663262
Load = DAG.getLoadVP(VT, DL, StoreV2, StackPtr, Mask, EVL2, LoadMMO);
32673263
} else {
32683264
uint64_t TrailingElts = -Imm;
3265+
unsigned EltWidth = VT.getScalarSizeInBits() / 8;
32693266
SDValue TrailingBytes = DAG.getConstant(TrailingElts * EltWidth, DL, PtrVT);
32703267

32713268
// Make sure TrailingBytes doesn't exceed the size of vec1.
3269+
SDValue OffsetToV2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, StackPtr);
32723270
TrailingBytes =
32733271
DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, OffsetToV2);
32743272

llvm/test/CodeGen/RISCV/rvv/vp-splice.ll

Lines changed: 87 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -291,54 +291,61 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64(<vscale x 16 x i64> %va, <vs
291291
; CHECK-LABEL: test_vp_splice_nxv16i64:
292292
; CHECK: # %bb.0:
293293
; CHECK-NEXT: csrr a4, vlenb
294+
; CHECK-NEXT: slli a5, a4, 1
295+
; CHECK-NEXT: addi a5, a5, -1
294296
; CHECK-NEXT: slli a1, a4, 3
295-
; CHECK-NEXT: add a5, a0, a1
296-
; CHECK-NEXT: vl8re64.v v24, (a5)
297-
; CHECK-NEXT: mv a5, a2
298-
; CHECK-NEXT: bltu a2, a4, .LBB21_2
297+
; CHECK-NEXT: mv a7, a2
298+
; CHECK-NEXT: bltu a2, a5, .LBB21_2
299299
; CHECK-NEXT: # %bb.1:
300-
; CHECK-NEXT: mv a5, a4
300+
; CHECK-NEXT: mv a7, a5
301301
; CHECK-NEXT: .LBB21_2:
302302
; CHECK-NEXT: addi sp, sp, -80
303303
; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
304304
; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
305305
; CHECK-NEXT: addi s0, sp, 80
306-
; CHECK-NEXT: csrr a6, vlenb
307-
; CHECK-NEXT: slli a6, a6, 5
308-
; CHECK-NEXT: sub sp, sp, a6
306+
; CHECK-NEXT: csrr a5, vlenb
307+
; CHECK-NEXT: slli a5, a5, 5
308+
; CHECK-NEXT: sub sp, sp, a5
309309
; CHECK-NEXT: andi sp, sp, -64
310-
; CHECK-NEXT: vl8re64.v v0, (a0)
311-
; CHECK-NEXT: addi a0, sp, 64
312-
; CHECK-NEXT: sub a6, a2, a4
313-
; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma
314-
; CHECK-NEXT: vse64.v v8, (a0)
315-
; CHECK-NEXT: sltu a5, a2, a6
316-
; CHECK-NEXT: addi a5, a5, -1
317-
; CHECK-NEXT: and a5, a5, a6
318-
; CHECK-NEXT: add a6, a0, a1
319-
; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma
320-
; CHECK-NEXT: vse64.v v16, (a6)
321-
; CHECK-NEXT: sub a5, a3, a4
322-
; CHECK-NEXT: slli a2, a2, 3
323-
; CHECK-NEXT: sltu a6, a3, a5
324-
; CHECK-NEXT: add a2, a0, a2
325-
; CHECK-NEXT: addi a0, a6, -1
326-
; CHECK-NEXT: add a6, a2, a1
327-
; CHECK-NEXT: and a0, a0, a5
328-
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
329-
; CHECK-NEXT: vse64.v v24, (a6)
330-
; CHECK-NEXT: bltu a3, a4, .LBB21_4
310+
; CHECK-NEXT: add a5, a0, a1
311+
; CHECK-NEXT: slli a7, a7, 3
312+
; CHECK-NEXT: addi a6, sp, 64
313+
; CHECK-NEXT: mv t0, a2
314+
; CHECK-NEXT: bltu a2, a4, .LBB21_4
331315
; CHECK-NEXT: # %bb.3:
332-
; CHECK-NEXT: mv a3, a4
316+
; CHECK-NEXT: mv t0, a4
333317
; CHECK-NEXT: .LBB21_4:
334-
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
335-
; CHECK-NEXT: vse64.v v0, (a2)
336-
; CHECK-NEXT: addi a2, sp, 104
337-
; CHECK-NEXT: add a1, a2, a1
318+
; CHECK-NEXT: vl8re64.v v24, (a5)
319+
; CHECK-NEXT: add a5, a6, a7
320+
; CHECK-NEXT: vl8re64.v v0, (a0)
321+
; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma
322+
; CHECK-NEXT: vse64.v v8, (a6)
323+
; CHECK-NEXT: sub a0, a2, a4
324+
; CHECK-NEXT: sltu a2, a2, a0
325+
; CHECK-NEXT: addi a2, a2, -1
326+
; CHECK-NEXT: and a0, a2, a0
327+
; CHECK-NEXT: add a6, a6, a1
338328
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
329+
; CHECK-NEXT: vse64.v v16, (a6)
330+
; CHECK-NEXT: mv a0, a3
331+
; CHECK-NEXT: bltu a3, a4, .LBB21_6
332+
; CHECK-NEXT: # %bb.5:
333+
; CHECK-NEXT: mv a0, a4
334+
; CHECK-NEXT: .LBB21_6:
335+
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
336+
; CHECK-NEXT: vse64.v v0, (a5)
337+
; CHECK-NEXT: sub a2, a3, a4
338+
; CHECK-NEXT: add a5, a5, a1
339+
; CHECK-NEXT: sltu a3, a3, a2
340+
; CHECK-NEXT: addi a3, a3, -1
341+
; CHECK-NEXT: and a2, a3, a2
342+
; CHECK-NEXT: addi a3, sp, 104
343+
; CHECK-NEXT: add a1, a3, a1
344+
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
345+
; CHECK-NEXT: vse64.v v24, (a5)
339346
; CHECK-NEXT: vle64.v v16, (a1)
340-
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
341-
; CHECK-NEXT: vle64.v v8, (a2)
347+
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
348+
; CHECK-NEXT: vle64.v v8, (a3)
342349
; CHECK-NEXT: addi sp, s0, -80
343350
; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
344351
; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
@@ -351,14 +358,14 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64(<vscale x 16 x i64> %va, <vs
351358
define <vscale x 16 x i64> @test_vp_splice_nxv16i64_negative_offset(<vscale x 16 x i64> %va, <vscale x 16 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) nounwind {
352359
; CHECK-LABEL: test_vp_splice_nxv16i64_negative_offset:
353360
; CHECK: # %bb.0:
354-
; CHECK-NEXT: csrr a4, vlenb
355-
; CHECK-NEXT: slli a1, a4, 3
356-
; CHECK-NEXT: add a5, a0, a1
357-
; CHECK-NEXT: vl8re64.v v0, (a5)
358-
; CHECK-NEXT: mv a5, a2
359-
; CHECK-NEXT: bltu a2, a4, .LBB22_2
361+
; CHECK-NEXT: csrr a5, vlenb
362+
; CHECK-NEXT: slli a6, a5, 1
363+
; CHECK-NEXT: addi a6, a6, -1
364+
; CHECK-NEXT: slli a1, a5, 3
365+
; CHECK-NEXT: mv a4, a2
366+
; CHECK-NEXT: bltu a2, a6, .LBB22_2
360367
; CHECK-NEXT: # %bb.1:
361-
; CHECK-NEXT: mv a5, a4
368+
; CHECK-NEXT: mv a4, a6
362369
; CHECK-NEXT: .LBB22_2:
363370
; CHECK-NEXT: addi sp, sp, -80
364371
; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
@@ -368,43 +375,50 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64_negative_offset(<vscale x 16
368375
; CHECK-NEXT: slli a6, a6, 5
369376
; CHECK-NEXT: sub sp, sp, a6
370377
; CHECK-NEXT: andi sp, sp, -64
371-
; CHECK-NEXT: vl8re64.v v24, (a0)
372-
; CHECK-NEXT: addi a0, sp, 64
373-
; CHECK-NEXT: sub a6, a2, a4
374-
; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma
375-
; CHECK-NEXT: vse64.v v8, (a0)
376-
; CHECK-NEXT: sltu a5, a2, a6
377-
; CHECK-NEXT: addi a5, a5, -1
378-
; CHECK-NEXT: and a5, a5, a6
379378
; CHECK-NEXT: add a6, a0, a1
380-
; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma
381-
; CHECK-NEXT: vse64.v v16, (a6)
382-
; CHECK-NEXT: sub a6, a3, a4
383-
; CHECK-NEXT: slli a2, a2, 3
384-
; CHECK-NEXT: sltu a7, a3, a6
385-
; CHECK-NEXT: add a5, a0, a2
386-
; CHECK-NEXT: addi a7, a7, -1
387-
; CHECK-NEXT: and a0, a7, a6
388-
; CHECK-NEXT: add a6, a5, a1
389-
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
390-
; CHECK-NEXT: vse64.v v0, (a6)
391-
; CHECK-NEXT: bltu a3, a4, .LBB22_4
379+
; CHECK-NEXT: slli a4, a4, 3
380+
; CHECK-NEXT: addi a7, sp, 64
381+
; CHECK-NEXT: mv t0, a2
382+
; CHECK-NEXT: bltu a2, a5, .LBB22_4
392383
; CHECK-NEXT: # %bb.3:
393-
; CHECK-NEXT: mv a3, a4
384+
; CHECK-NEXT: mv t0, a5
394385
; CHECK-NEXT: .LBB22_4:
395-
; CHECK-NEXT: li a4, 8
396-
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
397-
; CHECK-NEXT: vse64.v v24, (a5)
398-
; CHECK-NEXT: bltu a2, a4, .LBB22_6
386+
; CHECK-NEXT: vl8re64.v v24, (a6)
387+
; CHECK-NEXT: add a6, a7, a4
388+
; CHECK-NEXT: vl8re64.v v0, (a0)
389+
; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma
390+
; CHECK-NEXT: vse64.v v8, (a7)
391+
; CHECK-NEXT: sub a0, a2, a5
392+
; CHECK-NEXT: sltu a2, a2, a0
393+
; CHECK-NEXT: addi a2, a2, -1
394+
; CHECK-NEXT: and a0, a2, a0
395+
; CHECK-NEXT: add a7, a7, a1
396+
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
397+
; CHECK-NEXT: vse64.v v16, (a7)
398+
; CHECK-NEXT: mv a0, a3
399+
; CHECK-NEXT: bltu a3, a5, .LBB22_6
399400
; CHECK-NEXT: # %bb.5:
400-
; CHECK-NEXT: li a2, 8
401+
; CHECK-NEXT: mv a0, a5
401402
; CHECK-NEXT: .LBB22_6:
402-
; CHECK-NEXT: sub a5, a5, a2
403-
; CHECK-NEXT: add a1, a5, a1
404403
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
404+
; CHECK-NEXT: vse64.v v0, (a6)
405+
; CHECK-NEXT: sub a2, a3, a5
406+
; CHECK-NEXT: add a5, a6, a1
407+
; CHECK-NEXT: sltu a3, a3, a2
408+
; CHECK-NEXT: addi a3, a3, -1
409+
; CHECK-NEXT: and a2, a3, a2
410+
; CHECK-NEXT: li a3, 8
411+
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
412+
; CHECK-NEXT: vse64.v v24, (a5)
413+
; CHECK-NEXT: bltu a4, a3, .LBB22_8
414+
; CHECK-NEXT: # %bb.7:
415+
; CHECK-NEXT: li a4, 8
416+
; CHECK-NEXT: .LBB22_8:
417+
; CHECK-NEXT: sub a2, a6, a4
418+
; CHECK-NEXT: add a1, a2, a1
405419
; CHECK-NEXT: vle64.v v16, (a1)
406-
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
407-
; CHECK-NEXT: vle64.v v8, (a5)
420+
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
421+
; CHECK-NEXT: vle64.v v8, (a2)
408422
; CHECK-NEXT: addi sp, s0, -80
409423
; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
410424
; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload

0 commit comments

Comments (0)