Commit 2512611

[RegAlloc] Add coverage leading to revert of pr160765 (#161614)
Essentially what happened is the following series of events:

1) We rematerialized the vmv.v.x into the loop.
2) As this was the last use of the instruction, we deleted the instruction and removed it from the original live range.
3) We split the live range for the remat.
4) We tried to rematerialize the uses of that split interval, and crashed because the assert that the def is available in the original live interval no longer holds.
1 parent c0a2bea commit 2512611
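To make the ordering of events concrete, here is a minimal, self-contained C++ sketch of the sequence described in the commit message. LiveInterval, SlotIndex, and the assertion text are illustrative stand-ins, not LLVM's actual register-allocator data structures; the program deliberately fires the assertion at step 4 (when built without NDEBUG).

// Model of the failure sequence; names are illustrative stand-ins.
#include <cassert>
#include <set>

using SlotIndex = int; // stand-in for a program point

struct LiveInterval {
  std::set<SlotIndex> Defs; // def points currently recorded in this interval
};

int main() {
  LiveInterval Original; // live interval of the original vmv.v.x
  SlotIndex Def = 0;
  Original.Defs.insert(Def);

  // 1) The vmv.v.x is rematerialized into the loop.
  // 2) That remat consumed the last use, so the original instruction is
  //    deleted and its def is pruned from the original live range.
  Original.Defs.erase(Def);

  // 3) The live range of the remat'd value is then split (not modeled here).

  // 4) Rematerializing a use of the split interval consults the original
  //    interval again; this models the assert that fired.
  assert(Original.Defs.count(Def) &&
         "def must be available in the original live interval");
}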

1 file changed, 132 insertions(+), 0 deletions(-)


llvm/test/CodeGen/RISCV/rvv/remat.ll

@@ -301,3 +301,135 @@ define void @vfmv.s.f(ptr %p, double %x) {
   store volatile double %x, ptr %p
   ret void
 }
+
+; This test is fairly fragile, but it's trying to cover the case which
+; caused the revert of bba9172 due to an interaction with how rematerialized
+; instructions are pruned from the original live interval. In the result
+; below, we remat the vmv.v.x into the loop, but fail to remat the vmv.v.x
+; a second time after further splitting its live range. We shouldn't need
+; to spill it to the stack at all.
+define i64 @dual_remat(i64 %0, <vscale x 16 x i64> %1, <vscale x 16 x i64> %2, ptr %p) #0 {
+; CHECK-LABEL: dual_remat:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a2, a1, 5
+; CHECK-NEXT:    add a1, a2, a1
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x21, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 33 * vlenb
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a1, a2, 3
+; CHECK-NEXT:    slli a2, a2, 3
+; CHECK-NEXT:    add a2, a3, a2
+; CHECK-NEXT:    vmv.v.i v0, 0
+; CHECK-NEXT:  .LBB8_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    csrr a4, vlenb
+; CHECK-NEXT:    mv a5, a4
+; CHECK-NEXT:    slli a4, a4, 3
+; CHECK-NEXT:    add a5, a5, a4
+; CHECK-NEXT:    slli a4, a4, 1
+; CHECK-NEXT:    add a4, a4, a5
+; CHECK-NEXT:    add a4, sp, a4
+; CHECK-NEXT:    addi a4, a4, 16
+; CHECK-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    csrr a4, vlenb
+; CHECK-NEXT:    slli a5, a4, 4
+; CHECK-NEXT:    add a4, a5, a4
+; CHECK-NEXT:    add a4, sp, a4
+; CHECK-NEXT:    addi a4, a4, 16
+; CHECK-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    csrr a4, vlenb
+; CHECK-NEXT:    mv a5, a4
+; CHECK-NEXT:    slli a4, a4, 3
+; CHECK-NEXT:    add a5, a5, a4
+; CHECK-NEXT:    slli a4, a4, 1
+; CHECK-NEXT:    add a4, a4, a5
+; CHECK-NEXT:    add a4, sp, a4
+; CHECK-NEXT:    addi a4, a4, 16
+; CHECK-NEXT:    vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vand.vv v16, v16, v8
+; CHECK-NEXT:    vmsne.vi v24, v16, 0
+; CHECK-NEXT:    csrr a4, vlenb
+; CHECK-NEXT:    slli a4, a4, 4
+; CHECK-NEXT:    add a4, sp, a4
+; CHECK-NEXT:    addi a4, a4, 16
+; CHECK-NEXT:    vs1r.v v24, (a4) # vscale x 8-byte Folded Spill
+; CHECK-NEXT:    vand.vv v16, v0, v8
+; CHECK-NEXT:    vmsne.vi v8, v16, 0
+; CHECK-NEXT:    csrr a4, vlenb
+; CHECK-NEXT:    mv a5, a4
+; CHECK-NEXT:    slli a4, a4, 3
+; CHECK-NEXT:    add a5, a5, a4
+; CHECK-NEXT:    slli a4, a4, 1
+; CHECK-NEXT:    add a4, a4, a5
+; CHECK-NEXT:    add a4, sp, a4
+; CHECK-NEXT:    addi a4, a4, 16
+; CHECK-NEXT:    vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    csrr a4, vlenb
+; CHECK-NEXT:    slli a4, a4, 4
+; CHECK-NEXT:    add a4, sp, a4
+; CHECK-NEXT:    addi a4, a4, 16
+; CHECK-NEXT:    vl1r.v v9, (a4) # vscale x 8-byte Folded Reload
+; CHECK-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vslideup.vx v9, v8, a1
+; CHECK-NEXT:    vsetvli a4, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vcpop.m a4, v9
+; CHECK-NEXT:    csrr a5, vlenb
+; CHECK-NEXT:    slli a6, a5, 4
+; CHECK-NEXT:    add a5, a6, a5
+; CHECK-NEXT:    add a5, sp, a5
+; CHECK-NEXT:    addi a5, a5, 16
+; CHECK-NEXT:    vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vs8r.v v8, (a3)
+; CHECK-NEXT:    vs8r.v v8, (a2)
+; CHECK-NEXT:    addi a5, sp, 16
+; CHECK-NEXT:    vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vor.vv v16, v16, v8
+; CHECK-NEXT:    csrr a5, vlenb
+; CHECK-NEXT:    slli a5, a5, 3
+; CHECK-NEXT:    add a5, sp, a5
+; CHECK-NEXT:    addi a5, a5, 16
+; CHECK-NEXT:    vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vor.vv v0, v0, v8
+; CHECK-NEXT:    beqz a4, .LBB8_1
+; CHECK-NEXT:  # %bb.2: # %middle.block
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a2, a1, 5
+; CHECK-NEXT:    add a1, a2, a1
+; CHECK-NEXT:    add sp, sp, a1
+; CHECK-NEXT:    .cfi_def_cfa sp, 16
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <vscale x 16 x i64> zeroinitializer, i64 %0, i64 0
+  %broadcast.splat = shufflevector <vscale x 16 x i64> %broadcast.splatinsert, <vscale x 16 x i64> zeroinitializer, <vscale x 16 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
+  %vec.ind = phi <vscale x 16 x i64> [ zeroinitializer, %entry ], [ %vec.ind.next, %vector.body ]
+  %3 = and <vscale x 16 x i64> %vec.ind, %broadcast.splat
+  %4 = icmp ne <vscale x 16 x i64> %3, zeroinitializer
+  store <vscale x 16 x i64> %broadcast.splat, ptr %p
+  %5 = tail call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> %4)
+  %vec.ind.next = or <vscale x 16 x i64> %vec.ind, %1
+  br i1 %5, label %middle.block, label %vector.body
+
+middle.block: ; preds = %vector.body
+  %and.i = and i64 1, %0
+  ret i64 %and.i
+}
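As the comment notes, the test is fairly fragile: the CHECK lines pin exact register assignments and spill-slot arithmetic, so unrelated codegen changes will perturb them. When that happens they are normally regenerated rather than hand-edited, typically with something like the following (assuming the file's existing RUN line and an llc on PATH or supplied via --llc-binary):

llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/RISCV/rvv/remat.ll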
