@@ -301,3 +301,135 @@ define void @vfmv.s.f(ptr %p, double %x) {
   store volatile double %x, ptr %p
   ret void
 }
+
+; This test is fairly fragile, but it's trying to cover the case which
+; caused the revert of bba9172 due to an interaction with how rematerialized
+; instructions are pruned from the original live interval. In the result
+; below, we remat the vmv.v.x into the loop, but fail to remat the vmv.v.x
+; a second time after further splitting its live range. We shouldn't need
+; to spill it to the stack at all.
+define i64 @dual_remat(i64 %0, <vscale x 16 x i64> %1, <vscale x 16 x i64> %2, ptr %p) #0 {
+; CHECK-LABEL: dual_remat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 5
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x21, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 33 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a1, a2, 3
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: vmv.v.i v0, 0
+; CHECK-NEXT: .LBB8_1: # %vector.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: mv a5, a4
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: add a5, a5, a4
+; CHECK-NEXT: slli a4, a4, 1
+; CHECK-NEXT: add a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a5, a4, 4
+; CHECK-NEXT: add a4, a5, a4
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: mv a5, a4
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: add a5, a5, a4
+; CHECK-NEXT: slli a4, a4, 1
+; CHECK-NEXT: add a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vand.vv v16, v16, v8
+; CHECK-NEXT: vmsne.vi v24, v16, 0
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 4
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs1r.v v24, (a4) # vscale x 8-byte Folded Spill
+; CHECK-NEXT: vand.vv v16, v0, v8
+; CHECK-NEXT: vmsne.vi v8, v16, 0
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: mv a5, a4
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: add a5, a5, a4
+; CHECK-NEXT: slli a4, a4, 1
+; CHECK-NEXT: add a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 4
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl1r.v v9, (a4) # vscale x 8-byte Folded Reload
+; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslideup.vx v9, v8, a1
+; CHECK-NEXT: vsetvli a4, zero, e8, m2, ta, ma
+; CHECK-NEXT: vcpop.m a4, v9
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: slli a6, a5, 4
+; CHECK-NEXT: add a5, a6, a5
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vs8r.v v8, (a3)
+; CHECK-NEXT: vs8r.v v8, (a2)
+; CHECK-NEXT: addi a5, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vsetvli a5, zero, e64, m8, ta, ma
+; CHECK-NEXT: vor.vv v16, v16, v8
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: slli a5, a5, 3
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vor.vv v0, v0, v8
+; CHECK-NEXT: beqz a4, .LBB8_1
+; CHECK-NEXT: # %bb.2: # %middle.block
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 5
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add sp, sp, a1
+; CHECK-NEXT: .cfi_def_cfa sp, 16
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+entry:
+  %broadcast.splatinsert = insertelement <vscale x 16 x i64> zeroinitializer, i64 %0, i64 0
+  %broadcast.splat = shufflevector <vscale x 16 x i64> %broadcast.splatinsert, <vscale x 16 x i64> zeroinitializer, <vscale x 16 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
+  %vec.ind = phi <vscale x 16 x i64> [ zeroinitializer, %entry ], [ %vec.ind.next, %vector.body ]
+  %3 = and <vscale x 16 x i64> %vec.ind, %broadcast.splat
+  %4 = icmp ne <vscale x 16 x i64> %3, zeroinitializer
+  store <vscale x 16 x i64> %broadcast.splat, ptr %p
+  %5 = tail call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> %4)
+  %vec.ind.next = or <vscale x 16 x i64> %vec.ind, %1
+  br i1 %5, label %middle.block, label %vector.body
+
+middle.block: ; preds = %vector.body
+  %and.i = and i64 1, %0
+  ret i64 %and.i
+}