@@ -301,3 +301,135 @@ define void @vfmv.s.f(ptr %p, double %x) {
   store volatile double %x, ptr %p
   ret void
 }
+
+; This test is fairly fragile, but it's trying to cover the case which
+; caused the revert of bba9172 due to an interaction with how rematerialized
+; instructions are pruned from the original live interval.  In the result
+; below, we remat the vmv.v.x into the loop, but fail to remat the vmv.v.x
+; a second time after further splitting its live range.  We shouldn't need
+; to spill it to the stack at all.
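+;
+; For illustration only (the register choice below is hypothetical): because
+; the vmv.v.x splat only reads the still-live scalar a0, the allocator can
+; simply recompute it at a later use,
+;
+;   vmv.v.x v8, a0    ; remat: re-issue the splat instead of reloading it
+;
+; rather than falling back to the vs8r.v/vl8r.v spill/reload pair seen in
+; the output below.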
+define i64 @dual_remat(i64 %0, <vscale x 16 x i64> %1, <vscale x 16 x i64> %2, ptr %p) #0 {
+; CHECK-LABEL: dual_remat:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a2, a1, 5
+; CHECK-NEXT:    add a1, a2, a1
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x21, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 33 * vlenb
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a1, a2, 3
+; CHECK-NEXT:    slli a2, a2, 3
+; CHECK-NEXT:    add a2, a3, a2
+; CHECK-NEXT:    vmv.v.i v0, 0
+; CHECK-NEXT:  .LBB8_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    csrr a4, vlenb
+; CHECK-NEXT:    mv a5, a4
+; CHECK-NEXT:    slli a4, a4, 3
+; CHECK-NEXT:    add a5, a5, a4
+; CHECK-NEXT:    slli a4, a4, 1
+; CHECK-NEXT:    add a4, a4, a5
+; CHECK-NEXT:    add a4, sp, a4
+; CHECK-NEXT:    addi a4, a4, 16
+; CHECK-NEXT:    vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    csrr a4, vlenb
+; CHECK-NEXT:    slli a5, a4, 4
+; CHECK-NEXT:    add a4, a5, a4
+; CHECK-NEXT:    add a4, sp, a4
+; CHECK-NEXT:    addi a4, a4, 16
+; CHECK-NEXT:    vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
+; CHECK-NEXT:    csrr a4, vlenb
+; CHECK-NEXT:    mv a5, a4
+; CHECK-NEXT:    slli a4, a4, 3
+; CHECK-NEXT:    add a5, a5, a4
+; CHECK-NEXT:    slli a4, a4, 1
+; CHECK-NEXT:    add a4, a4, a5
+; CHECK-NEXT:    add a4, sp, a4
+; CHECK-NEXT:    addi a4, a4, 16
+; CHECK-NEXT:    vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vand.vv v16, v16, v8
+; CHECK-NEXT:    vmsne.vi v24, v16, 0
+; CHECK-NEXT:    csrr a4, vlenb
+; CHECK-NEXT:    slli a4, a4, 4
+; CHECK-NEXT:    add a4, sp, a4
+; CHECK-NEXT:    addi a4, a4, 16
+; CHECK-NEXT:    vs1r.v v24, (a4) # vscale x 8-byte Folded Spill
+; CHECK-NEXT:    vand.vv v16, v0, v8
+; CHECK-NEXT:    vmsne.vi v8, v16, 0
+; CHECK-NEXT:    csrr a4, vlenb
+; CHECK-NEXT:    mv a5, a4
+; CHECK-NEXT:    slli a4, a4, 3
+; CHECK-NEXT:    add a5, a5, a4
+; CHECK-NEXT:    slli a4, a4, 1
+; CHECK-NEXT:    add a4, a4, a5
+; CHECK-NEXT:    add a4, sp, a4
+; CHECK-NEXT:    addi a4, a4, 16
+; CHECK-NEXT:    vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    csrr a4, vlenb
+; CHECK-NEXT:    slli a4, a4, 4
+; CHECK-NEXT:    add a4, sp, a4
+; CHECK-NEXT:    addi a4, a4, 16
+; CHECK-NEXT:    vl1r.v v9, (a4) # vscale x 8-byte Folded Reload
+; CHECK-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vslideup.vx v9, v8, a1
+; CHECK-NEXT:    vsetvli a4, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vcpop.m a4, v9
+; CHECK-NEXT:    csrr a5, vlenb
+; CHECK-NEXT:    slli a6, a5, 4
+; CHECK-NEXT:    add a5, a6, a5
+; CHECK-NEXT:    add a5, sp, a5
+; CHECK-NEXT:    addi a5, a5, 16
+; CHECK-NEXT:    vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vs8r.v v8, (a3)
+; CHECK-NEXT:    vs8r.v v8, (a2)
+; CHECK-NEXT:    addi a5, sp, 16
+; CHECK-NEXT:    vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vsetvli a5, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vor.vv v16, v16, v8
+; CHECK-NEXT:    csrr a5, vlenb
+; CHECK-NEXT:    slli a5, a5, 3
+; CHECK-NEXT:    add a5, sp, a5
+; CHECK-NEXT:    addi a5, a5, 16
+; CHECK-NEXT:    vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT:    vor.vv v0, v0, v8
+; CHECK-NEXT:    beqz a4, .LBB8_1
+; CHECK-NEXT:  # %bb.2: # %middle.block
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a2, a1, 5
+; CHECK-NEXT:    add a1, a2, a1
+; CHECK-NEXT:    add sp, sp, a1
+; CHECK-NEXT:    .cfi_def_cfa sp, 16
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <vscale x 16 x i64> zeroinitializer, i64 %0, i64 0
+  %broadcast.splat = shufflevector <vscale x 16 x i64> %broadcast.splatinsert, <vscale x 16 x i64> zeroinitializer, <vscale x 16 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %vec.ind = phi <vscale x 16 x i64> [ zeroinitializer, %entry ], [ %vec.ind.next, %vector.body ]
+  %3 = and <vscale x 16 x i64> %vec.ind, %broadcast.splat
+  %4 = icmp ne <vscale x 16 x i64> %3, zeroinitializer
+  store <vscale x 16 x i64> %broadcast.splat, ptr %p
+  %5 = tail call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> %4)
+  %vec.ind.next = or <vscale x 16 x i64> %vec.ind, %1
+  br i1 %5, label %middle.block, label %vector.body
+
+middle.block:                                     ; preds = %vector.body
+  %and.i = and i64 1, %0
+  ret i64 %and.i
+}