@@ -301,3 +301,135 @@ define void @vfmv.s.f(ptr %p, double %x) {
store volatile double %x, ptr %p
ret void
}
+
+ ; This test is fairly fragile, but it's trying to cover the case which
+ ; caused the revert of bba9172 due to an interaction with how rematerialized
+ ; instructions are pruned from the original live interval. In the result
+ ; below, we remat the vmv.v.x into the loop, but fail to remat the vmv.v.x
+ ; a second time after further splitting its live range. We shouldn't need
+ ; to spill it to the stack at all.
+ define i64 @dual_remat(i64 %0, <vscale x 16 x i64> %1, <vscale x 16 x i64> %2, ptr %p) #0 {
+ ; CHECK-LABEL: dual_remat:
+ ; CHECK: # %bb.0: # %entry
+ ; CHECK-NEXT: addi sp, sp, -16
+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
+ ; CHECK-NEXT: csrr a1, vlenb
+ ; CHECK-NEXT: slli a2, a1, 5
+ ; CHECK-NEXT: add a1, a2, a1
+ ; CHECK-NEXT: sub sp, sp, a1
+ ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x21, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 33 * vlenb
+ ; CHECK-NEXT: csrr a1, vlenb
+ ; CHECK-NEXT: slli a1, a1, 3
+ ; CHECK-NEXT: add a1, sp, a1
+ ; CHECK-NEXT: addi a1, a1, 16
+ ; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+ ; CHECK-NEXT: addi a1, sp, 16
+ ; CHECK-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+ ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+ ; CHECK-NEXT: vmv.v.i v16, 0
+ ; CHECK-NEXT: csrr a2, vlenb
+ ; CHECK-NEXT: srli a1, a2, 3
+ ; CHECK-NEXT: slli a2, a2, 3
+ ; CHECK-NEXT: add a2, a3, a2
+ ; CHECK-NEXT: vmv.v.i v0, 0
+ ; CHECK-NEXT: .LBB8_1: # %vector.body
+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+ ; CHECK-NEXT: csrr a4, vlenb
+ ; CHECK-NEXT: mv a5, a4
+ ; CHECK-NEXT: slli a4, a4, 3
+ ; CHECK-NEXT: add a5, a5, a4
+ ; CHECK-NEXT: slli a4, a4, 1
+ ; CHECK-NEXT: add a4, a4, a5
+ ; CHECK-NEXT: add a4, sp, a4
+ ; CHECK-NEXT: addi a4, a4, 16
+ ; CHECK-NEXT: vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
+ ; CHECK-NEXT: vmv.v.x v8, a0
+ ; CHECK-NEXT: csrr a4, vlenb
+ ; CHECK-NEXT: slli a5, a4, 4
+ ; CHECK-NEXT: add a4, a5, a4
+ ; CHECK-NEXT: add a4, sp, a4
+ ; CHECK-NEXT: addi a4, a4, 16
+ ; CHECK-NEXT: vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
+ ; CHECK-NEXT: csrr a4, vlenb
+ ; CHECK-NEXT: mv a5, a4
+ ; CHECK-NEXT: slli a4, a4, 3
+ ; CHECK-NEXT: add a5, a5, a4
+ ; CHECK-NEXT: slli a4, a4, 1
+ ; CHECK-NEXT: add a4, a4, a5
+ ; CHECK-NEXT: add a4, sp, a4
+ ; CHECK-NEXT: addi a4, a4, 16
+ ; CHECK-NEXT: vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
+ ; CHECK-NEXT: vand.vv v16, v16, v8
+ ; CHECK-NEXT: vmsne.vi v24, v16, 0
+ ; CHECK-NEXT: csrr a4, vlenb
+ ; CHECK-NEXT: slli a4, a4, 4
+ ; CHECK-NEXT: add a4, sp, a4
+ ; CHECK-NEXT: addi a4, a4, 16
+ ; CHECK-NEXT: vs1r.v v24, (a4) # vscale x 8-byte Folded Spill
+ ; CHECK-NEXT: vand.vv v16, v0, v8
+ ; CHECK-NEXT: vmsne.vi v8, v16, 0
+ ; CHECK-NEXT: csrr a4, vlenb
+ ; CHECK-NEXT: mv a5, a4
+ ; CHECK-NEXT: slli a4, a4, 3
+ ; CHECK-NEXT: add a5, a5, a4
+ ; CHECK-NEXT: slli a4, a4, 1
+ ; CHECK-NEXT: add a4, a4, a5
+ ; CHECK-NEXT: add a4, sp, a4
+ ; CHECK-NEXT: addi a4, a4, 16
+ ; CHECK-NEXT: vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
+ ; CHECK-NEXT: csrr a4, vlenb
+ ; CHECK-NEXT: slli a4, a4, 4
+ ; CHECK-NEXT: add a4, sp, a4
+ ; CHECK-NEXT: addi a4, a4, 16
+ ; CHECK-NEXT: vl1r.v v9, (a4) # vscale x 8-byte Folded Reload
+ ; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+ ; CHECK-NEXT: vslideup.vx v9, v8, a1
+ ; CHECK-NEXT: vsetvli a4, zero, e8, m2, ta, ma
+ ; CHECK-NEXT: vcpop.m a4, v9
+ ; CHECK-NEXT: csrr a5, vlenb
+ ; CHECK-NEXT: slli a6, a5, 4
+ ; CHECK-NEXT: add a5, a6, a5
+ ; CHECK-NEXT: add a5, sp, a5
+ ; CHECK-NEXT: addi a5, a5, 16
+ ; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+ ; CHECK-NEXT: vs8r.v v8, (a3)
+ ; CHECK-NEXT: vs8r.v v8, (a2)
+ ; CHECK-NEXT: addi a5, sp, 16
+ ; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+ ; CHECK-NEXT: vsetvli a5, zero, e64, m8, ta, ma
+ ; CHECK-NEXT: vor.vv v16, v16, v8
+ ; CHECK-NEXT: csrr a5, vlenb
+ ; CHECK-NEXT: slli a5, a5, 3
+ ; CHECK-NEXT: add a5, sp, a5
+ ; CHECK-NEXT: addi a5, a5, 16
+ ; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+ ; CHECK-NEXT: vor.vv v0, v0, v8
+ ; CHECK-NEXT: beqz a4, .LBB8_1
+ ; CHECK-NEXT: # %bb.2: # %middle.block
+ ; CHECK-NEXT: andi a0, a0, 1
+ ; CHECK-NEXT: csrr a1, vlenb
+ ; CHECK-NEXT: slli a2, a1, 5
+ ; CHECK-NEXT: add a1, a2, a1
+ ; CHECK-NEXT: add sp, sp, a1
+ ; CHECK-NEXT: .cfi_def_cfa sp, 16
+ ; CHECK-NEXT: addi sp, sp, 16
+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
+ ; CHECK-NEXT: ret
+ entry:
+ %broadcast.splatinsert = insertelement <vscale x 16 x i64> zeroinitializer, i64 %0, i64 0
+ %broadcast.splat = shufflevector <vscale x 16 x i64> %broadcast.splatinsert, <vscale x 16 x i64> zeroinitializer, <vscale x 16 x i32> zeroinitializer
+ br label %vector.body
+
+ vector.body: ; preds = %vector.body, %entry
+ %vec.ind = phi <vscale x 16 x i64> [ zeroinitializer, %entry ], [ %vec.ind.next, %vector.body ]
+ %3 = and <vscale x 16 x i64> %vec.ind, %broadcast.splat
+ %4 = icmp ne <vscale x 16 x i64> %3, zeroinitializer
+ store <vscale x 16 x i64> %broadcast.splat, ptr %p
+ %5 = tail call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> %4)
+ %vec.ind.next = or <vscale x 16 x i64> %vec.ind, %1
+ br i1 %5, label %middle.block, label %vector.body
+
+ middle.block: ; preds = %vector.body
+ %and.i = and i64 1, %0
+ ret i64 %and.i
+ }
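
For reference, the vmv.v.x mentioned in the test comment comes from the insertelement/shufflevector splat of %0 in the IR above. As a minimal, hypothetical sketch (not part of this patch; the function name and types are chosen purely for illustration), a scalable splat like the one below lowers to a single vmv.v.x, which is exactly the kind of cheaply recomputable value the register allocator should rematerialize after live-range splitting rather than spill as a whole m8 register group:

; Hypothetical standalone example, not taken from the test above: a scalar
; broadcast that lowers to one vmv.v.x and can be re-emitted (rematerialized)
; wherever its value is needed instead of being spilled and reloaded.
define <vscale x 8 x i64> @splat_sketch(i64 %x) {
  %ins = insertelement <vscale x 8 x i64> poison, i64 %x, i64 0
  %splat = shufflevector <vscale x 8 x i64> %ins, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i64> %splat
}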