@@ -384,6 +384,103 @@ define <16 x i8> @vadd_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
384
384
ret <16 x i8 > %v
385
385
}
386
386
387
; Declaration of the vector-predicated add intrinsic used by the <256 x i8>
; tests below: two vector operands, a <256 x i1> mask and an i32 length.
; NOTE(review): the "v258i8" suffix does not match the 256-element type used
; here; presumably a typo for v256i8. The same spelling is used by the test
; function names and their labels, so confirm before renaming anything.
+ declare <256 x i8 > @llvm.vp.add.v258i8 (<256 x i8 >, <256 x i8 >, <256 x i1 >, i32 )
388
+
389
; Masked vp.add of %va and a splat of i8 -1 on <256 x i8> with a runtime %evl.
; The expected output issues two masked vadd.vi instructions, each preceded
; by its own vsetvli:
;   - v16 group: the length is a1 - 128, or 0 when that subtraction wraps
;     (the bltu a1, a0 test); the mask is the one loaded by vle1.v from (a0).
;   - v8 group: the length is a1 capped at 128; the mask is the incoming v0,
;     which is kept in v26 while v0 is reused for the other half.
; NOTE(review): a1 is presumably %evl and a0 presumably the address of the
; second half of %m; confirm against the calling convention.
+ define <256 x i8 > @vadd_vi_v258i8 (<256 x i8 > %va , <256 x i1 > %m , i32 zeroext %evl ) {
390
+ ; CHECK-LABEL: vadd_vi_v258i8:
391
+ ; CHECK: # %bb.0:
392
+ ; CHECK-NEXT: addi a2, zero, 128
393
+ ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
394
+ ; CHECK-NEXT: vle1.v v25, (a0)
395
+ ; CHECK-NEXT: addi a0, a1, -128
396
+ ; CHECK-NEXT: vmv1r.v v26, v0
397
+ ; CHECK-NEXT: mv a3, zero
398
+ ; CHECK-NEXT: bltu a1, a0, .LBB30_2
399
+ ; CHECK-NEXT: # %bb.1:
400
+ ; CHECK-NEXT: mv a3, a0
401
+ ; CHECK-NEXT: .LBB30_2:
402
+ ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, mu
403
+ ; CHECK-NEXT: vmv1r.v v0, v25
404
+ ; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
405
+ ; CHECK-NEXT: bltu a1, a2, .LBB30_4
406
+ ; CHECK-NEXT: # %bb.3:
407
+ ; CHECK-NEXT: addi a1, zero, 128
408
+ ; CHECK-NEXT: .LBB30_4:
409
+ ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
410
+ ; CHECK-NEXT: vmv1r.v v0, v26
411
+ ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
412
+ ; CHECK-NEXT: ret
413
+ %elt.head = insertelement <256 x i8 > undef , i8 -1 , i32 0
414
+ %vb = shufflevector <256 x i8 > %elt.head , <256 x i8 > undef , <256 x i32 > zeroinitializer
415
+ %v = call <256 x i8 > @llvm.vp.add.v258i8 (<256 x i8 > %va , <256 x i8 > %vb , <256 x i1 > %m , i32 %evl )
416
+ ret <256 x i8 > %v
417
+ }
418
+
419
; Same operation as the masked <256 x i8> test, but the mask passed to the
; intrinsic is an all-true splat built in the body (%head / %m), so the two
; expected vadd.vi instructions carry no v0.t operand.
; The length handling is still split in two: the v16 group runs with
; a0 - 128 (or 0 when that subtraction wraps) and the v8 group runs with a0
; capped at 128.
; NOTE(review): a0 is presumably %evl here; confirm against the calling
; convention.
+ define <256 x i8 > @vadd_vi_v258i8_unmasked (<256 x i8 > %va , i32 zeroext %evl ) {
420
+ ; CHECK-LABEL: vadd_vi_v258i8_unmasked:
421
+ ; CHECK: # %bb.0:
422
+ ; CHECK-NEXT: addi a1, a0, -128
423
+ ; CHECK-NEXT: mv a2, zero
424
+ ; CHECK-NEXT: bltu a0, a1, .LBB31_2
425
+ ; CHECK-NEXT: # %bb.1:
426
+ ; CHECK-NEXT: mv a2, a1
427
+ ; CHECK-NEXT: .LBB31_2:
428
+ ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
429
+ ; CHECK-NEXT: addi a1, zero, 128
430
+ ; CHECK-NEXT: vadd.vi v16, v16, -1
431
+ ; CHECK-NEXT: bltu a0, a1, .LBB31_4
432
+ ; CHECK-NEXT: # %bb.3:
433
+ ; CHECK-NEXT: addi a0, zero, 128
434
+ ; CHECK-NEXT: .LBB31_4:
435
+ ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
436
+ ; CHECK-NEXT: vadd.vi v8, v8, -1
437
+ ; CHECK-NEXT: ret
438
+ %elt.head = insertelement <256 x i8 > undef , i8 -1 , i32 0
439
+ %vb = shufflevector <256 x i8 > %elt.head , <256 x i8 > undef , <256 x i32 > zeroinitializer
440
+ %head = insertelement <256 x i1 > undef , i1 true , i32 0
441
+ %m = shufflevector <256 x i1 > %head , <256 x i1 > undef , <256 x i32 > zeroinitializer
442
+ %v = call <256 x i8 > @llvm.vp.add.v258i8 (<256 x i8 > %va , <256 x i8 > %vb , <256 x i1 > %m , i32 %evl )
443
+ ret <256 x i8 > %v
444
+ }
445
+
446
+ ; Test splitting when the %evl is a known constant.
447
+
448
; Masked vp.add on <256 x i8> with the constant length 129. No branches are
; expected: the v8 group runs under a vsetvli with a length of 128 and the
; v16 group under a vsetivli with the immediate length 1 (129 - 128).
; The mask for the v16 group is the one loaded by vle1.v from (a0).
+ define <256 x i8 > @vadd_vi_v258i8_evl129 (<256 x i8 > %va , <256 x i1 > %m ) {
449
+ ; CHECK-LABEL: vadd_vi_v258i8_evl129:
450
+ ; CHECK: # %bb.0:
451
+ ; CHECK-NEXT: addi a1, zero, 128
452
+ ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
453
+ ; CHECK-NEXT: vle1.v v25, (a0)
454
+ ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
455
+ ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, mu
456
+ ; CHECK-NEXT: vmv1r.v v0, v25
457
+ ; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
458
+ ; CHECK-NEXT: ret
459
+ %elt.head = insertelement <256 x i8 > undef , i8 -1 , i32 0
460
+ %vb = shufflevector <256 x i8 > %elt.head , <256 x i8 > undef , <256 x i32 > zeroinitializer
461
+ %v = call <256 x i8 > @llvm.vp.add.v258i8 (<256 x i8 > %va , <256 x i8 > %vb , <256 x i1 > %m , i32 129 )
462
+ ret <256 x i8 > %v
463
+ }
464
+
465
+ ; FIXME: The upper half is doing nothing: its vadd.vi runs with a vector length of zero.
466
+
467
; Masked vp.add on <256 x i8> with the constant length 128. The v8 group
; runs under a vsetvli with a length of 128; the v16 group is still emitted,
; but under a vsetivli whose immediate length is 0.
+ define <256 x i8 > @vadd_vi_v258i8_evl128 (<256 x i8 > %va , <256 x i1 > %m ) {
468
+ ; CHECK-LABEL: vadd_vi_v258i8_evl128:
469
+ ; CHECK: # %bb.0:
470
+ ; CHECK-NEXT: addi a1, zero, 128
471
+ ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
472
+ ; CHECK-NEXT: vle1.v v25, (a0)
473
+ ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
474
+ ; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, mu
475
+ ; CHECK-NEXT: vmv1r.v v0, v25
476
+ ; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
477
+ ; CHECK-NEXT: ret
478
+ %elt.head = insertelement <256 x i8 > undef , i8 -1 , i32 0
479
+ %vb = shufflevector <256 x i8 > %elt.head , <256 x i8 > undef , <256 x i32 > zeroinitializer
480
+ %v = call <256 x i8 > @llvm.vp.add.v258i8 (<256 x i8 > %va , <256 x i8 > %vb , <256 x i1 > %m , i32 128 )
481
+ ret <256 x i8 > %v
482
+ }
483
+
387
484
declare <2 x i16 > @llvm.vp.add.v2i16 (<2 x i16 >, <2 x i16 >, <2 x i1 >, i32 )
388
485
389
486
define <2 x i16 > @vadd_vv_v2i16 (<2 x i16 > %va , <2 x i16 > %b , <2 x i1 > %m , i32 zeroext %evl ) {
@@ -1407,3 +1504,176 @@ define <16 x i64> @vadd_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
1407
1504
%v = call <16 x i64 > @llvm.vp.add.v16i64 (<16 x i64 > %va , <16 x i64 > %vb , <16 x i1 > %m , i32 %evl )
1408
1505
ret <16 x i64 > %v
1409
1506
}
1507
+
1508
+ ; Test that split-legalization works as expected.
1509
+
1510
; Declaration of the vector-predicated add intrinsic used by the <32 x i64>
; tests below: two vector operands, a <32 x i1> mask and an i32 length.
+ declare <32 x i64 > @llvm.vp.add.v32i64 (<32 x i64 >, <32 x i64 >, <32 x i1 >, i32 )
1511
+
1512
; Masked vp.add of %va and a splat of i64 -1 on <32 x i64> with a runtime
; %evl, checked separately under the RV32 and RV64 prefixes.
; In both expected outputs:
;   - the mask for the v16 group is produced by sliding v0 down by 2 at
;     e8/mf4, with the incoming v0 kept in a spare register for the v8 group;
;   - the v16 group runs with a0 - 16 (or 0 when that subtraction wraps) and
;     the v8 group runs with a0 capped at 16.
; The RV32 output first builds the -1 splat in v24 with vmv.v.i at e32 and
; then uses vadd.vv; the RV64 output uses vadd.vi with the immediate -1.
; NOTE(review): the name says "vx" although the second operand is a constant
; splat and the RV64 output selects vadd.vi; presumably the name reflects the
; RV32 lowering. Confirm before renaming.
+ define <32 x i64 > @vadd_vx_v32i64 (<32 x i64 > %va , <32 x i1 > %m , i32 zeroext %evl ) {
1513
+ ; RV32-LABEL: vadd_vx_v32i64:
1514
+ ; RV32: # %bb.0:
1515
+ ; RV32-NEXT: mv a1, zero
1516
+ ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
1517
+ ; RV32-NEXT: vmv1r.v v1, v0
1518
+ ; RV32-NEXT: vslidedown.vi v0, v0, 2
1519
+ ; RV32-NEXT: addi a2, zero, 32
1520
+ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
1521
+ ; RV32-NEXT: addi a2, a0, -16
1522
+ ; RV32-NEXT: vmv.v.i v24, -1
1523
+ ; RV32-NEXT: bltu a0, a2, .LBB106_2
1524
+ ; RV32-NEXT: # %bb.1:
1525
+ ; RV32-NEXT: mv a1, a2
1526
+ ; RV32-NEXT: .LBB106_2:
1527
+ ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
1528
+ ; RV32-NEXT: addi a1, zero, 16
1529
+ ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
1530
+ ; RV32-NEXT: bltu a0, a1, .LBB106_4
1531
+ ; RV32-NEXT: # %bb.3:
1532
+ ; RV32-NEXT: addi a0, zero, 16
1533
+ ; RV32-NEXT: .LBB106_4:
1534
+ ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu
1535
+ ; RV32-NEXT: vmv1r.v v0, v1
1536
+ ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
1537
+ ; RV32-NEXT: ret
1538
+ ;
1539
+ ; RV64-LABEL: vadd_vx_v32i64:
1540
+ ; RV64: # %bb.0:
1541
+ ; RV64-NEXT: mv a1, zero
1542
+ ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
1543
+ ; RV64-NEXT: addi a2, a0, -16
1544
+ ; RV64-NEXT: vmv1r.v v25, v0
1545
+ ; RV64-NEXT: vslidedown.vi v0, v0, 2
1546
+ ; RV64-NEXT: bltu a0, a2, .LBB106_2
1547
+ ; RV64-NEXT: # %bb.1:
1548
+ ; RV64-NEXT: mv a1, a2
1549
+ ; RV64-NEXT: .LBB106_2:
1550
+ ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
1551
+ ; RV64-NEXT: addi a1, zero, 16
1552
+ ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t
1553
+ ; RV64-NEXT: bltu a0, a1, .LBB106_4
1554
+ ; RV64-NEXT: # %bb.3:
1555
+ ; RV64-NEXT: addi a0, zero, 16
1556
+ ; RV64-NEXT: .LBB106_4:
1557
+ ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu
1558
+ ; RV64-NEXT: vmv1r.v v0, v25
1559
+ ; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
1560
+ ; RV64-NEXT: ret
1561
+ %elt.head = insertelement <32 x i64 > undef , i64 -1 , i32 0
1562
+ %vb = shufflevector <32 x i64 > %elt.head , <32 x i64 > undef , <32 x i32 > zeroinitializer
1563
+ %v = call <32 x i64 > @llvm.vp.add.v32i64 (<32 x i64 > %va , <32 x i64 > %vb , <32 x i1 > %m , i32 %evl )
1564
+ ret <32 x i64 > %v
1565
+ }
1566
+
1567
; Unmasked variant of the <32 x i64> split test: the mask passed to the
; intrinsic is an all-true splat built in the body (%head / %m), so none of
; the expected adds carry a v0.t operand.
; The length is split as in the masked test: the v16 group runs with a0 - 16
; (or 0 when that subtraction wraps) and the v8 group with a0 capped at 16.
; The RV32 output builds the -1 splat in v24 with vmv.v.i at e32 and uses
; vadd.vv; the RV64 output uses vadd.vi with the immediate -1.
+ define <32 x i64 > @vadd_vi_v32i64_unmasked (<32 x i64 > %va , i32 zeroext %evl ) {
1568
+ ; RV32-LABEL: vadd_vi_v32i64_unmasked:
1569
+ ; RV32: # %bb.0:
1570
+ ; RV32-NEXT: mv a1, zero
1571
+ ; RV32-NEXT: addi a2, zero, 32
1572
+ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
1573
+ ; RV32-NEXT: addi a2, a0, -16
1574
+ ; RV32-NEXT: vmv.v.i v24, -1
1575
+ ; RV32-NEXT: bltu a0, a2, .LBB107_2
1576
+ ; RV32-NEXT: # %bb.1:
1577
+ ; RV32-NEXT: mv a1, a2
1578
+ ; RV32-NEXT: .LBB107_2:
1579
+ ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
1580
+ ; RV32-NEXT: addi a1, zero, 16
1581
+ ; RV32-NEXT: vadd.vv v16, v16, v24
1582
+ ; RV32-NEXT: bltu a0, a1, .LBB107_4
1583
+ ; RV32-NEXT: # %bb.3:
1584
+ ; RV32-NEXT: addi a0, zero, 16
1585
+ ; RV32-NEXT: .LBB107_4:
1586
+ ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu
1587
+ ; RV32-NEXT: vadd.vv v8, v8, v24
1588
+ ; RV32-NEXT: ret
1589
+ ;
1590
+ ; RV64-LABEL: vadd_vi_v32i64_unmasked:
1591
+ ; RV64: # %bb.0:
1592
+ ; RV64-NEXT: addi a1, a0, -16
1593
+ ; RV64-NEXT: mv a2, zero
1594
+ ; RV64-NEXT: bltu a0, a1, .LBB107_2
1595
+ ; RV64-NEXT: # %bb.1:
1596
+ ; RV64-NEXT: mv a2, a1
1597
+ ; RV64-NEXT: .LBB107_2:
1598
+ ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu
1599
+ ; RV64-NEXT: addi a1, zero, 16
1600
+ ; RV64-NEXT: vadd.vi v16, v16, -1
1601
+ ; RV64-NEXT: bltu a0, a1, .LBB107_4
1602
+ ; RV64-NEXT: # %bb.3:
1603
+ ; RV64-NEXT: addi a0, zero, 16
1604
+ ; RV64-NEXT: .LBB107_4:
1605
+ ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu
1606
+ ; RV64-NEXT: vadd.vi v8, v8, -1
1607
+ ; RV64-NEXT: ret
1608
+ %elt.head = insertelement <32 x i64 > undef , i64 -1 , i32 0
1609
+ %vb = shufflevector <32 x i64 > %elt.head , <32 x i64 > undef , <32 x i32 > zeroinitializer
1610
+ %head = insertelement <32 x i1 > undef , i1 true , i32 0
1611
+ %m = shufflevector <32 x i1 > %head , <32 x i1 > undef , <32 x i32 > zeroinitializer
1612
+ %v = call <32 x i64 > @llvm.vp.add.v32i64 (<32 x i64 > %va , <32 x i64 > %vb , <32 x i1 > %m , i32 %evl )
1613
+ ret <32 x i64 > %v
1614
+ }
1615
+
1616
+ ; FIXME: After splitting, the "high" add (vadd.vv on RV32, vadd.vi on RV64)
1617
+ ; runs with a vector length of zero and does nothing; it could be replaced by undef.
1618
+
1619
; Masked vp.add on <32 x i64> with the constant length 12. No branches are
; expected: the v8 group runs under a vsetivli with the immediate length 12,
; and the v16 group is still emitted under a vsetivli whose immediate length
; is 0. The mask for the v16 group is v0 slid down by 2 at e8/mf4.
; The RV32 output builds the -1 splat in v24 and uses vadd.vv; the RV64
; output uses vadd.vi with the immediate -1.
+ define <32 x i64 > @vadd_vx_v32i64_evl12 (<32 x i64 > %va , <32 x i1 > %m ) {
1620
+ ; RV32-LABEL: vadd_vx_v32i64_evl12:
1621
+ ; RV32: # %bb.0:
1622
+ ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
1623
+ ; RV32-NEXT: vslidedown.vi v1, v0, 2
1624
+ ; RV32-NEXT: addi a0, zero, 32
1625
+ ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, mu
1626
+ ; RV32-NEXT: vmv.v.i v24, -1
1627
+ ; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, mu
1628
+ ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
1629
+ ; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, mu
1630
+ ; RV32-NEXT: vmv1r.v v0, v1
1631
+ ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
1632
+ ; RV32-NEXT: ret
1633
+ ;
1634
+ ; RV64-LABEL: vadd_vx_v32i64_evl12:
1635
+ ; RV64: # %bb.0:
1636
+ ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
1637
+ ; RV64-NEXT: vslidedown.vi v25, v0, 2
1638
+ ; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, mu
1639
+ ; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
1640
+ ; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, mu
1641
+ ; RV64-NEXT: vmv1r.v v0, v25
1642
+ ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t
1643
+ ; RV64-NEXT: ret
1644
+ %elt.head = insertelement <32 x i64 > undef , i64 -1 , i32 0
1645
+ %vb = shufflevector <32 x i64 > %elt.head , <32 x i64 > undef , <32 x i32 > zeroinitializer
1646
+ %v = call <32 x i64 > @llvm.vp.add.v32i64 (<32 x i64 > %va , <32 x i64 > %vb , <32 x i1 > %m , i32 12 )
1647
+ ret <32 x i64 > %v
1648
+ }
1649
+
1650
; Masked vp.add on <32 x i64> with the constant length 27. No branches are
; expected: the v8 group runs under a vsetivli with the immediate length 16
; and the v16 group under a vsetivli with the immediate length 11 (27 - 16).
; The mask for the v16 group is v0 slid down by 2 at e8/mf4.
; The RV32 output builds the -1 splat in v24 and uses vadd.vv; the RV64
; output uses vadd.vi with the immediate -1.
+ define <32 x i64 > @vadd_vx_v32i64_evl27 (<32 x i64 > %va , <32 x i1 > %m ) {
1651
+ ; RV32-LABEL: vadd_vx_v32i64_evl27:
1652
+ ; RV32: # %bb.0:
1653
+ ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
1654
+ ; RV32-NEXT: vslidedown.vi v1, v0, 2
1655
+ ; RV32-NEXT: addi a0, zero, 32
1656
+ ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, mu
1657
+ ; RV32-NEXT: vmv.v.i v24, -1
1658
+ ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
1659
+ ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
1660
+ ; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, mu
1661
+ ; RV32-NEXT: vmv1r.v v0, v1
1662
+ ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t
1663
+ ; RV32-NEXT: ret
1664
+ ;
1665
+ ; RV64-LABEL: vadd_vx_v32i64_evl27:
1666
+ ; RV64: # %bb.0:
1667
+ ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
1668
+ ; RV64-NEXT: vslidedown.vi v25, v0, 2
1669
+ ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
1670
+ ; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
1671
+ ; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, mu
1672
+ ; RV64-NEXT: vmv1r.v v0, v25
1673
+ ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t
1674
+ ; RV64-NEXT: ret
1675
+ %elt.head = insertelement <32 x i64 > undef , i64 -1 , i32 0
1676
+ %vb = shufflevector <32 x i64 > %elt.head , <32 x i64 > undef , <32 x i32 > zeroinitializer
1677
+ %v = call <32 x i64 > @llvm.vp.add.v32i64 (<32 x i64 > %va , <32 x i64 > %vb , <32 x i1 > %m , i32 27 )
1678
+ ret <32 x i64 > %v
1679
+ }
0 commit comments