Skip to content

Commit 6b16b31

Browse files
authored
[llvm][RISCV] Support P extension CodeGen (#167882)
This patch adds RV64 codegen patterns that select PADD_W, PSUB_W, PSADD_W, PSADDU_W, PSSUB_W, PSSUBU_W, PAADD_W, and PAADDU_W for v2i32 operations.
1 parent 6a80728 commit 6b16b31

File tree

2 files changed

+190
-0
lines changed

2 files changed

+190
-0
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoP.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1539,6 +1539,20 @@ let Predicates = [HasStdExtP, IsRV64] in {
15391539
// 32-bit PLI SD node pattern
15401540
def: Pat<(v2i32 (riscv_pli simm10:$imm10)), (PLI_W simm10:$imm10)>;
15411541

1542+
// Basic 32-bit arithmetic patterns
1543+
def: Pat<(v2i32 (add GPR:$rs1, GPR:$rs2)), (PADD_W GPR:$rs1, GPR:$rs2)>;
1544+
def: Pat<(v2i32 (sub GPR:$rs1, GPR:$rs2)), (PSUB_W GPR:$rs1, GPR:$rs2)>;
1545+
1546+
// 32-bit saturating add/sub patterns
1547+
def: Pat<(v2i32 (saddsat GPR:$rs1, GPR:$rs2)), (PSADD_W GPR:$rs1, GPR:$rs2)>;
1548+
def: Pat<(v2i32 (uaddsat GPR:$rs1, GPR:$rs2)), (PSADDU_W GPR:$rs1, GPR:$rs2)>;
1549+
def: Pat<(v2i32 (ssubsat GPR:$rs1, GPR:$rs2)), (PSSUB_W GPR:$rs1, GPR:$rs2)>;
1550+
def: Pat<(v2i32 (usubsat GPR:$rs1, GPR:$rs2)), (PSSUBU_W GPR:$rs1, GPR:$rs2)>;
1551+
1552+
// 32-bit averaging patterns
1553+
def: Pat<(v2i32 (avgfloors GPR:$rs1, GPR:$rs2)), (PAADD_W GPR:$rs1, GPR:$rs2)>;
1554+
def: Pat<(v2i32 (avgflooru GPR:$rs1, GPR:$rs2)), (PAADDU_W GPR:$rs1, GPR:$rs2)>;
1555+
15421556
// 32-bit averaging-sub patterns
15431557
def: Pat<(v2i32 (riscv_pasub GPR:$rs1, GPR:$rs2)), (PASUB_W GPR:$rs1, GPR:$rs2)>;
15441558
def: Pat<(v2i32 (riscv_pasubu GPR:$rs1, GPR:$rs2)), (PASUBU_W GPR:$rs1, GPR:$rs2)>;

llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,182 @@ define void @test_extract_vector_32(ptr %ret_ptr, ptr %a_ptr) {
495495
ret void
496496
}
497497

498+
; Test basic add/sub operations for v2i32 (RV64 only)
499+
define void @test_padd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
500+
; CHECK-LABEL: test_padd_w:
501+
; CHECK: # %bb.0:
502+
; CHECK-NEXT: ld a1, 0(a1)
503+
; CHECK-NEXT: ld a2, 0(a2)
504+
; CHECK-NEXT: padd.w a1, a1, a2
505+
; CHECK-NEXT: sd a1, 0(a0)
506+
; CHECK-NEXT: ret
507+
%a = load <2 x i32>, ptr %a_ptr
508+
%b = load <2 x i32>, ptr %b_ptr
509+
%res = add <2 x i32> %a, %b
510+
store <2 x i32> %res, ptr %ret_ptr
511+
ret void
512+
}
513+
514+
define void @test_psub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
515+
; CHECK-LABEL: test_psub_w:
516+
; CHECK: # %bb.0:
517+
; CHECK-NEXT: ld a1, 0(a1)
518+
; CHECK-NEXT: ld a2, 0(a2)
519+
; CHECK-NEXT: psub.w a1, a1, a2
520+
; CHECK-NEXT: sd a1, 0(a0)
521+
; CHECK-NEXT: ret
522+
%a = load <2 x i32>, ptr %a_ptr
523+
%b = load <2 x i32>, ptr %b_ptr
524+
%res = sub <2 x i32> %a, %b
525+
store <2 x i32> %res, ptr %ret_ptr
526+
ret void
527+
}
528+
529+
; Test saturating add operations for v2i32 (RV64 only)
530+
define void @test_psadd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
531+
; CHECK-LABEL: test_psadd_w:
532+
; CHECK: # %bb.0:
533+
; CHECK-NEXT: ld a1, 0(a1)
534+
; CHECK-NEXT: ld a2, 0(a2)
535+
; CHECK-NEXT: psadd.w a1, a1, a2
536+
; CHECK-NEXT: sd a1, 0(a0)
537+
; CHECK-NEXT: ret
538+
%a = load <2 x i32>, ptr %a_ptr
539+
%b = load <2 x i32>, ptr %b_ptr
540+
%res = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
541+
store <2 x i32> %res, ptr %ret_ptr
542+
ret void
543+
}
544+
545+
define void @test_psaddu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
546+
; CHECK-LABEL: test_psaddu_w:
547+
; CHECK: # %bb.0:
548+
; CHECK-NEXT: ld a1, 0(a1)
549+
; CHECK-NEXT: ld a2, 0(a2)
550+
; CHECK-NEXT: psaddu.w a1, a1, a2
551+
; CHECK-NEXT: sd a1, 0(a0)
552+
; CHECK-NEXT: ret
553+
%a = load <2 x i32>, ptr %a_ptr
554+
%b = load <2 x i32>, ptr %b_ptr
555+
%res = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
556+
store <2 x i32> %res, ptr %ret_ptr
557+
ret void
558+
}
559+
560+
; Test saturating sub operations for v2i32 (RV64 only)
561+
define void @test_pssub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
562+
; CHECK-LABEL: test_pssub_w:
563+
; CHECK: # %bb.0:
564+
; CHECK-NEXT: ld a1, 0(a1)
565+
; CHECK-NEXT: ld a2, 0(a2)
566+
; CHECK-NEXT: pssub.w a1, a1, a2
567+
; CHECK-NEXT: sd a1, 0(a0)
568+
; CHECK-NEXT: ret
569+
%a = load <2 x i32>, ptr %a_ptr
570+
%b = load <2 x i32>, ptr %b_ptr
571+
%res = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
572+
store <2 x i32> %res, ptr %ret_ptr
573+
ret void
574+
}
575+
576+
define void @test_pssubu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
577+
; CHECK-LABEL: test_pssubu_w:
578+
; CHECK: # %bb.0:
579+
; CHECK-NEXT: ld a1, 0(a1)
580+
; CHECK-NEXT: ld a2, 0(a2)
581+
; CHECK-NEXT: pssubu.w a1, a1, a2
582+
; CHECK-NEXT: sd a1, 0(a0)
583+
; CHECK-NEXT: ret
584+
%a = load <2 x i32>, ptr %a_ptr
585+
%b = load <2 x i32>, ptr %b_ptr
586+
%res = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
587+
store <2 x i32> %res, ptr %ret_ptr
588+
ret void
589+
}
590+
591+
; Test signed averaging (floor) add for v2i32 (RV64 only)
592+
; avgfloors pattern: trunc((sext(a) + sext(b)) >> 1), using an arithmetic shift
593+
define void @test_paadd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
594+
; CHECK-LABEL: test_paadd_w:
595+
; CHECK: # %bb.0:
596+
; CHECK-NEXT: ld a1, 0(a1)
597+
; CHECK-NEXT: ld a2, 0(a2)
598+
; CHECK-NEXT: paadd.w a1, a1, a2
599+
; CHECK-NEXT: sd a1, 0(a0)
600+
; CHECK-NEXT: ret
601+
%a = load <2 x i32>, ptr %a_ptr
602+
%b = load <2 x i32>, ptr %b_ptr
603+
%ext.a = sext <2 x i32> %a to <2 x i64>
604+
%ext.b = sext <2 x i32> %b to <2 x i64>
605+
%add = add nsw <2 x i64> %ext.a, %ext.b
606+
%shift = ashr <2 x i64> %add, <i64 1, i64 1>
607+
%res = trunc <2 x i64> %shift to <2 x i32>
608+
store <2 x i32> %res, ptr %ret_ptr
609+
ret void
610+
}
611+
612+
; Test unsigned averaging (floor) add for v2i32 (RV64 only)
613+
; avgflooru pattern: (a & b) + ((a ^ b) >> 1)
614+
define void @test_paaddu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
615+
; CHECK-LABEL: test_paaddu_w:
616+
; CHECK: # %bb.0:
617+
; CHECK-NEXT: ld a1, 0(a1)
618+
; CHECK-NEXT: ld a2, 0(a2)
619+
; CHECK-NEXT: paaddu.w a1, a1, a2
620+
; CHECK-NEXT: sd a1, 0(a0)
621+
; CHECK-NEXT: ret
622+
%a = load <2 x i32>, ptr %a_ptr
623+
%b = load <2 x i32>, ptr %b_ptr
624+
%and = and <2 x i32> %a, %b
625+
%xor = xor <2 x i32> %a, %b
626+
%shift = lshr <2 x i32> %xor, <i32 1, i32 1>
627+
%res = add <2 x i32> %and, %shift
628+
store <2 x i32> %res, ptr %ret_ptr
629+
ret void
630+
}
631+
632+
; Test signed averaging (floor) subtraction for v2i32 (RV64 only)
633+
; pasub pattern: trunc((sext(a) - sext(b)) >> 1), using an arithmetic shift
634+
define void @test_pasub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
635+
; CHECK-LABEL: test_pasub_w:
636+
; CHECK: # %bb.0:
637+
; CHECK-NEXT: ld a1, 0(a1)
638+
; CHECK-NEXT: ld a2, 0(a2)
639+
; CHECK-NEXT: pasub.w a1, a1, a2
640+
; CHECK-NEXT: sd a1, 0(a0)
641+
; CHECK-NEXT: ret
642+
%a = load <2 x i32>, ptr %a_ptr
643+
%b = load <2 x i32>, ptr %b_ptr
644+
%a_ext = sext <2 x i32> %a to <2 x i64>
645+
%b_ext = sext <2 x i32> %b to <2 x i64>
646+
%sub = sub <2 x i64> %a_ext, %b_ext
647+
%res = ashr <2 x i64> %sub, <i64 1, i64 1>
648+
%res_trunc = trunc <2 x i64> %res to <2 x i32>
649+
store <2 x i32> %res_trunc, ptr %ret_ptr
650+
ret void
651+
}
652+
653+
; Test unsigned averaging (floor) subtraction for v2i32 (RV64 only)
654+
; pasubu pattern: trunc((zext(a) - zext(b)) >> 1), using a logical shift
655+
define void @test_pasubu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
656+
; CHECK-LABEL: test_pasubu_w:
657+
; CHECK: # %bb.0:
658+
; CHECK-NEXT: ld a1, 0(a1)
659+
; CHECK-NEXT: ld a2, 0(a2)
660+
; CHECK-NEXT: pasubu.w a1, a1, a2
661+
; CHECK-NEXT: sd a1, 0(a0)
662+
; CHECK-NEXT: ret
663+
%a = load <2 x i32>, ptr %a_ptr
664+
%b = load <2 x i32>, ptr %b_ptr
665+
%a_ext = zext <2 x i32> %a to <2 x i64>
666+
%b_ext = zext <2 x i32> %b to <2 x i64>
667+
%sub = sub <2 x i64> %a_ext, %b_ext
668+
%res = lshr <2 x i64> %sub, <i64 1, i64 1>
669+
%res_trunc = trunc <2 x i64> %res to <2 x i32>
670+
store <2 x i32> %res_trunc, ptr %ret_ptr
671+
ret void
672+
}
673+
498674
; Intrinsic declarations
499675
declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>)
500676
declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>)

0 commit comments

Comments
 (0)