@@ -495,6 +495,182 @@ define void @test_extract_vector_32(ptr %ret_ptr, ptr %a_ptr) {
495495 ret void
496496}
497497
; Test basic add/sub operations for v2i32 (RV64 only)
; Lane-wise add of two <2 x i32> vectors loaded from %a_ptr and %b_ptr, with
; the result stored to %ret_ptr. The CHECK lines expect each vector to be
; loaded with one ld and the add to be emitted as one padd.w.
define void @test_padd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_padd_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ld a1, 0(a1)
; CHECK-NEXT:    ld a2, 0(a2)
; CHECK-NEXT:    padd.w a1, a1, a2
; CHECK-NEXT:    sd a1, 0(a0)
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %sum = add <2 x i32> %lhs, %rhs
  store <2 x i32> %sum, ptr %ret_ptr
  ret void
}
513+
; Lane-wise subtract (%a_ptr contents minus %b_ptr contents) of two <2 x i32>
; vectors, with the result stored to %ret_ptr. The CHECK lines expect the
; subtract to be emitted as one psub.w.
define void @test_psub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_psub_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ld a1, 0(a1)
; CHECK-NEXT:    ld a2, 0(a2)
; CHECK-NEXT:    psub.w a1, a1, a2
; CHECK-NEXT:    sd a1, 0(a0)
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %diff = sub <2 x i32> %lhs, %rhs
  store <2 x i32> %diff, ptr %ret_ptr
  ret void
}
528+
; Test saturating add operations for v2i32 (RV64 only)
; Signed saturating add: calls @llvm.sadd.sat.v2i32 on the two loaded
; <2 x i32> vectors and stores the result to %ret_ptr. The CHECK lines expect
; the call to be emitted as one psadd.w.
define void @test_psadd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_psadd_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ld a1, 0(a1)
; CHECK-NEXT:    ld a2, 0(a2)
; CHECK-NEXT:    psadd.w a1, a1, a2
; CHECK-NEXT:    sd a1, 0(a0)
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %sat = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  store <2 x i32> %sat, ptr %ret_ptr
  ret void
}
544+
; Unsigned saturating add: calls @llvm.uadd.sat.v2i32 on the two loaded
; <2 x i32> vectors and stores the result to %ret_ptr. The CHECK lines expect
; the call to be emitted as one psaddu.w.
define void @test_psaddu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_psaddu_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ld a1, 0(a1)
; CHECK-NEXT:    ld a2, 0(a2)
; CHECK-NEXT:    psaddu.w a1, a1, a2
; CHECK-NEXT:    sd a1, 0(a0)
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %sat = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  store <2 x i32> %sat, ptr %ret_ptr
  ret void
}
559+
; Test saturating sub operations for v2i32 (RV64 only)
; Signed saturating subtract: calls @llvm.ssub.sat.v2i32 on the two loaded
; <2 x i32> vectors and stores the result to %ret_ptr. The CHECK lines expect
; the call to be emitted as one pssub.w.
define void @test_pssub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pssub_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ld a1, 0(a1)
; CHECK-NEXT:    ld a2, 0(a2)
; CHECK-NEXT:    pssub.w a1, a1, a2
; CHECK-NEXT:    sd a1, 0(a0)
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %sat = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  store <2 x i32> %sat, ptr %ret_ptr
  ret void
}
575+
; Unsigned saturating subtract: calls @llvm.usub.sat.v2i32 on the two loaded
; <2 x i32> vectors and stores the result to %ret_ptr. The CHECK lines expect
; the call to be emitted as one pssubu.w.
define void @test_pssubu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pssubu_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ld a1, 0(a1)
; CHECK-NEXT:    ld a2, 0(a2)
; CHECK-NEXT:    pssubu.w a1, a1, a2
; CHECK-NEXT:    sd a1, 0(a0)
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %sat = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  store <2 x i32> %sat, ptr %ret_ptr
  ret void
}
590+
; Test averaging floor signed operations for v2i32 (RV64 only)
; avgfloors pattern: (a + b) arithmetic shift right 1
; The IR spells the pattern out in a wider type: both operands are
; sign-extended to <2 x i64>, added (nsw), arithmetically shifted right by
; one, then truncated back to <2 x i32>. The CHECK lines expect the whole
; sequence to be emitted as one paadd.w.
define void @test_paadd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_paadd_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ld a1, 0(a1)
; CHECK-NEXT:    ld a2, 0(a2)
; CHECK-NEXT:    paadd.w a1, a1, a2
; CHECK-NEXT:    sd a1, 0(a0)
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %lhs.wide = sext <2 x i32> %lhs to <2 x i64>
  %rhs.wide = sext <2 x i32> %rhs to <2 x i64>
  %sum.wide = add nsw <2 x i64> %lhs.wide, %rhs.wide
  %avg.wide = ashr <2 x i64> %sum.wide, <i64 1, i64 1>
  %avg = trunc <2 x i64> %avg.wide to <2 x i32>
  store <2 x i32> %avg, ptr %ret_ptr
  ret void
}
611+
; Test averaging floor unsigned operations for v2i32 (RV64 only)
; avgflooru pattern: (a & b) + ((a ^ b) >> 1)
; The pattern is written directly on <2 x i32> with a logical shift, so no
; widening is involved. The CHECK lines expect the and/xor/lshr/add sequence
; to be emitted as one paaddu.w.
define void @test_paaddu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_paaddu_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ld a1, 0(a1)
; CHECK-NEXT:    ld a2, 0(a2)
; CHECK-NEXT:    paaddu.w a1, a1, a2
; CHECK-NEXT:    sd a1, 0(a0)
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %common = and <2 x i32> %lhs, %rhs
  %differ = xor <2 x i32> %lhs, %rhs
  %half.differ = lshr <2 x i32> %differ, <i32 1, i32 1>
  %avg = add <2 x i32> %common, %half.differ
  store <2 x i32> %avg, ptr %ret_ptr
  ret void
}
631+
; Test averaging floor subtraction signed for v2i32 (RV64 only)
; pasub pattern: (a - b) arithmetic shift right 1
; Both operands are sign-extended to <2 x i64>, subtracted, arithmetically
; shifted right by one, then truncated back to <2 x i32>. The CHECK lines
; expect the whole sequence to be emitted as one pasub.w.
define void @test_pasub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pasub_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ld a1, 0(a1)
; CHECK-NEXT:    ld a2, 0(a2)
; CHECK-NEXT:    pasub.w a1, a1, a2
; CHECK-NEXT:    sd a1, 0(a0)
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %lhs.wide = sext <2 x i32> %lhs to <2 x i64>
  %rhs.wide = sext <2 x i32> %rhs to <2 x i64>
  %diff.wide = sub <2 x i64> %lhs.wide, %rhs.wide
  %half.wide = ashr <2 x i64> %diff.wide, <i64 1, i64 1>
  %half = trunc <2 x i64> %half.wide to <2 x i32>
  store <2 x i32> %half, ptr %ret_ptr
  ret void
}
652+
; Test averaging floor subtraction unsigned for v2i32 (RV64 only)
; pasubu pattern: (a - b) logical shift right 1
; Both operands are zero-extended to <2 x i64>, subtracted, logically
; shifted right by one, then truncated back to <2 x i32>. The CHECK lines
; expect the whole sequence to be emitted as one pasubu.w.
define void @test_pasubu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_pasubu_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ld a1, 0(a1)
; CHECK-NEXT:    ld a2, 0(a2)
; CHECK-NEXT:    pasubu.w a1, a1, a2
; CHECK-NEXT:    sd a1, 0(a0)
; CHECK-NEXT:    ret
  %lhs = load <2 x i32>, ptr %a_ptr
  %rhs = load <2 x i32>, ptr %b_ptr
  %lhs.wide = zext <2 x i32> %lhs to <2 x i64>
  %rhs.wide = zext <2 x i32> %rhs to <2 x i64>
  %diff.wide = sub <2 x i64> %lhs.wide, %rhs.wide
  %half.wide = lshr <2 x i64> %diff.wide, <i64 1, i64 1>
  %half = trunc <2 x i64> %half.wide to <2 x i32>
  store <2 x i32> %half, ptr %ret_ptr
  ret void
}
673+
498674; Intrinsic declarations
499675declare <4 x i16 > @llvm.sadd.sat.v4i16 (<4 x i16 >, <4 x i16 >)
500676declare <4 x i16 > @llvm.uadd.sat.v4i16 (<4 x i16 >, <4 x i16 >)
0 commit comments