@@ -818,6 +818,71 @@ define <2 x i16> @test_trunc_2xi32(<2 x i32> %a) #0 {
818818 ret <2 x i16 > %r
819819}
820820
; Truncates <2 x i32> %a to <2 x i16> where the truncated value %r has two
; uses: it feeds the <2 x i16> add whose result is stored through %p, and it is
; also the function's return value.
; NOTE(review): "muliple" in the function name looks like a typo for
; "multiple" - renaming would also require updating the CHECK lines below.
821+ define <2 x i16 > @test_trunc_2xi32_muliple_use0 (<2 x i32 > %a , ptr %p ) #0 {
822+ ; I16x2-LABEL: test_trunc_2xi32_muliple_use0(
823+ ; I16x2: {
824+ ; I16x2-NEXT: .reg .b32 %r<7>;
825+ ; I16x2-NEXT: .reg .b64 %rd<2>;
826+ ; I16x2-EMPTY:
827+ ; I16x2-NEXT: // %bb.0:
828+ ; I16x2-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_trunc_2xi32_muliple_use0_param_0];
829+ ; I16x2-NEXT: ld.param.u64 %rd1, [test_trunc_2xi32_muliple_use0_param_1];
830+ ; I16x2-NEXT: prmt.b32 %r3, %r1, %r2, 0x5410U;
831+ ; I16x2-NEXT: mov.b32 %r5, 65537;
832+ ; I16x2-NEXT: add.s16x2 %r6, %r3, %r5;
833+ ; I16x2-NEXT: st.u32 [%rd1], %r6;
834+ ; I16x2-NEXT: st.param.b32 [func_retval0], %r3;
835+ ; I16x2-NEXT: ret;
836+ ;
837+ ; NO-I16x2-LABEL: test_trunc_2xi32_muliple_use0(
838+ ; NO-I16x2: {
839+ ; NO-I16x2-NEXT: .reg .b16 %rs<5>;
840+ ; NO-I16x2-NEXT: .reg .b32 %r<5>;
841+ ; NO-I16x2-NEXT: .reg .b64 %rd<2>;
842+ ; NO-I16x2-EMPTY:
843+ ; NO-I16x2-NEXT: // %bb.0:
844+ ; NO-I16x2-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_trunc_2xi32_muliple_use0_param_0];
845+ ; NO-I16x2-NEXT: ld.param.u64 %rd1, [test_trunc_2xi32_muliple_use0_param_1];
846+ ; NO-I16x2-NEXT: cvt.u16.u32 %rs1, %r2;
847+ ; NO-I16x2-NEXT: cvt.u16.u32 %rs2, %r1;
848+ ; NO-I16x2-NEXT: mov.b32 %r3, {%rs2, %rs1};
849+ ; NO-I16x2-NEXT: add.s16 %rs3, %rs1, 1;
850+ ; NO-I16x2-NEXT: add.s16 %rs4, %rs2, 1;
851+ ; NO-I16x2-NEXT: mov.b32 %r4, {%rs4, %rs3};
852+ ; NO-I16x2-NEXT: st.u32 [%rd1], %r4;
853+ ; NO-I16x2-NEXT: st.param.b32 [func_retval0], %r3;
854+ ; NO-I16x2-NEXT: ret;
855+ %r = trunc <2 x i32 > %a to <2 x i16 >
856+ ; The truncate %r is reused by the i16 add below. Optimizing it to PRMT when
857+ ; we don't have i16x2 vectors would increase register pressure.
858+ %s = add <2 x i16 > %r , splat (i16 1 )
859+ store <2 x i16 > %s , ptr %p
860+ ret <2 x i16 > %r
861+ }
862+
; Truncates <2 x i32> %a to <2 x i16> where the truncated value %r is only
; returned; the add whose result is stored through %p operates on the original
; <2 x i32> %a instead.
; NOTE(review): "muliple" in the function name looks like a typo for
; "multiple" - renaming would also require updating the CHECK lines below.
863+ define <2 x i16 > @test_trunc_2xi32_muliple_use1 (<2 x i32 > %a , ptr %p ) #0 {
864+ ; COMMON-LABEL: test_trunc_2xi32_muliple_use1(
865+ ; COMMON: {
866+ ; COMMON-NEXT: .reg .b32 %r<7>;
867+ ; COMMON-NEXT: .reg .b64 %rd<2>;
868+ ; COMMON-EMPTY:
869+ ; COMMON-NEXT: // %bb.0:
870+ ; COMMON-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_trunc_2xi32_muliple_use1_param_0];
871+ ; COMMON-NEXT: ld.param.u64 %rd1, [test_trunc_2xi32_muliple_use1_param_1];
872+ ; COMMON-NEXT: prmt.b32 %r3, %r1, %r2, 0x5410U;
873+ ; COMMON-NEXT: add.s32 %r5, %r2, 1;
874+ ; COMMON-NEXT: add.s32 %r6, %r1, 1;
875+ ; COMMON-NEXT: st.v2.u32 [%rd1], {%r6, %r5};
876+ ; COMMON-NEXT: st.param.b32 [func_retval0], %r3;
877+ ; COMMON-NEXT: ret;
878+ %r = trunc <2 x i32 > %a to <2 x i16 >
879+ ; The add below reuses the original value %a rather than %r, so optimizing
880+ ; the truncate to PRMT does not increase register pressure.
881+ %s = add <2 x i32 > %a , splat (i32 1 )
882+ store <2 x i32 > %s , ptr %p
883+ ret <2 x i16 > %r
884+ }
885+
821886define <2 x i16 > @test_trunc_2xi64 (<2 x i64 > %a ) #0 {
822887; COMMON-LABEL: test_trunc_2xi64(
823888; COMMON: {
0 commit comments