@@ -788,6 +788,63 @@ define amdgpu_kernel void @v_ashr_32_i64(ptr addrspace(1) %out, ptr addrspace(1)
788788 ret void
789789}
790790
; s_ashr_33_i64: kernel that computes `ashr i64 %a, 33`, adds %b to the result,
; and stores the i64 sum through %out. %a and %b are kernel arguments, each
; preceded by an unnamed [8 x i32] argument (presumably padding so the two
; i64 values are fetched by separate loads -- TODO confirm).
; In the SI/VI expectations below the shift shows up as two s_ashr_i32 on s7
; (by 31 and by 1) feeding s_add_u32/s_addc_u32; the EG expectations use two
; ASHR ops on KC0[5].X with the same amounts.
; NOTE(review): the check lines look machine-generated
; (update_llc_test_checks.py style); regenerate them rather than editing by
; hand -- confirm against the file header.
791+ define amdgpu_kernel void @s_ashr_33_i64 (ptr addrspace (1 ) %out , [8 x i32 ], i64 %a , [8 x i32 ], i64 %b ) {
792+ ; SI-LABEL: s_ashr_33_i64:
793+ ; SI: ; %bb.0:
794+ ; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13
795+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
796+ ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x1d
797+ ; SI-NEXT: s_mov_b32 s3, 0xf000
798+ ; SI-NEXT: s_mov_b32 s2, -1
799+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
800+ ; SI-NEXT: s_ashr_i32 s6, s7, 31
801+ ; SI-NEXT: s_ashr_i32 s7, s7, 1
802+ ; SI-NEXT: s_add_u32 s4, s7, s4
803+ ; SI-NEXT: s_addc_u32 s5, s6, s5
804+ ; SI-NEXT: v_mov_b32_e32 v0, s4
805+ ; SI-NEXT: v_mov_b32_e32 v1, s5
806+ ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
807+ ; SI-NEXT: s_endpgm
808+ ;
809+ ; VI-LABEL: s_ashr_33_i64:
810+ ; VI: ; %bb.0:
811+ ; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
812+ ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
813+ ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x74
814+ ; VI-NEXT: s_mov_b32 s3, 0xf000
815+ ; VI-NEXT: s_mov_b32 s2, -1
816+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
817+ ; VI-NEXT: s_ashr_i32 s6, s7, 31
818+ ; VI-NEXT: s_ashr_i32 s7, s7, 1
819+ ; VI-NEXT: s_add_u32 s4, s7, s4
820+ ; VI-NEXT: s_addc_u32 s5, s6, s5
821+ ; VI-NEXT: v_mov_b32_e32 v0, s4
822+ ; VI-NEXT: v_mov_b32_e32 v1, s5
823+ ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
824+ ; VI-NEXT: s_endpgm
825+ ;
826+ ; EG-LABEL: s_ashr_33_i64:
827+ ; EG: ; %bb.0:
828+ ; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[]
829+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
830+ ; EG-NEXT: CF_END
831+ ; EG-NEXT: PAD
832+ ; EG-NEXT: ALU clause starting at 4:
833+ ; EG-NEXT: ASHR T0.W, KC0[5].X, 1,
834+ ; EG-NEXT: ASHR * T1.W, KC0[5].X, literal.x,
835+ ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
836+ ; EG-NEXT: ADD_INT T1.W, PS, KC0[7].Z,
837+ ; EG-NEXT: ADDC_UINT * T2.W, PV.W, KC0[7].Y,
838+ ; EG-NEXT: ADD_INT * T0.Y, PV.W, PS,
839+ ; EG-NEXT: ADD_INT T0.X, T0.W, KC0[7].Y,
840+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
841+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
842+ %result = ashr i64 %a , 33
843+ %add = add i64 %result , %b
844+ store i64 %add , ptr addrspace (1 ) %out
845+ ret void
846+ }
847+
791848define amdgpu_kernel void @v_ashr_33_i64 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
792849; SI-LABEL: v_ashr_33_i64:
793850; SI: ; %bb.0:
@@ -854,6 +911,63 @@ define amdgpu_kernel void @v_ashr_33_i64(ptr addrspace(1) %out, ptr addrspace(1)
854911 ret void
855912}
856913
; s_ashr_62_i64: kernel that computes `ashr i64 %a, 62`, adds %b to the result,
; and stores the i64 sum through %out. %a and %b are kernel arguments, each
; preceded by an unnamed [8 x i32] argument (presumably padding so the two
; i64 values are fetched by separate loads -- TODO confirm).
; In the SI/VI expectations below the shift shows up as two s_ashr_i32 on s7
; (by 31 and by 30) feeding s_add_u32/s_addc_u32; the EG expectations use two
; ASHR ops on KC0[5].X with the same amounts.
; NOTE(review): the check lines look machine-generated
; (update_llc_test_checks.py style); regenerate them rather than editing by
; hand -- confirm against the file header.
914+ define amdgpu_kernel void @s_ashr_62_i64 (ptr addrspace (1 ) %out , [8 x i32 ], i64 %a , [8 x i32 ], i64 %b ) {
915+ ; SI-LABEL: s_ashr_62_i64:
916+ ; SI: ; %bb.0:
917+ ; SI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x13
918+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
919+ ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x1d
920+ ; SI-NEXT: s_mov_b32 s3, 0xf000
921+ ; SI-NEXT: s_mov_b32 s2, -1
922+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
923+ ; SI-NEXT: s_ashr_i32 s6, s7, 31
924+ ; SI-NEXT: s_ashr_i32 s7, s7, 30
925+ ; SI-NEXT: s_add_u32 s4, s7, s4
926+ ; SI-NEXT: s_addc_u32 s5, s6, s5
927+ ; SI-NEXT: v_mov_b32_e32 v0, s4
928+ ; SI-NEXT: v_mov_b32_e32 v1, s5
929+ ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
930+ ; SI-NEXT: s_endpgm
931+ ;
932+ ; VI-LABEL: s_ashr_62_i64:
933+ ; VI: ; %bb.0:
934+ ; VI-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
935+ ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
936+ ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x74
937+ ; VI-NEXT: s_mov_b32 s3, 0xf000
938+ ; VI-NEXT: s_mov_b32 s2, -1
939+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
940+ ; VI-NEXT: s_ashr_i32 s6, s7, 31
941+ ; VI-NEXT: s_ashr_i32 s7, s7, 30
942+ ; VI-NEXT: s_add_u32 s4, s7, s4
943+ ; VI-NEXT: s_addc_u32 s5, s6, s5
944+ ; VI-NEXT: v_mov_b32_e32 v0, s4
945+ ; VI-NEXT: v_mov_b32_e32 v1, s5
946+ ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
947+ ; VI-NEXT: s_endpgm
948+ ;
949+ ; EG-LABEL: s_ashr_62_i64:
950+ ; EG: ; %bb.0:
951+ ; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[]
952+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
953+ ; EG-NEXT: CF_END
954+ ; EG-NEXT: PAD
955+ ; EG-NEXT: ALU clause starting at 4:
956+ ; EG-NEXT: ASHR T0.W, KC0[5].X, literal.x,
957+ ; EG-NEXT: ASHR * T1.W, KC0[5].X, literal.y,
958+ ; EG-NEXT: 30(4.203895e-44), 31(4.344025e-44)
959+ ; EG-NEXT: ADD_INT T1.W, PS, KC0[7].Z,
960+ ; EG-NEXT: ADDC_UINT * T2.W, PV.W, KC0[7].Y,
961+ ; EG-NEXT: ADD_INT * T0.Y, PV.W, PS,
962+ ; EG-NEXT: ADD_INT T0.X, T0.W, KC0[7].Y,
963+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
964+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
965+ %result = ashr i64 %a , 62
966+ %add = add i64 %result , %b
967+ store i64 %add , ptr addrspace (1 ) %out
968+ ret void
969+ }
970+
857971define amdgpu_kernel void @v_ashr_62_i64 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
858972; SI-LABEL: v_ashr_62_i64:
859973; SI: ; %bb.0:
0 commit comments