@@ -5512,15 +5512,16 @@ define amdgpu_kernel void @udiv_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3
55125512; GFX6-LABEL: udiv_v2i32_pow2k_denom:
55135513; GFX6: ; %bb.0:
55145514; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
5515+ ; GFX6-NEXT: s_mov_b32 s7, 0xf000
5516+ ; GFX6-NEXT: s_mov_b32 s6, -1
55155517; GFX6-NEXT: s_waitcnt lgkmcnt(0)
5516- ; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
5517- ; GFX6-NEXT: s_lshr_b32 s4, s4, 12
5518- ; GFX6-NEXT: s_lshr_b32 s5, s5, 12
5519- ; GFX6-NEXT: s_mov_b32 s3, 0xf000
5520- ; GFX6-NEXT: s_mov_b32 s2, -1
5521- ; GFX6-NEXT: v_mov_b32_e32 v0, s4
5522- ; GFX6-NEXT: v_mov_b32_e32 v1, s5
5523- ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
5518+ ; GFX6-NEXT: s_mov_b32 s4, s0
5519+ ; GFX6-NEXT: s_mov_b32 s5, s1
5520+ ; GFX6-NEXT: s_lshr_b32 s0, s2, 12
5521+ ; GFX6-NEXT: s_lshr_b32 s1, s3, 12
5522+ ; GFX6-NEXT: v_mov_b32_e32 v0, s0
5523+ ; GFX6-NEXT: v_mov_b32_e32 v1, s1
5524+ ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
55245525; GFX6-NEXT: s_endpgm
55255526;
55265527; GFX9-LABEL: udiv_v2i32_pow2k_denom:
@@ -5554,18 +5555,19 @@ define amdgpu_kernel void @udiv_v2i32_mixed_pow2k_denom(ptr addrspace(1) %out, <
55545555; GFX6: ; %bb.0:
55555556; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
55565557; GFX6-NEXT: v_mov_b32_e32 v0, 0x100101
5558+ ; GFX6-NEXT: s_mov_b32 s7, 0xf000
5559+ ; GFX6-NEXT: s_mov_b32 s6, -1
55575560; GFX6-NEXT: s_waitcnt lgkmcnt(0)
5558- ; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
5559- ; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0
5560- ; GFX6-NEXT: s_lshr_b32 s4, s4, 12
5561- ; GFX6-NEXT: s_mov_b32 s3, 0xf000
5562- ; GFX6-NEXT: s_mov_b32 s2, -1
5563- ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v0
5561+ ; GFX6-NEXT: v_mul_hi_u32 v0, s3, v0
5562+ ; GFX6-NEXT: s_mov_b32 s4, s0
5563+ ; GFX6-NEXT: s_lshr_b32 s0, s2, 12
5564+ ; GFX6-NEXT: s_mov_b32 s5, s1
5565+ ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s3, v0
55645566; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
55655567; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0
55665568; GFX6-NEXT: v_lshrrev_b32_e32 v1, 11, v0
5567- ; GFX6-NEXT: v_mov_b32_e32 v0, s4
5568- ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3 ], 0
5569+ ; GFX6-NEXT: v_mov_b32_e32 v0, s0
5570+ ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7 ], 0
55695571; GFX6-NEXT: s_endpgm
55705572;
55715573; GFX9-LABEL: udiv_v2i32_mixed_pow2k_denom:
@@ -5906,15 +5908,16 @@ define amdgpu_kernel void @urem_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3
59065908; GFX6-LABEL: urem_v2i32_pow2k_denom:
59075909; GFX6: ; %bb.0:
59085910; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
5911+ ; GFX6-NEXT: s_mov_b32 s7, 0xf000
5912+ ; GFX6-NEXT: s_mov_b32 s6, -1
59095913; GFX6-NEXT: s_waitcnt lgkmcnt(0)
5910- ; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
5911- ; GFX6-NEXT: s_and_b32 s4, s4, 0xfff
5912- ; GFX6-NEXT: s_and_b32 s5, s5, 0xfff
5913- ; GFX6-NEXT: s_mov_b32 s3, 0xf000
5914- ; GFX6-NEXT: s_mov_b32 s2, -1
5915- ; GFX6-NEXT: v_mov_b32_e32 v0, s4
5916- ; GFX6-NEXT: v_mov_b32_e32 v1, s5
5917- ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
5914+ ; GFX6-NEXT: s_mov_b32 s4, s0
5915+ ; GFX6-NEXT: s_mov_b32 s5, s1
5916+ ; GFX6-NEXT: s_and_b32 s0, s2, 0xfff
5917+ ; GFX6-NEXT: s_and_b32 s1, s3, 0xfff
5918+ ; GFX6-NEXT: v_mov_b32_e32 v0, s0
5919+ ; GFX6-NEXT: v_mov_b32_e32 v1, s1
5920+ ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
59185921; GFX6-NEXT: s_endpgm
59195922;
59205923; GFX9-LABEL: urem_v2i32_pow2k_denom:
@@ -6288,21 +6291,22 @@ define amdgpu_kernel void @sdiv_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3
62886291; GFX6-LABEL: sdiv_v2i32_pow2k_denom:
62896292; GFX6: ; %bb.0:
62906293; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
6294+ ; GFX6-NEXT: s_mov_b32 s7, 0xf000
6295+ ; GFX6-NEXT: s_mov_b32 s6, -1
62916296; GFX6-NEXT: s_waitcnt lgkmcnt(0)
6292- ; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
6293- ; GFX6-NEXT: s_ashr_i32 s6, s4, 31
6294- ; GFX6-NEXT: s_ashr_i32 s7, s5, 31
6295- ; GFX6-NEXT: s_lshr_b32 s6, s6, 20
6296- ; GFX6-NEXT: s_lshr_b32 s7, s7, 20
6297- ; GFX6-NEXT: s_add_i32 s4, s4, s6
6298- ; GFX6-NEXT: s_add_i32 s5, s5, s7
6299- ; GFX6-NEXT: s_ashr_i32 s4, s4, 12
6300- ; GFX6-NEXT: s_ashr_i32 s5, s5, 12
6301- ; GFX6-NEXT: s_mov_b32 s3, 0xf000
6302- ; GFX6-NEXT: s_mov_b32 s2, -1
6303- ; GFX6-NEXT: v_mov_b32_e32 v0, s4
6304- ; GFX6-NEXT: v_mov_b32_e32 v1, s5
6305- ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
6297+ ; GFX6-NEXT: s_mov_b32 s4, s0
6298+ ; GFX6-NEXT: s_mov_b32 s5, s1
6299+ ; GFX6-NEXT: s_ashr_i32 s0, s2, 31
6300+ ; GFX6-NEXT: s_ashr_i32 s1, s3, 31
6301+ ; GFX6-NEXT: s_lshr_b32 s0, s0, 20
6302+ ; GFX6-NEXT: s_lshr_b32 s1, s1, 20
6303+ ; GFX6-NEXT: s_add_i32 s0, s2, s0
6304+ ; GFX6-NEXT: s_add_i32 s1, s3, s1
6305+ ; GFX6-NEXT: s_ashr_i32 s0, s0, 12
6306+ ; GFX6-NEXT: s_ashr_i32 s1, s1, 12
6307+ ; GFX6-NEXT: v_mov_b32_e32 v0, s0
6308+ ; GFX6-NEXT: v_mov_b32_e32 v1, s1
6309+ ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
63066310; GFX6-NEXT: s_endpgm
63076311;
63086312; GFX9-LABEL: sdiv_v2i32_pow2k_denom:
@@ -6342,21 +6346,22 @@ define amdgpu_kernel void @ssdiv_v2i32_mixed_pow2k_denom(ptr addrspace(1) %out,
63426346; GFX6: ; %bb.0:
63436347; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
63446348; GFX6-NEXT: v_mov_b32_e32 v0, 0x80080081
6349+ ; GFX6-NEXT: s_mov_b32 s7, 0xf000
6350+ ; GFX6-NEXT: s_mov_b32 s6, -1
63456351; GFX6-NEXT: s_waitcnt lgkmcnt(0)
6346- ; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
6347- ; GFX6-NEXT: v_mul_hi_i32 v0, s5, v0
6348- ; GFX6-NEXT: s_ashr_i32 s6, s4 , 31
6349- ; GFX6-NEXT: s_lshr_b32 s6, s6 , 20
6350- ; GFX6-NEXT: s_add_i32 s4, s4, s6
6351- ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s5 , v0
6352- ; GFX6-NEXT: s_ashr_i32 s4, s4 , 12
6352+ ; GFX6-NEXT: v_mul_hi_i32 v0, s3, v0
6353+ ; GFX6-NEXT: s_mov_b32 s4, s0
6354+ ; GFX6-NEXT: s_ashr_i32 s0, s2 , 31
6355+ ; GFX6-NEXT: s_lshr_b32 s0, s0 , 20
6356+ ; GFX6-NEXT: s_add_i32 s0, s2, s0
6357+ ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s3 , v0
6358+ ; GFX6-NEXT: s_ashr_i32 s0, s0 , 12
63536359; GFX6-NEXT: v_lshrrev_b32_e32 v1, 31, v0
63546360; GFX6-NEXT: v_ashrrev_i32_e32 v0, 11, v0
6355- ; GFX6-NEXT: s_mov_b32 s3, 0xf000
6356- ; GFX6-NEXT: s_mov_b32 s2, -1
6361+ ; GFX6-NEXT: s_mov_b32 s5, s1
63576362; GFX6-NEXT: v_add_i32_e32 v1, vcc, v0, v1
6358- ; GFX6-NEXT: v_mov_b32_e32 v0, s4
6359- ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3 ], 0
6363+ ; GFX6-NEXT: v_mov_b32_e32 v0, s0
6364+ ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7 ], 0
63606365; GFX6-NEXT: s_endpgm
63616366;
63626367; GFX9-LABEL: ssdiv_v2i32_mixed_pow2k_denom:
@@ -6793,23 +6798,24 @@ define amdgpu_kernel void @srem_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3
67936798; GFX6-LABEL: srem_v2i32_pow2k_denom:
67946799; GFX6: ; %bb.0:
67956800; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
6801+ ; GFX6-NEXT: s_mov_b32 s7, 0xf000
6802+ ; GFX6-NEXT: s_mov_b32 s6, -1
67966803; GFX6-NEXT: s_waitcnt lgkmcnt(0)
6797- ; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
6798- ; GFX6-NEXT: s_ashr_i32 s6, s4, 31
6799- ; GFX6-NEXT: s_lshr_b32 s6, s6, 20
6800- ; GFX6-NEXT: s_ashr_i32 s7, s5, 31
6801- ; GFX6-NEXT: s_add_i32 s6, s4, s6
6802- ; GFX6-NEXT: s_lshr_b32 s7, s7, 20
6803- ; GFX6-NEXT: s_and_b32 s6, s6, 0xfffff000
6804- ; GFX6-NEXT: s_sub_i32 s4, s4, s6
6805- ; GFX6-NEXT: s_add_i32 s6, s5, s7
6806- ; GFX6-NEXT: s_and_b32 s6, s6, 0xfffff000
6807- ; GFX6-NEXT: s_sub_i32 s5, s5, s6
6808- ; GFX6-NEXT: s_mov_b32 s3, 0xf000
6809- ; GFX6-NEXT: s_mov_b32 s2, -1
6810- ; GFX6-NEXT: v_mov_b32_e32 v0, s4
6811- ; GFX6-NEXT: v_mov_b32_e32 v1, s5
6812- ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
6804+ ; GFX6-NEXT: s_mov_b32 s4, s0
6805+ ; GFX6-NEXT: s_mov_b32 s5, s1
6806+ ; GFX6-NEXT: s_ashr_i32 s0, s2, 31
6807+ ; GFX6-NEXT: s_ashr_i32 s1, s3, 31
6808+ ; GFX6-NEXT: s_lshr_b32 s0, s0, 20
6809+ ; GFX6-NEXT: s_lshr_b32 s1, s1, 20
6810+ ; GFX6-NEXT: s_add_i32 s0, s2, s0
6811+ ; GFX6-NEXT: s_add_i32 s1, s3, s1
6812+ ; GFX6-NEXT: s_and_b32 s0, s0, 0xfffff000
6813+ ; GFX6-NEXT: s_and_b32 s1, s1, 0xfffff000
6814+ ; GFX6-NEXT: s_sub_i32 s0, s2, s0
6815+ ; GFX6-NEXT: s_sub_i32 s1, s3, s1
6816+ ; GFX6-NEXT: v_mov_b32_e32 v0, s0
6817+ ; GFX6-NEXT: v_mov_b32_e32 v1, s1
6818+ ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
68136819; GFX6-NEXT: s_endpgm
68146820;
68156821; GFX9-LABEL: srem_v2i32_pow2k_denom:
0 commit comments