@@ -377,7 +377,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i8_with_select(ptr addrspace(1) noa
377377; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
378378; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
379379; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
380- ; GFX9-GISEL-NEXT: s_lshr_b32 s0, s4, 24
380+ ; GFX9-GISEL-NEXT: s_lshl_b32 s0, s4, 24
381381; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
382382; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
383383; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[2:3]
@@ -452,7 +452,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i16_with_select(ptr addrspace(1) no
452452; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
453453; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
454454; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
455- ; GFX9-GISEL-NEXT: s_lshr_b32 s0, s4, 16
455+ ; GFX9-GISEL-NEXT: s_lshl_b32 s0, s4, 16
456456; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
457457; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
458458; GFX9-GISEL-NEXT: global_store_short v1, v0, s[2:3]
@@ -655,7 +655,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_with_select(ptr addrspace(1) noa
655655; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
656656; GFX9-GISEL-NEXT: global_load_ubyte v1, v0, s[2:3]
657657; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
658- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v1
658+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v1
659+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
659660; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
660661; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
661662; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 32, v2, vcc
@@ -760,7 +761,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i16_with_select(ptr addrspace(1) no
760761; GFX9-GISEL-NEXT: global_load_ubyte v2, v0, s[2:3] offset:1
761762; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
762763; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v2, 8, v1
763- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v1
764+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v1
765+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
764766; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
765767; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
766768; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 32, v2, vcc
@@ -1167,7 +1169,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8(ptr addrspace(1) noalias %out, p
11671169; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
11681170; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
11691171; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1170- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0
1172+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 24, v0
1173+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
11711174; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
11721175; GFX9-GISEL-NEXT: s_endpgm
11731176 %tid = call i32 @llvm.amdgcn.workitem.id.x ()
@@ -1705,8 +1708,9 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(ptr addrspace(1) noa
17051708; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
17061709; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
17071710; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1708- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v2, v0
1709- ; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa s[2:3], v0, v1
1711+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v0
1712+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
1713+ ; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa s[2:3], v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
17101714; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1, s[2:3]
17111715; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
17121716; GFX9-GISEL-NEXT: s_endpgm
@@ -2186,7 +2190,7 @@ define i7 @v_ctlz_zero_undef_i7(i7 %val) {
21862190; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i7:
21872191; GFX9-GISEL: ; %bb.0:
21882192; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2189- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 25, v0
2193+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 25, v0
21902194; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
21912195; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
21922196 %ctlz = call i7 @llvm.ctlz.i7 (i7 %val , i1 true )
@@ -2278,7 +2282,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i18(ptr addrspace(1) noalias %out,
22782282; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
22792283; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
22802284; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2281- ; GFX9-GISEL-NEXT: s_lshr_b32 s0, s4, 14
2285+ ; GFX9-GISEL-NEXT: s_lshl_b32 s0, s4, 14
22822286; GFX9-GISEL-NEXT: s_flbit_i32_b32 s0, s0
22832287; GFX9-GISEL-NEXT: s_and_b32 s0, s0, 0x3ffff
22842288; GFX9-GISEL-NEXT: s_lshr_b32 s1, s0, 16
@@ -2317,7 +2321,7 @@ define i18 @v_ctlz_zero_undef_i18(i18 %val) {
23172321; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i18:
23182322; GFX9-GISEL: ; %bb.0:
23192323; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2320- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 14, v0
2324+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 14, v0
23212325; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
23222326; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
23232327 %ctlz = call i18 @llvm.ctlz.i18 (i18 %val , i1 true )
@@ -2355,8 +2359,8 @@ define <2 x i18> @v_ctlz_zero_undef_v2i18(<2 x i18> %val) {
23552359; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i18:
23562360; GFX9-GISEL: ; %bb.0:
23572361; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2358- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 14, v0
2359- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 14, v1
2362+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 14, v0
2363+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 14, v1
23602364; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
23612365; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
23622366; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -2394,10 +2398,13 @@ define <2 x i16> @v_ctlz_zero_undef_v2i16(<2 x i16> %val) {
23942398; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i16:
23952399; GFX9-GISEL: ; %bb.0:
23962400; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2397- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2398- ; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
2401+ ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
2402+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2403+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2404+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2405+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
23992406; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
2400- ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4 , 16, v0
2407+ ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1 , 16, v0
24012408; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
24022409 %ctlz = call <2 x i16 > @llvm.ctlz.v2i16 (<2 x i16 > %val , i1 true )
24032410 ret <2 x i16 > %ctlz
@@ -2439,11 +2446,15 @@ define <3 x i16> @v_ctlz_zero_undef_v3i16(<3 x i16> %val) {
24392446; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v3i16:
24402447; GFX9-GISEL: ; %bb.0:
24412448; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2442- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2443- ; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
2449+ ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
2450+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2451+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2452+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
2453+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
2454+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
24442455; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
2445- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2446- ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4 , 16, v0
2456+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2457+ ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v2 , 16, v0
24472458; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
24482459 %ctlz = call <3 x i16 > @llvm.ctlz.v3i16 (<3 x i16 > %val , i1 true )
24492460 ret <3 x i16 > %ctlz
@@ -2492,13 +2503,20 @@ define <4 x i16> @v_ctlz_zero_undef_v4i16(<4 x i16> %val) {
24922503; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v4i16:
24932504; GFX9-GISEL: ; %bb.0:
24942505; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2495- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2496- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2497- ; GFX9-GISEL-NEXT: s_flbit_i32_b32 s4, 0
2506+ ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
2507+ ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
2508+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2509+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
2510+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2511+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
2512+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
2513+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
2514+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
2515+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v3, v3
24982516; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
24992517; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
2500- ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, s4 , 16, v0
2501- ; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, s4 , 16, v1
2518+ ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v2 , 16, v0
2519+ ; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v3 , 16, v1
25022520; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
25032521 %ctlz = call <4 x i16 > @llvm.ctlz.v4i16 (<4 x i16 > %val , i1 true )
25042522 ret <4 x i16 > %ctlz
@@ -2536,8 +2554,10 @@ define <2 x i8> @v_ctlz_zero_undef_v2i8(<2 x i8> %val) {
25362554; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i8:
25372555; GFX9-GISEL: ; %bb.0:
25382556; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2539- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
2540- ; GFX9-GISEL-NEXT: v_ffbh_u32_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
2557+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 24, v0
2558+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v1
2559+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
2560+ ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
25412561; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
25422562 %ctlz = call <2 x i8 > @llvm.ctlz.v2i8 (<2 x i8 > %val , i1 true )
25432563 ret <2 x i8 > %ctlz
@@ -2579,8 +2599,8 @@ define <2 x i7> @v_ctlz_zero_undef_v2i7(<2 x i7> %val) {
25792599; GFX9-GISEL-LABEL: v_ctlz_zero_undef_v2i7:
25802600; GFX9-GISEL: ; %bb.0:
25812601; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2582- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v0, 25, v0
2583- ; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 25, v1
2602+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 25, v0
2603+ ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 25, v1
25842604; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
25852605; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v1
25862606; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
0 commit comments