@@ -941,11 +941,11 @@ define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(ptr addrspace(1) no
941941; VI-NEXT: flat_load_ubyte v13, v[2:3]
942942; VI-NEXT: flat_load_ubyte v4, v[4:5]
943943; VI-NEXT: flat_load_ubyte v5, v[6:7]
944+ ; VI-NEXT: flat_load_ubyte v6, v[8:9]
945+ ; VI-NEXT: flat_load_ubyte v7, v[10:11]
944946; VI-NEXT: v_mov_b32_e32 v0, s4
945947; VI-NEXT: s_add_u32 s2, s2, 2
946948; VI-NEXT: v_mov_b32_e32 v1, s5
947- ; VI-NEXT: flat_load_ubyte v6, v[8:9]
948- ; VI-NEXT: flat_load_ubyte v7, v[10:11]
949949; VI-NEXT: s_addc_u32 s3, s3, 0
950950; VI-NEXT: v_mov_b32_e32 v2, s2
951951; VI-NEXT: v_mov_b32_e32 v3, s3
@@ -954,20 +954,20 @@ define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(ptr addrspace(1) no
954954; VI-NEXT: v_mov_b32_e32 v0, s0
955955; VI-NEXT: v_mov_b32_e32 v1, s1
956956; VI-NEXT: s_waitcnt vmcnt(7)
957- ; VI-NEXT: v_lshlrev_b32_e32 v4 , 8, v10
957+ ; VI-NEXT: v_lshlrev_b32_e32 v3 , 8, v12
958958; VI-NEXT: s_waitcnt vmcnt(6)
959- ; VI-NEXT: v_or_b32_e32 v4, v4, v11
959+ ; VI-NEXT: v_or_b32_e32 v3, v3, v13
960960; VI-NEXT: s_waitcnt vmcnt(5)
961- ; VI-NEXT: v_lshlrev_b32_e32 v5 , 8, v12
961+ ; VI-NEXT: v_lshlrev_b32_e32 v4 , 8, v4
962962; VI-NEXT: s_waitcnt vmcnt(4)
963963; VI-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
964964; VI-NEXT: v_or_b32_e32 v3, v4, v3
965- ; VI-NEXT: v_ffbl_b32_e32 v3, v3
966- ; VI-NEXT: v_add_u32_e32 v3, vcc, 32, v3
967965; VI-NEXT: s_waitcnt vmcnt(3)
968966; VI-NEXT: v_lshlrev_b32_e32 v4, 8, v6
969967; VI-NEXT: s_waitcnt vmcnt(2)
970968; VI-NEXT: v_or_b32_e32 v4, v4, v7
969+ ; VI-NEXT: v_ffbl_b32_e32 v3, v3
970+ ; VI-NEXT: v_add_u32_e32 v3, vcc, 32, v3
971971; VI-NEXT: s_waitcnt vmcnt(1)
972972; VI-NEXT: v_lshlrev_b32_e32 v5, 8, v8
973973; VI-NEXT: s_waitcnt vmcnt(0)
@@ -1452,19 +1452,14 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
14521452; VI-LABEL: v_cttz_i8_sel_eq_neg1:
14531453; VI: ; %bb.0:
14541454; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
1455- ; VI-NEXT: v_mov_b32_e32 v3, 0xff
14561455; VI-NEXT: s_waitcnt lgkmcnt(0)
14571456; VI-NEXT: v_mov_b32_e32 v0, s2
14581457; VI-NEXT: v_mov_b32_e32 v1, s3
14591458; VI-NEXT: flat_load_ubyte v2, v[0:1]
14601459; VI-NEXT: v_mov_b32_e32 v0, s0
14611460; VI-NEXT: v_mov_b32_e32 v1, s1
14621461; VI-NEXT: s_waitcnt vmcnt(0)
1463- ; VI-NEXT: v_or_b32_e32 v4, 0x100, v2
1464- ; VI-NEXT: v_and_b32_e32 v2, 0xffff, v2
1465- ; VI-NEXT: v_ffbl_b32_e32 v4, v4
1466- ; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
1467- ; VI-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc
1462+ ; VI-NEXT: v_ffbl_b32_e32 v2, v2
14681463; VI-NEXT: flat_store_byte v[0:1], v2
14691464; VI-NEXT: s_endpgm
14701465;
@@ -1546,26 +1541,19 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
15461541; VI-NEXT: s_waitcnt lgkmcnt(0)
15471542; VI-NEXT: s_add_u32 s4, s2, 1
15481543; VI-NEXT: s_addc_u32 s5, s3, 0
1549- ; VI-NEXT: v_mov_b32_e32 v2, s4
1550- ; VI-NEXT: v_mov_b32_e32 v0, s2
1551- ; VI-NEXT: v_mov_b32_e32 v3, s5
1552- ; VI-NEXT: v_mov_b32_e32 v1, s3
1544+ ; VI-NEXT: v_mov_b32_e32 v0, s4
1545+ ; VI-NEXT: v_mov_b32_e32 v1, s5
1546+ ; VI-NEXT: v_mov_b32_e32 v2, s2
1547+ ; VI-NEXT: v_mov_b32_e32 v3, s3
1548+ ; VI-NEXT: flat_load_ubyte v4, v[0:1]
15531549; VI-NEXT: flat_load_ubyte v2, v[2:3]
1554- ; VI-NEXT: flat_load_ubyte v3, v[0:1]
15551550; VI-NEXT: v_mov_b32_e32 v0, s0
15561551; VI-NEXT: v_mov_b32_e32 v1, s1
15571552; VI-NEXT: s_waitcnt vmcnt(1)
1558- ; VI-NEXT: v_readfirstlane_b32 s0, v2
1553+ ; VI-NEXT: v_lshlrev_b32_e32 v3, 8, v4
15591554; VI-NEXT: s_waitcnt vmcnt(0)
1560- ; VI-NEXT: v_readfirstlane_b32 s1, v3
1561- ; VI-NEXT: s_lshl_b32 s0, s0, 8
1562- ; VI-NEXT: s_or_b32 s0, s0, s1
1563- ; VI-NEXT: s_or_b32 s1, s0, 0x10000
1564- ; VI-NEXT: s_and_b32 s0, s0, 0xffff
1565- ; VI-NEXT: s_ff1_i32_b32 s1, s1
1566- ; VI-NEXT: s_cmp_lg_u32 s0, 0
1567- ; VI-NEXT: s_cselect_b32 s0, s1, 0xffff
1568- ; VI-NEXT: v_mov_b32_e32 v2, s0
1555+ ; VI-NEXT: v_or_b32_e32 v2, v3, v2
1556+ ; VI-NEXT: v_ffbl_b32_e32 v2, v2
15691557; VI-NEXT: flat_store_short v[0:1], v2
15701558; VI-NEXT: s_endpgm
15711559;
0 commit comments