@@ -103,16 +103,16 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
103103; SI: ; %bb.0:
104104; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105105; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
106- ; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
107- ; SI-NEXT: v_and_b32_e32 v0, v1, v0
106+ ; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
107+ ; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
108108; SI-NEXT: s_setpc_b64 s[30:31]
109109;
110110; VI-LABEL: bzhi32_c0:
111111; VI: ; %bb.0:
112112; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113113; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
114- ; VI-NEXT: v_lshrrev_b32_e64 v1 , v1, -1
115- ; VI-NEXT: v_and_b32_e32 v0, v1, v0
114+ ; VI-NEXT: v_lshlrev_b32_e32 v0 , v1, v0
115+ ; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
116116; VI-NEXT: s_setpc_b64 s[30:31]
117117 %numhighbits = sub i32 32 , %numlowbits
118118 %mask = lshr i32 -1 , %numhighbits
@@ -121,12 +121,23 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
121121}
122122
123123define i32 @bzhi32_c0_clamp (i32 %val , i32 %numlowbits ) nounwind {
124- ; GCN-LABEL: bzhi32_c0_clamp:
125- ; GCN: ; %bb.0:
126- ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127- ; GCN-NEXT: v_and_b32_e32 v1, 31, v1
128- ; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
129- ; GCN-NEXT: s_setpc_b64 s[30:31]
124+ ; SI-LABEL: bzhi32_c0_clamp:
125+ ; SI: ; %bb.0:
126+ ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127+ ; SI-NEXT: v_and_b32_e32 v1, 31, v1
128+ ; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
129+ ; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
130+ ; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
131+ ; SI-NEXT: s_setpc_b64 s[30:31]
132+ ;
133+ ; VI-LABEL: bzhi32_c0_clamp:
134+ ; VI: ; %bb.0:
135+ ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136+ ; VI-NEXT: v_and_b32_e32 v1, 31, v1
137+ ; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
138+ ; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
139+ ; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
140+ ; VI-NEXT: s_setpc_b64 s[30:31]
130141 %low5bits = and i32 %numlowbits , 31
131142 %numhighbits = sub i32 32 , %low5bits
132143 %mask = lshr i32 -1 , %numhighbits
@@ -139,16 +150,16 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
139150; SI: ; %bb.0:
140151; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141152; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
142- ; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
143- ; SI-NEXT: v_and_b32_e32 v0, v1, v0
153+ ; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
154+ ; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
144155; SI-NEXT: s_setpc_b64 s[30:31]
145156;
146157; VI-LABEL: bzhi32_c1_indexzext:
147158; VI: ; %bb.0:
148159; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149160; VI-NEXT: v_sub_u16_e32 v1, 32, v1
150- ; VI-NEXT: v_lshrrev_b32_e64 v1 , v1, -1
151- ; VI-NEXT: v_and_b32_e32 v0, v1, v0
161+ ; VI-NEXT: v_lshlrev_b32_e32 v0 , v1, v0
162+ ; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
152163; VI-NEXT: s_setpc_b64 s[30:31]
153164 %numhighbits = sub i8 32 , %numlowbits
154165 %sh_prom = zext i8 %numhighbits to i32
@@ -162,16 +173,16 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
162173; SI: ; %bb.0:
163174; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164175; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
165- ; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
166- ; SI-NEXT: v_and_b32_e32 v0, v0, v1
176+ ; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
177+ ; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
167178; SI-NEXT: s_setpc_b64 s[30:31]
168179;
169180; VI-LABEL: bzhi32_c4_commutative:
170181; VI: ; %bb.0:
171182; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172183; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
173- ; VI-NEXT: v_lshrrev_b32_e64 v1 , v1, -1
174- ; VI-NEXT: v_and_b32_e32 v0, v0, v1
184+ ; VI-NEXT: v_lshlrev_b32_e32 v0 , v1, v0
185+ ; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
175186; VI-NEXT: s_setpc_b64 s[30:31]
176187 %numhighbits = sub i32 32 , %numlowbits
177188 %mask = lshr i32 -1 , %numhighbits
0 commit comments