@@ -99,15 +99,13 @@ define i32 @v_orn2_i32(i32 %src0, i32 %src1) {
9999; GCN-LABEL: v_orn2_i32:
100100; GCN: ; %bb.0:
101101; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102- ; GCN-NEXT: v_not_b32_e32 v1, v1
103- ; GCN-NEXT: v_or_b32_e32 v0, v0, v1
102+ ; GCN-NEXT: v_bfi_b32 v0, v1, v0, -1
104103; GCN-NEXT: s_setpc_b64 s[30:31]
105104;
106105; GFX10PLUS-LABEL: v_orn2_i32:
107106; GFX10PLUS: ; %bb.0:
108107; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109- ; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1
110- ; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v1
108+ ; GFX10PLUS-NEXT: v_bfi_b32 v0, v1, v0, -1
111109; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
112110 %not.src1 = xor i32 %src1 , -1
113111 %or = or i32 %src0 , %not.src1
@@ -117,14 +115,12 @@ define i32 @v_orn2_i32(i32 %src0, i32 %src1) {
117115define amdgpu_ps float @v_orn2_i32_sv (i32 inreg %src0 , i32 %src1 ) {
118116; GCN-LABEL: v_orn2_i32_sv:
119117; GCN: ; %bb.0:
120- ; GCN-NEXT: v_not_b32_e32 v0, v0
121- ; GCN-NEXT: v_or_b32_e32 v0, s2, v0
118+ ; GCN-NEXT: v_bfi_b32 v0, v0, s2, -1
122119; GCN-NEXT: ; return to shader part epilog
123120;
124121; GFX10PLUS-LABEL: v_orn2_i32_sv:
125122; GFX10PLUS: ; %bb.0:
126- ; GFX10PLUS-NEXT: v_not_b32_e32 v0, v0
127- ; GFX10PLUS-NEXT: v_or_b32_e32 v0, s2, v0
123+ ; GFX10PLUS-NEXT: v_bfi_b32 v0, v0, s2, -1
128124; GFX10PLUS-NEXT: ; return to shader part epilog
129125 %not.src1 = xor i32 %src1 , -1
130126 %or = or i32 %src0 , %not.src1
@@ -135,14 +131,12 @@ define amdgpu_ps float @v_orn2_i32_sv(i32 inreg %src0, i32 %src1) {
135131define amdgpu_ps float @v_orn2_i32_vs (i32 %src0 , i32 inreg %src1 ) {
136132; GCN-LABEL: v_orn2_i32_vs:
137133; GCN: ; %bb.0:
138- ; GCN-NEXT: s_not_b32 s0, s2
139- ; GCN-NEXT: v_or_b32_e32 v0, s0, v0
134+ ; GCN-NEXT: v_bfi_b32 v0, s2, v0, -1
140135; GCN-NEXT: ; return to shader part epilog
141136;
142137; GFX10PLUS-LABEL: v_orn2_i32_vs:
143138; GFX10PLUS: ; %bb.0:
144- ; GFX10PLUS-NEXT: s_not_b32 s0, s2
145- ; GFX10PLUS-NEXT: v_or_b32_e32 v0, s0, v0
139+ ; GFX10PLUS-NEXT: v_bfi_b32 v0, s2, v0, -1
146140; GFX10PLUS-NEXT: ; return to shader part epilog
147141 %not.src1 = xor i32 %src1 , -1
148142 %or = or i32 %src0 , %not.src1
@@ -247,19 +241,15 @@ define i64 @v_orn2_i64(i64 %src0, i64 %src1) {
247241; GCN-LABEL: v_orn2_i64:
248242; GCN: ; %bb.0:
249243; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250- ; GCN-NEXT: v_not_b32_e32 v2, v2
251- ; GCN-NEXT: v_not_b32_e32 v3, v3
252- ; GCN-NEXT: v_or_b32_e32 v0, v0, v2
253- ; GCN-NEXT: v_or_b32_e32 v1, v1, v3
244+ ; GCN-NEXT: v_bfi_b32 v0, v2, v0, -1
245+ ; GCN-NEXT: v_bfi_b32 v1, v3, v1, -1
254246; GCN-NEXT: s_setpc_b64 s[30:31]
255247;
256248; GFX10PLUS-LABEL: v_orn2_i64:
257249; GFX10PLUS: ; %bb.0:
258250; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
259- ; GFX10PLUS-NEXT: v_not_b32_e32 v2, v2
260- ; GFX10PLUS-NEXT: v_not_b32_e32 v3, v3
261- ; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v2
262- ; GFX10PLUS-NEXT: v_or_b32_e32 v1, v1, v3
251+ ; GFX10PLUS-NEXT: v_bfi_b32 v0, v2, v0, -1
252+ ; GFX10PLUS-NEXT: v_bfi_b32 v1, v3, v1, -1
263253; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
264254 %not.src1 = xor i64 %src1 , -1
265255 %or = or i64 %src0 , %not.src1
@@ -269,18 +259,14 @@ define i64 @v_orn2_i64(i64 %src0, i64 %src1) {
269259define amdgpu_ps <2 x float > @v_orn2_i64_sv (i64 inreg %src0 , i64 %src1 ) {
270260; GCN-LABEL: v_orn2_i64_sv:
271261; GCN: ; %bb.0:
272- ; GCN-NEXT: v_not_b32_e32 v0, v0
273- ; GCN-NEXT: v_not_b32_e32 v1, v1
274- ; GCN-NEXT: v_or_b32_e32 v0, s2, v0
275- ; GCN-NEXT: v_or_b32_e32 v1, s3, v1
262+ ; GCN-NEXT: v_bfi_b32 v0, v0, s2, -1
263+ ; GCN-NEXT: v_bfi_b32 v1, v1, s3, -1
276264; GCN-NEXT: ; return to shader part epilog
277265;
278266; GFX10PLUS-LABEL: v_orn2_i64_sv:
279267; GFX10PLUS: ; %bb.0:
280- ; GFX10PLUS-NEXT: v_not_b32_e32 v0, v0
281- ; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1
282- ; GFX10PLUS-NEXT: v_or_b32_e32 v0, s2, v0
283- ; GFX10PLUS-NEXT: v_or_b32_e32 v1, s3, v1
268+ ; GFX10PLUS-NEXT: v_bfi_b32 v0, v0, s2, -1
269+ ; GFX10PLUS-NEXT: v_bfi_b32 v1, v1, s3, -1
284270; GFX10PLUS-NEXT: ; return to shader part epilog
285271 %not.src1 = xor i64 %src1 , -1
286272 %or = or i64 %src0 , %not.src1
0 commit comments