@@ -1920,9 +1920,7 @@ define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo_scalar_hi(ptr addrspa
19201920; PACKED-GISEL-NEXT: ds_read_b32 v5, v5 offset:8
19211921; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
19221922; PACKED-GISEL-NEXT: v_pk_mul_f32 v[4:5], 1.0, v[4:5] op_sel_hi:[0,1]
1923- ; PACKED-GISEL-NEXT: v_xor_b32_e32 v4, 0x80000000, v4
1924- ; PACKED-GISEL-NEXT: v_xor_b32_e32 v5, 0x80000000, v5
1925- ; PACKED-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], v[2:3], v[4:5]
1923+ ; PACKED-GISEL-NEXT: v_pk_fma_f32 v[0:1], v[0:1], v[2:3], v[4:5] neg_lo:[0,0,1] neg_hi:[0,0,1]
19261924; PACKED-GISEL-NEXT: v_mov_b32_e32 v2, 0
19271925; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
19281926; PACKED-GISEL-NEXT: s_endpgm
@@ -1984,9 +1982,7 @@ define amdgpu_kernel void @shuffle_add_f32(ptr addrspace(1) %out, ptr addrspace(
19841982; PACKED-GISEL-NEXT: ds_read_b64 v[2:3], v2 offset:8
19851983; PACKED-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
19861984; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1987- ; PACKED-GISEL-NEXT: v_mov_b32_e32 v4, v3
1988- ; PACKED-GISEL-NEXT: v_mov_b32_e32 v5, v2
1989- ; PACKED-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], v[4:5]
1985+ ; PACKED-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], v[2:3] op_sel:[0,1] op_sel_hi:[1,0]
19901986; PACKED-GISEL-NEXT: v_mov_b32_e32 v2, 0
19911987; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
19921988; PACKED-GISEL-NEXT: s_endpgm
@@ -2048,12 +2044,8 @@ define amdgpu_kernel void @shuffle_neg_add_f32(ptr addrspace(1) %out, ptr addrsp
20482044; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
20492045; PACKED-GISEL-NEXT: ds_read_b64 v[2:3], v2 offset:8
20502046; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2051- ; PACKED-GISEL-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
2052- ; PACKED-GISEL-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
2053- ; PACKED-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1]
2054- ; PACKED-GISEL-NEXT: v_mov_b32_e32 v4, v3
2055- ; PACKED-GISEL-NEXT: v_mov_b32_e32 v5, v2
2056- ; PACKED-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], v[4:5]
2047+ ; PACKED-GISEL-NEXT: v_pk_mul_f32 v[2:3], 1.0, v[2:3] op_sel_hi:[0,1] neg_lo:[0,1] neg_hi:[0,1]
2048+ ; PACKED-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], v[2:3] op_sel:[0,1] op_sel_hi:[1,0]
20572049; PACKED-GISEL-NEXT: v_mov_b32_e32 v2, 0
20582050; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
20592051; PACKED-GISEL-NEXT: s_endpgm
@@ -2283,9 +2275,7 @@ define amdgpu_kernel void @fneg_v2f32_vec(ptr addrspace(1) %a) {
22832275; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
22842276; PACKED-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
22852277; PACKED-GISEL-NEXT: s_waitcnt vmcnt(0)
2286- ; PACKED-GISEL-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
2287- ; PACKED-GISEL-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
2288- ; PACKED-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1]
2278+ ; PACKED-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, v[0:1] op_sel_hi:[0,1] neg_lo:[0,1] neg_hi:[0,1]
22892279; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
22902280; PACKED-GISEL-NEXT: s_endpgm
22912281 %id = tail call i32 @llvm.amdgcn.workitem.id.x ()
@@ -2326,9 +2316,7 @@ define amdgpu_kernel void @fneg_v2f32_scalar(ptr addrspace(1) %a, <2 x float> %x
23262316; PACKED-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
23272317; PACKED-GISEL-NEXT: v_mov_b32_e32 v2, 0
23282318; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2329- ; PACKED-GISEL-NEXT: s_xor_b32 s2, s2, 0x80000000
2330- ; PACKED-GISEL-NEXT: s_xor_b32 s3, s3, 0x80000000
2331- ; PACKED-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, s[2:3] op_sel_hi:[0,1]
2319+ ; PACKED-GISEL-NEXT: v_pk_mul_f32 v[0:1], 1.0, s[2:3] op_sel_hi:[0,1] neg_lo:[0,1] neg_hi:[0,1]
23322320; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
23332321; PACKED-GISEL-NEXT: s_endpgm
23342322 %fneg = fsub <2 x float > <float -0 .0 , float -0 .0 >, %x
0 commit comments