Skip to content

Commit b247c47

Browse files
committed
update fabs
1 parent d6e880b commit b247c47

File tree

8 files changed

+50
-63
lines changed

8 files changed

+50
-63
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19007,11 +19007,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
1900719007
if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
1900819008
return C;
1900919009

19010-
// fold (fabs (fabs x)) -> (fabs x)
19011-
if (N0.getOpcode() == ISD::FABS)
19012-
return N->getOperand(0);
19013-
19014-
if (SimplifyDemandedBits(N0))
19010+
if (SimplifyDemandedBits(SDValue(N, 0)))
1901519011
return SDValue(N, 0);
1901619012

1901719013
if (SDValue Cast = foldSignChangeInBitcast(N))

llvm/test/CodeGen/AMDGPU/bf16.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18639,17 +18639,17 @@ define bfloat @v_fabs_bf16(bfloat %a) {
1863918639
; GCN: ; %bb.0:
1864018640
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1864118641
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
18642-
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
18643-
; GCN-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
18642+
; GCN-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
18643+
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
1864418644
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1864518645
; GCN-NEXT: s_setpc_b64 s[30:31]
1864618646
;
1864718647
; GFX7-LABEL: v_fabs_bf16:
1864818648
; GFX7: ; %bb.0:
1864918649
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1865018650
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
18651-
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
18652-
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
18651+
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
18652+
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
1865318653
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1865418654
; GFX7-NEXT: s_setpc_b64 s[30:31]
1865518655
;
@@ -18832,8 +18832,8 @@ define bfloat @v_fneg_fabs_bf16(bfloat %a) {
1883218832
; GCN: ; %bb.0:
1883318833
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1883418834
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
18835-
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
18836-
; GCN-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
18835+
; GCN-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
18836+
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
1883718837
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1883818838
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
1883918839
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
@@ -18843,8 +18843,8 @@ define bfloat @v_fneg_fabs_bf16(bfloat %a) {
1884318843
; GFX7: ; %bb.0:
1884418844
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1884518845
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
18846-
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
18847-
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
18846+
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
18847+
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
1884818848
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1884918849
; GFX7-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
1885018850
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
@@ -18889,23 +18889,23 @@ define amdgpu_ps i32 @s_fneg_fabs_bf16(bfloat inreg %a) {
1888918889
; GCN-LABEL: s_fneg_fabs_bf16:
1889018890
; GCN: ; %bb.0:
1889118891
; GCN-NEXT: v_mul_f32_e64 v0, 1.0, s0
18892+
; GCN-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
18893+
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
18894+
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
18895+
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
18896+
; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1889218897
; GCN-NEXT: v_readfirstlane_b32 s0, v0
18893-
; GCN-NEXT: s_and_b32 s0, s0, 0xffff0000
18894-
; GCN-NEXT: s_bitset0_b32 s0, 31
18895-
; GCN-NEXT: s_and_b32 s0, s0, 0xffff0000
18896-
; GCN-NEXT: s_xor_b32 s0, s0, 0x80000000
18897-
; GCN-NEXT: s_lshr_b32 s0, s0, 16
1889818898
; GCN-NEXT: ; return to shader part epilog
1889918899
;
1890018900
; GFX7-LABEL: s_fneg_fabs_bf16:
1890118901
; GFX7: ; %bb.0:
1890218902
; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, s0
18903+
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
18904+
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
18905+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
18906+
; GFX7-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
18907+
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1890318908
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
18904-
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff0000
18905-
; GFX7-NEXT: s_bitset0_b32 s0, 31
18906-
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff0000
18907-
; GFX7-NEXT: s_xor_b32 s0, s0, 0x80000000
18908-
; GFX7-NEXT: s_lshr_b32 s0, s0, 16
1890918909
; GFX7-NEXT: ; return to shader part epilog
1891018910
;
1891118911
; GFX8-LABEL: s_fneg_fabs_bf16:

llvm/test/CodeGen/AMDGPU/fabs.bf16.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -220,10 +220,10 @@ define amdgpu_kernel void @s_fabs_v4bf16(ptr addrspace(1) %out, <4 x bfloat> %in
220220
; CI-NEXT: s_waitcnt lgkmcnt(0)
221221
; CI-NEXT: s_and_b32 s4, s3, 0xffff0000
222222
; CI-NEXT: s_lshl_b32 s3, s3, 16
223-
; CI-NEXT: s_and_b32 s5, s2, 0xffff0000
223+
; CI-NEXT: s_and_b32 s5, s2, 0x7fff0000
224224
; CI-NEXT: v_mul_f32_e64 v0, 1.0, |s4|
225225
; CI-NEXT: v_mul_f32_e64 v1, 1.0, |s3|
226-
; CI-NEXT: v_mul_f32_e64 v2, 1.0, |s5|
226+
; CI-NEXT: v_mul_f32_e64 v2, 1.0, s5
227227
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
228228
; CI-NEXT: s_lshl_b32 s2, s2, 16
229229
; CI-NEXT: v_alignbit_b32 v1, v0, v1, 16
@@ -944,9 +944,9 @@ define amdgpu_kernel void @v_extract_fabs_fold_v2bf16(ptr addrspace(1) %in) #0 {
944944
; CI-NEXT: flat_load_dword v0, v[0:1]
945945
; CI-NEXT: s_waitcnt vmcnt(0)
946946
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v0
947-
; CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
947+
; CI-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
948948
; CI-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
949-
; CI-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
949+
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
950950
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
951951
; CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
952952
; CI-NEXT: v_mul_f32_e32 v1, 4.0, v1

llvm/test/CodeGen/AMDGPU/fneg-fabs.bf16.ll

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -481,17 +481,16 @@ define amdgpu_kernel void @s_fneg_fabs_v2bf16_non_bc_src(ptr addrspace(1) %out,
481481
; CI-NEXT: s_lshl_b32 s2, s2, 16
482482
; CI-NEXT: v_add_f32_e64 v0, s3, 2.0
483483
; CI-NEXT: v_add_f32_e64 v1, s2, 1.0
484-
; CI-NEXT: v_readfirstlane_b32 s2, v0
485-
; CI-NEXT: s_and_b32 s2, s2, 0xffff0000
486-
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
487-
; CI-NEXT: s_bitset0_b32 s2, 31
488-
; CI-NEXT: v_and_b32_e32 v0, 0x7fffffff, v1
489-
; CI-NEXT: s_and_b32 s2, s2, 0xffff0000
490-
; CI-NEXT: s_xor_b32 s2, s2, 0x80000000
484+
; CI-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
485+
; CI-NEXT: v_and_b32_e32 v1, 0x7fff0000, v1
486+
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
487+
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
491488
; CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
492-
; CI-NEXT: s_lshr_b32 s2, s2, 16
493489
; CI-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
494-
; CI-NEXT: v_alignbit_b32 v2, s2, v0, 16
490+
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
491+
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
492+
; CI-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
493+
; CI-NEXT: v_alignbit_b32 v2, v0, v1, 16
495494
; CI-NEXT: v_mov_b32_e32 v0, s0
496495
; CI-NEXT: v_mov_b32_e32 v1, s1
497496
; CI-NEXT: flat_store_dword v[0:1], v2
@@ -676,8 +675,8 @@ define amdgpu_kernel void @fneg_fabs_v4bf16(ptr addrspace(1) %out, <4 x bfloat>
676675
; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
677676
; CI-NEXT: s_waitcnt lgkmcnt(0)
678677
; CI-NEXT: s_lshl_b32 s4, s2, 16
679-
; CI-NEXT: s_and_b32 s2, s2, 0xffff0000
680-
; CI-NEXT: v_mul_f32_e64 v2, 1.0, |s2|
678+
; CI-NEXT: s_and_b32 s2, s2, 0x7fff0000
679+
; CI-NEXT: v_mul_f32_e64 v2, 1.0, s2
681680
; CI-NEXT: s_and_b32 s2, s3, 0xffff0000
682681
; CI-NEXT: s_lshl_b32 s5, s3, 16
683682
; CI-NEXT: v_mul_f32_e64 v3, 1.0, |s2|

llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,7 @@ define half @add_select_fabs_negk_negk_f16(i32 %c, half %x) {
452452
; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
453453
; CI-NEXT: v_cndmask_b32_e64 v0, -1.0, -2.0, vcc
454454
; CI-NEXT: v_cvt_f32_f16_e32 v1, v1
455-
; CI-NEXT: v_add_f32_e64 v0, |v0|, v1
455+
; CI-NEXT: v_sub_f32_e32 v0, v1, v0
456456
; CI-NEXT: s_setpc_b64 s[30:31]
457457
;
458458
; VI-LABEL: add_select_fabs_negk_negk_f16:
@@ -462,7 +462,7 @@ define half @add_select_fabs_negk_negk_f16(i32 %c, half %x) {
462462
; VI-NEXT: v_mov_b32_e32 v3, 0xc000
463463
; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
464464
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
465-
; VI-NEXT: v_add_f16_e64 v0, |v0|, v1
465+
; VI-NEXT: v_sub_f16_e32 v0, v1, v0
466466
; VI-NEXT: s_setpc_b64 s[30:31]
467467
;
468468
; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_negk_f16:
@@ -472,7 +472,7 @@ define half @add_select_fabs_negk_negk_f16(i32 %c, half %x) {
472472
; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
473473
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
474474
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
475-
; GFX11-SAFE-TRUE16-NEXT: v_add_f16_e64 v0.l, |v0.l|, v1.l
475+
; GFX11-SAFE-TRUE16-NEXT: v_sub_f16_e32 v0.l, v1.l, v0.l
476476
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
477477
;
478478
; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_negk_f16:
@@ -482,7 +482,7 @@ define half @add_select_fabs_negk_negk_f16(i32 %c, half %x) {
482482
; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
483483
; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
484484
; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
485-
; GFX11-SAFE-FAKE16-NEXT: v_add_f16_e64 v0, |v0|, v1
485+
; GFX11-SAFE-FAKE16-NEXT: v_sub_f16_e32 v0, v1, v0
486486
; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
487487
;
488488
; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_negk_f16:
@@ -492,7 +492,7 @@ define half @add_select_fabs_negk_negk_f16(i32 %c, half %x) {
492492
; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
493493
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
494494
; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
495-
; GFX11-NSZ-TRUE16-NEXT: v_add_f16_e64 v0.l, |v0.l|, v1.l
495+
; GFX11-NSZ-TRUE16-NEXT: v_sub_f16_e32 v0.l, v1.l, v0.l
496496
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
497497
;
498498
; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_negk_f16:
@@ -502,7 +502,7 @@ define half @add_select_fabs_negk_negk_f16(i32 %c, half %x) {
502502
; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
503503
; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
504504
; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
505-
; GFX11-NSZ-FAKE16-NEXT: v_add_f16_e64 v0, |v0|, v1
505+
; GFX11-NSZ-FAKE16-NEXT: v_sub_f16_e32 v0, v1, v0
506506
; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
507507
%cmp = icmp eq i32 %c, 0
508508
%select = select i1 %cmp, half -2.0, half -1.0

llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,12 +132,11 @@ define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) #0 {
132132
ret void
133133
}
134134

135-
; FIXME: fabs should fold away
136135
; GCN-LABEL: {{^}}add_select_fabs_negk_negk_f32:
137136
; GCN: buffer_load_dword [[X:v[0-9]+]]
138137

139138
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
140-
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[X]]
139+
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]]
141140
define amdgpu_kernel void @add_select_fabs_negk_negk_f32(i32 %c) #0 {
142141
%x = load volatile float, ptr addrspace(1) poison
143142
%cmp = icmp eq i32 %c, 0

llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -812,8 +812,7 @@ define <2 x half> @add_select_fabs_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x)
812812
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
813813
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
814814
; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
815-
; GFX9-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
816-
; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
815+
; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
817816
; GFX9-NEXT: s_setpc_b64 s[30:31]
818817
;
819818
; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
@@ -827,9 +826,7 @@ define <2 x half> @add_select_fabs_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x)
827826
; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v3.l, s0
828827
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
829828
; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
830-
; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
831-
; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
832-
; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
829+
; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
833830
; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
834831
;
835832
; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
@@ -842,9 +839,8 @@ define <2 x half> @add_select_fabs_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x)
842839
; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
843840
; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
844841
; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
845-
; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
846-
; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
847-
; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
842+
; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
843+
; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
848844
; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
849845
;
850846
; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
@@ -858,9 +854,7 @@ define <2 x half> @add_select_fabs_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x)
858854
; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v3.l, s0
859855
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
860856
; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
861-
; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
862-
; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
863-
; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
857+
; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
864858
; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
865859
;
866860
; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
@@ -873,9 +867,8 @@ define <2 x half> @add_select_fabs_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x)
873867
; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
874868
; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
875869
; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
876-
; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
877-
; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
878-
; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
870+
; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
871+
; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
879872
; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
880873
%cmp = icmp eq <2 x i32> %c, zeroinitializer
881874
%select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>

llvm/test/CodeGen/AMDGPU/udiv.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,7 +1515,7 @@ define amdgpu_kernel void @v_udiv_i8(ptr addrspace(1) %out, ptr addrspace(1) %in
15151515
; EG-NEXT: TRUNC * T0.W, PV.W,
15161516
; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.X,
15171517
; EG-NEXT: TRUNC * T0.W, PV.W,
1518-
; EG-NEXT: SETGE * T1.W, |PV.W|, |T0.Y|,
1518+
; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y,
15191519
; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x,
15201520
; EG-NEXT: FLT_TO_UINT * T0.X, T0.W,
15211521
; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
@@ -1658,7 +1658,7 @@ define amdgpu_kernel void @v_udiv_i16(ptr addrspace(1) %out, ptr addrspace(1) %i
16581658
; EG-NEXT: TRUNC * T0.W, PV.W,
16591659
; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.X,
16601660
; EG-NEXT: TRUNC * T0.W, PV.W,
1661-
; EG-NEXT: SETGE * T1.W, |PV.W|, |T0.Y|,
1661+
; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y,
16621662
; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x,
16631663
; EG-NEXT: FLT_TO_UINT * T0.X, T0.W,
16641664
; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
@@ -1858,7 +1858,7 @@ define amdgpu_kernel void @v_udiv_i23(ptr addrspace(1) %out, ptr addrspace(1) %i
18581858
; EG-NEXT: TRUNC * T0.W, PV.W,
18591859
; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.X, T0.Z,
18601860
; EG-NEXT: TRUNC * T0.W, PV.W,
1861-
; EG-NEXT: SETGE * T1.W, |PV.W|, |T0.X|,
1861+
; EG-NEXT: SETGE * T1.W, |PV.W|, T0.X,
18621862
; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x,
18631863
; EG-NEXT: FLT_TO_UINT * T0.X, T0.W,
18641864
; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)

0 commit comments

Comments
 (0)