Skip to content

Commit ef08d31

Browse files
committed
fix fabs DemandedBits
1 parent 9949c6a commit ef08d31

File tree

4 files changed

+35
-34
lines changed

4 files changed

+35
-34
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2973,8 +2973,8 @@ bool TargetLowering::SimplifyDemandedBits(
29732973
if (!DemandedBits.intersects(SignMask))
29742974
return TLO.CombineTo(Op, Op0);
29752975

2976-
if (SimplifyDemandedBits(Op0, ~SignMask & DemandedBits, DemandedElts, Known,
2977-
TLO, Depth + 1))
2976+
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
2977+
Depth + 1))
29782978
return true;
29792979

29802980
if (Known.isNonNegative())

llvm/test/CodeGen/AMDGPU/bf16.ll

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -18639,17 +18639,17 @@ define bfloat @v_fabs_bf16(bfloat %a) {
1863918639
; GCN: ; %bb.0:
1864018640
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1864118641
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
18642-
; GCN-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
18643-
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
18642+
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
18643+
; GCN-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
1864418644
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1864518645
; GCN-NEXT: s_setpc_b64 s[30:31]
1864618646
;
1864718647
; GFX7-LABEL: v_fabs_bf16:
1864818648
; GFX7: ; %bb.0:
1864918649
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1865018650
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
18651-
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
18652-
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
18651+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
18652+
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
1865318653
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1865418654
; GFX7-NEXT: s_setpc_b64 s[30:31]
1865518655
;
@@ -18832,8 +18832,8 @@ define bfloat @v_fneg_fabs_bf16(bfloat %a) {
1883218832
; GCN: ; %bb.0:
1883318833
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1883418834
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
18835-
; GCN-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
18836-
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
18835+
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
18836+
; GCN-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
1883718837
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1883818838
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
1883918839
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
@@ -18843,8 +18843,8 @@ define bfloat @v_fneg_fabs_bf16(bfloat %a) {
1884318843
; GFX7: ; %bb.0:
1884418844
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1884518845
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
18846-
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
18847-
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
18846+
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
18847+
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
1884818848
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
1884918849
; GFX7-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
1885018850
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
@@ -18889,23 +18889,23 @@ define amdgpu_ps i32 @s_fneg_fabs_bf16(bfloat inreg %a) {
1888918889
; GCN-LABEL: s_fneg_fabs_bf16:
1889018890
; GCN: ; %bb.0:
1889118891
; GCN-NEXT: v_mul_f32_e64 v0, 1.0, s0
18892-
; GCN-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
18893-
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
18894-
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
18895-
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
18896-
; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1889718892
; GCN-NEXT: v_readfirstlane_b32 s0, v0
18893+
; GCN-NEXT: s_and_b32 s0, s0, 0xffff0000
18894+
; GCN-NEXT: s_bitset0_b32 s0, 31
18895+
; GCN-NEXT: s_and_b32 s0, s0, 0xffff0000
18896+
; GCN-NEXT: s_xor_b32 s0, s0, 0x80000000
18897+
; GCN-NEXT: s_lshr_b32 s0, s0, 16
1889818898
; GCN-NEXT: ; return to shader part epilog
1889918899
;
1890018900
; GFX7-LABEL: s_fneg_fabs_bf16:
1890118901
; GFX7: ; %bb.0:
1890218902
; GFX7-NEXT: v_mul_f32_e64 v0, 1.0, s0
18903-
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
18904-
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
18905-
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
18906-
; GFX7-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
18907-
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1890818903
; GFX7-NEXT: v_readfirstlane_b32 s0, v0
18904+
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff0000
18905+
; GFX7-NEXT: s_bitset0_b32 s0, 31
18906+
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff0000
18907+
; GFX7-NEXT: s_xor_b32 s0, s0, 0x80000000
18908+
; GFX7-NEXT: s_lshr_b32 s0, s0, 16
1890918909
; GFX7-NEXT: ; return to shader part epilog
1891018910
;
1891118911
; GFX8-LABEL: s_fneg_fabs_bf16:

llvm/test/CodeGen/AMDGPU/fabs.bf16.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -220,10 +220,10 @@ define amdgpu_kernel void @s_fabs_v4bf16(ptr addrspace(1) %out, <4 x bfloat> %in
220220
; CI-NEXT: s_waitcnt lgkmcnt(0)
221221
; CI-NEXT: s_and_b32 s4, s3, 0xffff0000
222222
; CI-NEXT: s_lshl_b32 s3, s3, 16
223-
; CI-NEXT: s_and_b32 s5, s2, 0x7fff0000
223+
; CI-NEXT: s_and_b32 s5, s2, 0xffff0000
224224
; CI-NEXT: v_mul_f32_e64 v0, 1.0, |s4|
225225
; CI-NEXT: v_mul_f32_e64 v1, 1.0, |s3|
226-
; CI-NEXT: v_mul_f32_e64 v2, 1.0, s5
226+
; CI-NEXT: v_mul_f32_e64 v2, 1.0, |s5|
227227
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
228228
; CI-NEXT: s_lshl_b32 s2, s2, 16
229229
; CI-NEXT: v_alignbit_b32 v1, v0, v1, 16
@@ -944,9 +944,9 @@ define amdgpu_kernel void @v_extract_fabs_fold_v2bf16(ptr addrspace(1) %in) #0 {
944944
; CI-NEXT: flat_load_dword v0, v[0:1]
945945
; CI-NEXT: s_waitcnt vmcnt(0)
946946
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v0
947-
; CI-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
947+
; CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
948948
; CI-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
949-
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
949+
; CI-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
950950
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
951951
; CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
952952
; CI-NEXT: v_mul_f32_e32 v1, 4.0, v1

llvm/test/CodeGen/AMDGPU/fneg-fabs.bf16.ll

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -481,16 +481,17 @@ define amdgpu_kernel void @s_fneg_fabs_v2bf16_non_bc_src(ptr addrspace(1) %out,
481481
; CI-NEXT: s_lshl_b32 s2, s2, 16
482482
; CI-NEXT: v_add_f32_e64 v0, s3, 2.0
483483
; CI-NEXT: v_add_f32_e64 v1, s2, 1.0
484-
; CI-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
485-
; CI-NEXT: v_and_b32_e32 v1, 0x7fff0000, v1
486-
; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0
487-
; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1
484+
; CI-NEXT: v_readfirstlane_b32 s2, v0
485+
; CI-NEXT: s_and_b32 s2, s2, 0xffff0000
486+
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
487+
; CI-NEXT: s_bitset0_b32 s2, 31
488+
; CI-NEXT: v_and_b32_e32 v0, 0x7fffffff, v1
489+
; CI-NEXT: s_and_b32 s2, s2, 0xffff0000
490+
; CI-NEXT: s_xor_b32 s2, s2, 0x80000000
488491
; CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
492+
; CI-NEXT: s_lshr_b32 s2, s2, 16
489493
; CI-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
490-
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
491-
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
492-
; CI-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
493-
; CI-NEXT: v_alignbit_b32 v2, v0, v1, 16
494+
; CI-NEXT: v_alignbit_b32 v2, s2, v0, 16
494495
; CI-NEXT: v_mov_b32_e32 v0, s0
495496
; CI-NEXT: v_mov_b32_e32 v1, s1
496497
; CI-NEXT: flat_store_dword v[0:1], v2
@@ -675,8 +676,8 @@ define amdgpu_kernel void @fneg_fabs_v4bf16(ptr addrspace(1) %out, <4 x bfloat>
675676
; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
676677
; CI-NEXT: s_waitcnt lgkmcnt(0)
677678
; CI-NEXT: s_lshl_b32 s4, s2, 16
678-
; CI-NEXT: s_and_b32 s2, s2, 0x7fff0000
679-
; CI-NEXT: v_mul_f32_e64 v2, 1.0, s2
679+
; CI-NEXT: s_and_b32 s2, s2, 0xffff0000
680+
; CI-NEXT: v_mul_f32_e64 v2, 1.0, |s2|
680681
; CI-NEXT: s_and_b32 s2, s3, 0xffff0000
681682
; CI-NEXT: s_lshl_b32 s5, s3, 16
682683
; CI-NEXT: v_mul_f32_e64 v3, 1.0, |s2|

0 commit comments

Comments
 (0)