Skip to content

Commit f4c47fe

Browse files
committed
fix tests
1 parent 7b544d4 commit f4c47fe

File tree

3 files changed

+24
-19
lines changed

3 files changed

+24
-19
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2991,14 +2991,20 @@ bool TargetLowering::SimplifyDemandedBits(
29912991
case ISD::FCOPYSIGN: {
29922992
SDValue Op0 = Op.getOperand(0);
29932993
SDValue Op1 = Op.getOperand(1);
2994-
APInt SignMask = APInt::getSignMask(BitWidth);
29952994

2996-
if (!DemandedBits.intersects(SignMask))
2995+
unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
2996+
unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
2997+
APInt SignMask0 = APInt::getSignMask(BitWidth0);
2998+
APInt SignMask1 = APInt::getSignMask(BitWidth1);
2999+
3000+
if (!DemandedBits.intersects(SignMask0))
29973001
return TLO.CombineTo(Op, Op0);
29983002

2999-
if (SimplifyDemandedBits(Op0, ~SignMask & DemandedBits, DemandedElts, Known,
3000-
TLO, Depth + 1) ||
3001-
SimplifyDemandedBits(Op1, SignMask, DemandedElts, Known2, TLO,
3003+
APInt ScalarDemandedBits = DemandedBits.trunc(BitWidth0);
3004+
3005+
if (SimplifyDemandedBits(Op0, ~SignMask0 & ScalarDemandedBits, DemandedElts,
3006+
Known, TLO, Depth + 1) ||
3007+
SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
30023008
Depth + 1))
30033009
return true;
30043010

@@ -3011,8 +3017,8 @@ bool TargetLowering::SimplifyDemandedBits(
30113017
Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
30123018

30133019
if (Known2.isNegative()) {
3014-
Known.One |= SignMask;
3015-
Known.Zero &= ~SignMask;
3020+
Known.One |= SignMask0;
3021+
Known.Zero &= ~SignMask0;
30163022
}
30173023

30183024
break;

llvm/test/CodeGen/AMDGPU/fabs.bf16.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -220,7 +220,7 @@ define amdgpu_kernel void @s_fabs_v4bf16(ptr addrspace(1) %out, <4 x bfloat> %in
220220
; CI-NEXT: s_waitcnt lgkmcnt(0)
221221
; CI-NEXT: s_and_b32 s4, s3, 0xffff0000
222222
; CI-NEXT: s_lshl_b32 s3, s3, 16
223-
; CI-NEXT: s_and_b32 s5, s2, 0xffff0000
223+
; CI-NEXT: s_and_b32 s5, s2, 0x7fff0000
224224
; CI-NEXT: v_mul_f32_e64 v0, 1.0, |s4|
225225
; CI-NEXT: v_mul_f32_e64 v1, 1.0, |s3|
226226
; CI-NEXT: v_mul_f32_e64 v2, 1.0, |s5|
@@ -944,7 +944,7 @@ define amdgpu_kernel void @v_extract_fabs_fold_v2bf16(ptr addrspace(1) %in) #0 {
944944
; CI-NEXT: flat_load_dword v0, v[0:1]
945945
; CI-NEXT: s_waitcnt vmcnt(0)
946946
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v0
947-
; CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
947+
; CI-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
948948
; CI-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
949949
; CI-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
950950
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1

llvm/test/CodeGen/AMDGPU/fneg-fabs.bf16.ll

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -481,17 +481,16 @@ define amdgpu_kernel void @s_fneg_fabs_v2bf16_non_bc_src(ptr addrspace(1) %out,
481481
; CI-NEXT: s_lshl_b32 s2, s2, 16
482482
; CI-NEXT: v_add_f32_e64 v0, s3, 2.0
483483
; CI-NEXT: v_add_f32_e64 v1, s2, 1.0
484-
; CI-NEXT: v_readfirstlane_b32 s2, v0
485-
; CI-NEXT: s_and_b32 s2, s2, 0xffff0000
486-
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
487-
; CI-NEXT: s_bitset0_b32 s2, 31
488-
; CI-NEXT: v_and_b32_e32 v0, 0x7fffffff, v1
489-
; CI-NEXT: s_and_b32 s2, s2, 0xffff0000
490-
; CI-NEXT: s_xor_b32 s2, s2, 0x80000000
484+
; CI-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
485+
; CI-NEXT: v_and_b32_e32 v1, 0x7fff0000, v1
486+
; CI-NEXT: v_mul_f32_e64 v0, 1.0, |v0|
487+
; CI-NEXT: v_mul_f32_e64 v1, 1.0, |v1|
491488
; CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
492-
; CI-NEXT: s_lshr_b32 s2, s2, 16
493489
; CI-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
494-
; CI-NEXT: v_alignbit_b32 v2, s2, v0, 16
490+
; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
491+
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
492+
; CI-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
493+
; CI-NEXT: v_alignbit_b32 v2, v0, v1, 16
495494
; CI-NEXT: v_mov_b32_e32 v0, s0
496495
; CI-NEXT: v_mov_b32_e32 v1, s1
497496
; CI-NEXT: flat_store_dword v[0:1], v2
@@ -676,7 +675,7 @@ define amdgpu_kernel void @fneg_fabs_v4bf16(ptr addrspace(1) %out, <4 x bfloat>
676675
; CI-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
677676
; CI-NEXT: s_waitcnt lgkmcnt(0)
678677
; CI-NEXT: s_lshl_b32 s4, s2, 16
679-
; CI-NEXT: s_and_b32 s2, s2, 0xffff0000
678+
; CI-NEXT: s_and_b32 s2, s2, 0x7fff0000
680679
; CI-NEXT: v_mul_f32_e64 v2, 1.0, |s2|
681680
; CI-NEXT: s_and_b32 s2, s3, 0xffff0000
682681
; CI-NEXT: s_lshl_b32 s5, s3, 16

0 commit comments

Comments
 (0)