Skip to content

Commit 068ba30

Browse files
committed
fix condition
1 parent 0ed5341 commit 068ba30

File tree

3 files changed

+37
-17
lines changed

3 files changed

+37
-17
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -528,6 +528,6 @@ bool AMDGPUCombinerHelper::matchConstantIs32BitMask(Register Reg) const {
528528
if (!isShiftedMask_64(Val, MaskIdx, MaskLen))
529529
return false;
530530

531-
// Check if high 32 bits or low 32 bits are all ones.
532-
return (MaskLen == 64 - MaskIdx) || (MaskIdx == 0 && MaskLen >= 32);
531+
// Check if low 32 bits or high 32 bits are all ones.
532+
return MaskLen >= 32 && ((MaskIdx == 0) || (MaskIdx == 64 - MaskLen));
533533
}

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-binop-s64-with-s32-mask.mir

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,26 @@ body: |
6666
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
6767
...
6868
---
69+
name: test_and_mask_hi_16bit_mask_rhs
70+
tracksRegLiveness: true
71+
body: |
72+
bb.0:
73+
liveins: $sgpr0_sgpr1, $sgpr2
74+
; CHECK-LABEL: name: test_and_mask_hi_16bit_mask_rhs
75+
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
76+
; CHECK-NEXT: {{ $}}
77+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
78+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -281474976710656
79+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
80+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY [[AND]](s64)
81+
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
82+
%0:_(s64) = COPY $sgpr0_sgpr1
83+
%1:_(s64) = G_CONSTANT i64 -281474976710656
84+
%2:_(s64) = G_AND %0, %1
85+
$sgpr0_sgpr1 = COPY %2(s64)
86+
SI_RETURN_TO_EPILOG implicit $sgpr0_sgpr1
87+
...
88+
---
6989
name: test_and_mask_lo_rhs
7090
tracksRegLiveness: true
7191
body: |

llvm/test/CodeGen/AMDGPU/lround.ll

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -114,12 +114,12 @@ define i32 @intrinsic_lround_i32_f64(double %arg) {
114114
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115115
; GFX9-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
116116
; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000
117+
; GFX9-GISEL-NEXT: s_brev_b32 s4, 1
117118
; GFX9-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
118-
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
119+
; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
119120
; GFX9-GISEL-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
120-
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v5, 1
121121
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
122-
; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, v5, v4
122+
; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, s4, v4
123123
; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1]
124124
; GFX9-GISEL-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
125125
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -142,7 +142,7 @@ define i32 @intrinsic_lround_i32_f64(double %arg) {
142142
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143143
; GFX10-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
144144
; GFX10-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
145-
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, 0
145+
; GFX10-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
146146
; GFX10-GISEL-NEXT: v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
147147
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
148148
; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v4
@@ -172,7 +172,7 @@ define i32 @intrinsic_lround_i32_f64(double %arg) {
172172
; GFX11-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
173173
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
174174
; GFX11-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
175-
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
175+
; GFX11-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
176176
; GFX11-GISEL-NEXT: v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
177177
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
178178
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0
@@ -372,12 +372,12 @@ define i64 @intrinsic_lround_i64_f64(double %arg) {
372372
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
373373
; GFX9-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
374374
; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000
375-
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v7, 1
375+
; GFX9-GISEL-NEXT: s_brev_b32 s4, 1
376376
; GFX9-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
377-
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
377+
; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
378378
; GFX9-GISEL-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
379379
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
380-
; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, v7, v4
380+
; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, s4, v4
381381
; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1]
382382
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
383383
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3df00000
@@ -414,7 +414,7 @@ define i64 @intrinsic_lround_i64_f64(double %arg) {
414414
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415415
; GFX10-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
416416
; GFX10-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
417-
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, 0
417+
; GFX10-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
418418
; GFX10-GISEL-NEXT: v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
419419
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
420420
; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v4
@@ -456,7 +456,7 @@ define i64 @intrinsic_lround_i64_f64(double %arg) {
456456
; GFX11-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
457457
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
458458
; GFX11-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
459-
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
459+
; GFX11-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
460460
; GFX11-GISEL-NEXT: v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
461461
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
462462
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0
@@ -663,12 +663,12 @@ define i64 @intrinsic_llround_i64_f64(double %arg) {
663663
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
664664
; GFX9-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
665665
; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000
666-
; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v7, 1
666+
; GFX9-GISEL-NEXT: s_brev_b32 s4, 1
667667
; GFX9-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
668-
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
668+
; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
669669
; GFX9-GISEL-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
670670
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc
671-
; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, v7, v4
671+
; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, s4, v4
672672
; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1]
673673
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
674674
; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3df00000
@@ -705,7 +705,7 @@ define i64 @intrinsic_llround_i64_f64(double %arg) {
705705
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
706706
; GFX10-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
707707
; GFX10-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
708-
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, 0
708+
; GFX10-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
709709
; GFX10-GISEL-NEXT: v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
710710
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
711711
; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v4
@@ -747,7 +747,7 @@ define i64 @intrinsic_llround_i64_f64(double %arg) {
747747
; GFX11-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1]
748748
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
749749
; GFX11-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3]
750-
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0
750+
; GFX11-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0
751751
; GFX11-GISEL-NEXT: v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
752752
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
753753
; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0

0 commit comments

Comments
 (0)