Skip to content

Commit 1ba6191

Browse files
committed
Do not substitute +0/-0 imm with register from comparison.
1 parent ed642aa commit 1ba6191

File tree

2 files changed

+183
-23
lines changed

2 files changed

+183
-23
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,11 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
14541454
if (!SubstOp || !SubstOp->isReg())
14551455
return false;
14561456

1457+
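// Do not substitute a +0/-0 immediate with the register from the comparison: +0.0 and -0.0 compare equal, so the register may hold the zero of the opposite sign.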
1458+
if ((CmpValImm == 0 || CmpValImm == 0xffffffff80000000) &&
1459+
AMDGPU::isSISrcFPOperand(PredI->getDesc(), SubstOp->getOperandNo()))
1460+
return false;
1461+
14571462
LLVM_DEBUG(dbgs() << "Folded " << MI << " into ");
14581463
SrcOp->setReg(SubstOp->getReg());
14591464
LLVM_DEBUG(dbgs() << MI);

llvm/test/CodeGen/AMDGPU/fold-cndmask-select.ll

Lines changed: 178 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,29 +2,6 @@
22
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=GFX9
33
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck %s -check-prefix=GFX10
44

5-
define bfloat @bf16_oeq_v_i(bfloat %arg, bfloat %arg1) {
6-
; GFX9-LABEL: bf16_oeq_v_i:
7-
; GFX9: ; %bb.0: ; %bb
8-
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9-
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v0
10-
; GFX9-NEXT: s_mov_b32 s4, 0x42420000
11-
; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, s4, v2
12-
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
13-
; GFX9-NEXT: s_setpc_b64 s[30:31]
14-
;
15-
; GFX10-LABEL: bf16_oeq_v_i:
16-
; GFX10: ; %bb.0: ; %bb
17-
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18-
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v0
19-
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x42420000, v2
20-
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
21-
; GFX10-NEXT: s_setpc_b64 s[30:31]
22-
bb:
23-
%fcmp = fcmp oeq bfloat %arg, 0xR4242
24-
%select = select i1 %fcmp, bfloat %arg, bfloat %arg1
25-
ret bfloat %select
26-
}
27-
285
define float @f32_oeq_v_i(float %arg, float %arg1) {
296
; GFX9-LABEL: f32_oeq_v_i:
307
; GFX9: ; %bb.0: ; %bb
@@ -440,3 +417,181 @@ bb:
440417
%select = select i1 %fcmp, half %arg, half %arg1
441418
ret half %select
442419
}
420+
421+
define float @f32_oeq_negz_i(float %arg, float %arg1) {
422+
; GFX9-LABEL: f32_oeq_negz_i:
423+
; GFX9: ; %bb.0: ; %bb
424+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425+
; GFX9-NEXT: s_brev_b32 s4, 1
426+
; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
427+
; GFX9-NEXT: v_cmp_neq_f32_e32 vcc, s4, v0
428+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
429+
; GFX9-NEXT: s_setpc_b64 s[30:31]
430+
;
431+
; GFX10-LABEL: f32_oeq_negz_i:
432+
; GFX10: ; %bb.0: ; %bb
433+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434+
; GFX10-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0x80000000, v0
435+
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x80000000, v1, vcc_lo
436+
; GFX10-NEXT: s_setpc_b64 s[30:31]
437+
bb:
438+
%fcmp = fcmp oeq float %arg, -0.000000e+00
439+
%select = select i1 %fcmp, float -0.000000e+00, float %arg1
440+
ret float %select
441+
}
442+
443+
define float @f32_oeq_negz_z(float %arg, float %arg1) {
444+
; GFX9-LABEL: f32_oeq_negz_z:
445+
; GFX9: ; %bb.0: ; %bb
446+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
447+
; GFX9-NEXT: s_brev_b32 s4, 1
448+
; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, s4, v0
449+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
450+
; GFX9-NEXT: s_setpc_b64 s[30:31]
451+
;
452+
; GFX10-LABEL: f32_oeq_negz_z:
453+
; GFX10: ; %bb.0: ; %bb
454+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
455+
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0x80000000, v0
456+
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
457+
; GFX10-NEXT: s_setpc_b64 s[30:31]
458+
bb:
459+
%fcmp = fcmp oeq float %arg, -0.000000e+00
460+
%select = select i1 %fcmp, float %arg, float %arg1
461+
ret float %select
462+
}
463+
464+
define half @f16_oeq_negz_i(half %arg, half %arg1) {
465+
; GFX9-LABEL: f16_oeq_negz_i:
466+
; GFX9: ; %bb.0: ; %bb
467+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
468+
; GFX9-NEXT: s_mov_b32 s4, 0x8000
469+
; GFX9-NEXT: v_cmp_neq_f16_e32 vcc, s4, v0
470+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
471+
; GFX9-NEXT: s_setpc_b64 s[30:31]
472+
;
473+
; GFX10-LABEL: f16_oeq_negz_i:
474+
; GFX10: ; %bb.0: ; %bb
475+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476+
; GFX10-NEXT: v_cmp_neq_f16_e32 vcc_lo, 0x8000, v0
477+
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x8000, v1, vcc_lo
478+
; GFX10-NEXT: s_setpc_b64 s[30:31]
479+
bb:
480+
%fcmp = fcmp oeq half %arg, -0.000000e+00
481+
%select = select i1 %fcmp, half -0.000000e+00, half %arg1
482+
ret half %select
483+
}
484+
485+
define half @f16_oeq_negz_z(half %arg, half %arg1) {
486+
; GFX9-LABEL: f16_oeq_negz_z:
487+
; GFX9: ; %bb.0: ; %bb
488+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
489+
; GFX9-NEXT: s_mov_b32 s4, 0x8000
490+
; GFX9-NEXT: v_cmp_eq_f16_e32 vcc, s4, v0
491+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
492+
; GFX9-NEXT: s_setpc_b64 s[30:31]
493+
;
494+
; GFX10-LABEL: f16_oeq_negz_z:
495+
; GFX10: ; %bb.0: ; %bb
496+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
497+
; GFX10-NEXT: v_cmp_eq_f16_e32 vcc_lo, 0x8000, v0
498+
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
499+
; GFX10-NEXT: s_setpc_b64 s[30:31]
500+
bb:
501+
%fcmp = fcmp oeq half %arg, -0.000000e+00
502+
%select = select i1 %fcmp, half %arg, half %arg1
503+
ret half %select
504+
}
505+
506+
define double @f64_oeq_z_i(double %arg, double %arg1) {
507+
; GFX9-LABEL: f64_oeq_z_i:
508+
; GFX9: ; %bb.0: ; %bb
509+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
510+
; GFX9-NEXT: v_cmp_neq_f64_e32 vcc, 0, v[0:1]
511+
; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
512+
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
513+
; GFX9-NEXT: s_setpc_b64 s[30:31]
514+
;
515+
; GFX10-LABEL: f64_oeq_z_i:
516+
; GFX10: ; %bb.0: ; %bb
517+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
518+
; GFX10-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0, v[0:1]
519+
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
520+
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc_lo
521+
; GFX10-NEXT: s_setpc_b64 s[30:31]
522+
bb:
523+
%fcmp = fcmp oeq double %arg, 0.000000e+00
524+
%select = select i1 %fcmp, double 0.000000e+00, double %arg1
525+
ret double %select
526+
}
527+
528+
define double @f64_oeq_z_z(double %arg, double %arg1) {
529+
; GFX9-LABEL: f64_oeq_z_z:
530+
; GFX9: ; %bb.0: ; %bb
531+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
532+
; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, 0, v[0:1]
533+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
534+
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
535+
; GFX9-NEXT: s_setpc_b64 s[30:31]
536+
;
537+
; GFX10-LABEL: f64_oeq_z_z:
538+
; GFX10: ; %bb.0: ; %bb
539+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
540+
; GFX10-NEXT: v_cmp_eq_f64_e32 vcc_lo, 0, v[0:1]
541+
; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
542+
; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
543+
; GFX10-NEXT: s_setpc_b64 s[30:31]
544+
bb:
545+
%fcmp = fcmp oeq double %arg, 0.000000e+00
546+
%select = select i1 %fcmp, double %arg, double %arg1
547+
ret double %select
548+
}
549+
550+
define double @f64_oeq_negz_i(double %arg, double %arg1) {
551+
; GFX9-LABEL: f64_oeq_negz_i:
552+
; GFX9: ; %bb.0: ; %bb
553+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554+
; GFX9-NEXT: s_mov_b32 s4, 0
555+
; GFX9-NEXT: s_brev_b32 s5, 1
556+
; GFX9-NEXT: v_cmp_neq_f64_e32 vcc, s[4:5], v[0:1]
557+
; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
558+
; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc
559+
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
560+
; GFX9-NEXT: s_setpc_b64 s[30:31]
561+
;
562+
; GFX10-LABEL: f64_oeq_negz_i:
563+
; GFX10: ; %bb.0: ; %bb
564+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
565+
; GFX10-NEXT: v_cmp_neq_f64_e32 vcc_lo, 0x80000000, v[0:1]
566+
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc_lo
567+
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x80000000, v3, vcc_lo
568+
; GFX10-NEXT: s_setpc_b64 s[30:31]
569+
bb:
570+
%fcmp = fcmp oeq double %arg, -0.000000e+00
571+
%select = select i1 %fcmp, double -0.000000e+00, double %arg1
572+
ret double %select
573+
}
574+
575+
define double @f64_oeq_negz_z(double %arg, double %arg1) {
576+
; GFX9-LABEL: f64_oeq_negz_z:
577+
; GFX9: ; %bb.0: ; %bb
578+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
579+
; GFX9-NEXT: s_mov_b32 s4, 0
580+
; GFX9-NEXT: s_brev_b32 s5, 1
581+
; GFX9-NEXT: v_cmp_eq_f64_e32 vcc, s[4:5], v[0:1]
582+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
583+
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
584+
; GFX9-NEXT: s_setpc_b64 s[30:31]
585+
;
586+
; GFX10-LABEL: f64_oeq_negz_z:
587+
; GFX10: ; %bb.0: ; %bb
588+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
589+
; GFX10-NEXT: v_cmp_eq_f64_e32 vcc_lo, 0x80000000, v[0:1]
590+
; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
591+
; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
592+
; GFX10-NEXT: s_setpc_b64 s[30:31]
593+
bb:
594+
%fcmp = fcmp oeq double %arg, -0.000000e+00
595+
%select = select i1 %fcmp, double %arg, double %arg1
596+
ret double %select
597+
}

0 commit comments

Comments
 (0)