Skip to content

Commit cbab59c

Browse files
committed
[AMDGPU] Ensure divergence for trunc -> v_alignbit pattern
Change-Id: Ie919caa7e7707ab3102f2e352eae83edbb27dbf5
1 parent 47c7afd commit cbab59c

40 files changed

+34123
-30735
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2660,11 +2660,11 @@ let True16Predicate = NotHasTrue16BitInsts in {
26602660
let SubtargetPredicate = isNotGFX9Plus in {
26612661
def : ROTRPattern <V_ALIGNBIT_B32_e64>;
26622662

2663-
def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
2663+
def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (and i32:$src1, (i32 31))))),
26642664
(V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
26652665
(i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
26662666

2667-
def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
2667+
def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
26682668
(V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
26692669
(i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
26702670
} // isNotGFX9Plus
@@ -2678,8 +2678,8 @@ def : GCNPat <
26782678
$src1, /* clamp */ 0, /* op_sel */ 0)
26792679
>;
26802680

2681-
foreach pat = [(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
2682-
(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1))))] in
2681+
foreach pat = [(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (and i32:$src1, (i32 31))))),
2682+
(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:$src1))))] in
26832683
def : GCNPat<pat,
26842684
(V_ALIGNBIT_B32_opsel_e64 0, /* src0_modifiers */
26852685
(i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
@@ -2708,7 +2708,7 @@ def : GCNPat <
27082708
/* clamp */ 0, /* op_sel */ 0)
27092709
>;
27102710

2711-
def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
2711+
def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
27122712
(V_ALIGNBIT_B32_t16_e64 0, /* src0_modifiers */
27132713
(i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
27142714
0, /* src1_modifiers */
@@ -2734,7 +2734,7 @@ def : GCNPat <
27342734
$src1, /* clamp */ 0, /* op_sel */ 0)
27352735
>;
27362736

2737-
def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
2737+
def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (and i32:$src1, (i32 31))))),
27382738
(V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
27392739
(i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
27402740
0, /* src1_modifiers */
@@ -2743,7 +2743,7 @@ def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
27432743
$src1, /* clamp */ 0, /* op_sel */ 0)
27442744
>;
27452745

2746-
def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
2746+
def : GCNPat<(i32 (DivergentUnaryFrag<trunc> (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
27472747
(V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
27482748
(i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
27492749
0, /* src1_modifiers */

llvm/test/CodeGen/AMDGPU/alignbit-pat.ll

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,9 @@ define amdgpu_kernel void @alignbit_shr_pat(ptr addrspace(1) nocapture readonly
1414
; GCN-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
1515
; GCN-NEXT: s_mov_b32 s4, s2
1616
; GCN-NEXT: s_mov_b32 s5, s3
17+
; GCN-NEXT: s_and_b32 s0, s8, 31
1718
; GCN-NEXT: s_waitcnt vmcnt(0)
18-
; GCN-NEXT: v_alignbit_b32 v0, v1, v0, s8
19+
; GCN-NEXT: v_lshr_b64 v[0:1], v[0:1], s0
1920
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
2021
; GCN-NEXT: s_endpgm
2122
bb:
@@ -128,7 +129,7 @@ define amdgpu_kernel void @alignbit_shr_pat_const30(ptr addrspace(1) nocapture r
128129
; GCN-NEXT: s_mov_b32 s4, s2
129130
; GCN-NEXT: s_mov_b32 s5, s3
130131
; GCN-NEXT: s_waitcnt vmcnt(0)
131-
; GCN-NEXT: v_alignbit_b32 v0, v1, v0, 30
132+
; GCN-NEXT: v_lshr_b64 v[0:1], v[0:1], 30
132133
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
133134
; GCN-NEXT: s_endpgm
134135
bb:

0 commit comments

Comments
 (0)