Skip to content

Commit 670d0a0

Browse files
committed
[AMDGPU][True16][CodeGen] use vgpr16 for zext patterns (llvm#153894)
Update true16 mode so that the zext patterns use vgpr16 for 16-bit data types. This stops isel from inserting an invalid "vgpr32 = copy vgpr16".
1 parent 8429f7f commit 670d0a0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+12689
-14019
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3056,6 +3056,8 @@ def : GCNPat<
30563056
}
30573057
} // AddedComplexity = 1
30583058

3059+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3060+
let True16Predicate = p in {
30593061
def : GCNPat<
30603062
(i32 (DivergentUnaryFrag<zext> i16:$src)),
30613063
(V_AND_B32_e64 (S_MOV_B32 (i32 0xffff)), $src)
@@ -3070,7 +3072,28 @@ def : GCNPat<
30703072

30713073
def : GCNPat<
30723074
(i32 (zext (i16 (bitconvert fp16_zeros_high_16bits:$src)))),
3073-
(COPY VSrc_b16:$src)>;
3075+
(COPY VSrc_b16:$src)
3076+
>;
3077+
}
3078+
3079+
let True16Predicate = UseRealTrue16Insts in {
3080+
def : GCNPat<
3081+
(i32 (DivergentUnaryFrag<zext> i16:$src)),
3082+
(REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (V_MOV_B16_t16_e64 0, (i16 0), 0), hi16)
3083+
>;
3084+
3085+
def : GCNPat<
3086+
(i64 (DivergentUnaryFrag<zext> i16:$src)),
3087+
(REG_SEQUENCE VReg_64,
3088+
(INSERT_SUBREG (i32 (V_MOV_B32_e32 (i32 0))), VGPR_16:$src, lo16), sub0,
3089+
(S_MOV_B32 (i32 0)), sub1)
3090+
>;
3091+
3092+
def : GCNPat<
3093+
(i32 (zext (i16 (bitconvert fp16_zeros_high_16bits:$src)))),
3094+
(REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (V_MOV_B16_t16_e64 0, (i16 0), 0), hi16)
3095+
>;
3096+
}
30743097

30753098
def : GCNPat <
30763099
(i32 (trunc i64:$a)),

llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ define zeroext i16 @v_mul_i16_zeroext(i16 zeroext %num, i16 zeroext %den) {
164164
; GFX11-TRUE16: ; %bb.0:
165165
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166166
; GFX11-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
167-
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
167+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0
168168
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
169169
;
170170
; GFX11-FAKE16-LABEL: v_mul_i16_zeroext:

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll

Lines changed: 5636 additions & 6265 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll

Lines changed: 546 additions & 602 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll

Lines changed: 620 additions & 700 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll

Lines changed: 1352 additions & 1534 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.32bit.ll

Lines changed: 108 additions & 132 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll

Lines changed: 2537 additions & 2877 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll

Lines changed: 287 additions & 350 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll

Lines changed: 283 additions & 311 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)