Skip to content

Commit 9d7007d

Browse files
committed
fix bfe for true16 mode
1 parent 362b9d7 commit 9d7007d

File tree

2 files changed

+43
-0
lines changed

2 files changed

+43
-0
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2623,6 +2623,8 @@ def : GCNPat<
26232623
(i32 (DivergentSextInreg<i1> i32:$src)),
26242624
(V_BFE_I32_e64 i32:$src, (i32 0), (i32 1))>;
26252625

2626+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
2627+
let True16Predicate = p in {
26262628
def : GCNPat <
26272629
(i16 (DivergentSextInreg<i1> i16:$src)),
26282630
(V_BFE_I32_e64 $src, (i32 0), (i32 1))
@@ -2632,6 +2634,23 @@ def : GCNPat <
26322634
(i16 (DivergentSextInreg<i8> i16:$src)),
26332635
(V_BFE_I32_e64 $src, (i32 0), (i32 8))
26342636
>;
2637+
}
2638+
2639+
// Divergent i16 sign-extend-in-register patterns used when real True16
// instructions are selected (True16Predicate = UseRealTrue16Insts).
//
// In these patterns $src is a VGPR_16, while V_BFE_I32_e64 is fed a 32-bit
// operand. The source is therefore widened with a REG_SEQUENCE that places
// $src in lo16 and leaves hi16 undefined (IMPLICIT_DEF). The pattern result is
// i16, so the 32-bit BFE result is narrowed again by extracting its lo16
// subregister; without this the selected node would produce a 32-bit register
// where a 16-bit value is expected.
let True16Predicate = UseRealTrue16Insts in {
// sext_inreg from i1: bitfield extract with operands (0, 1).
def : GCNPat <
  (i16 (DivergentSextInreg<i1> i16:$src)),
  (EXTRACT_SUBREG
    (V_BFE_I32_e64
      (REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (i16 (IMPLICIT_DEF)), hi16),
      (i32 0), (i32 1)),
    lo16)
>;

// sext_inreg from i8: bitfield extract with operands (0, 8).
def : GCNPat <
  (i16 (DivergentSextInreg<i8> i16:$src)),
  (EXTRACT_SUBREG
    (V_BFE_I32_e64
      (REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (i16 (IMPLICIT_DEF)), hi16),
      (i32 0), (i32 8)),
    lo16)
>;
}
26352654

26362655
def : GCNPat<
26372656
(i32 (DivergentSextInreg<i8> i32:$src)),

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,11 +319,21 @@ let SchedRW = [Write64Bit] in {
319319
} // End SchedRW = [Write64Bit]
320320
} // End isReMaterializable = 1
321321

322+
// Divergent sext i16 -> i32, instantiated once for each predicate in the
// list below (NotHasTrue16BitInsts and UseFakeTrue16Insts).
// The i16 source is passed to V_BFE_I32_e64 directly.
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
323+
let True16Predicate = p in
322324
def : GCNPat<
323325
(i32 (DivergentUnaryFrag<sext> i16:$src)),
324326
// NOTE(review): (i32 0) and (i32 0x10) are presumably the bitfield offset
// and width (16 bits) -- confirm against the V_BFE_I32 operand order.
(i32 (V_BFE_I32_e64 i16:$src, (i32 0), (i32 0x10)))
325327
>;
326328

329+
// Divergent sext i16 -> i32 when True16Predicate is UseRealTrue16Insts.
// Here $src is a VGPR_16, so it is first placed in the lo16 half of a
// VGPR_32 via REG_SEQUENCE (hi16 is IMPLICIT_DEF) before being handed to
// V_BFE_I32_e64.
let True16Predicate = UseRealTrue16Insts in
330+
def : GCNPat<
331+
(i32 (DivergentUnaryFrag<sext> i16:$src)),
332+
(i32 (V_BFE_I32_e64
333+
(REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (i16 (IMPLICIT_DEF)), hi16),
334+
// NOTE(review): (i32 0) and (i32 0x10) are presumably the bitfield offset
// and width (16 bits) -- confirm against the V_BFE_I32 operand order.
(i32 0), (i32 0x10)))
335+
>;
336+
327337
let isReMaterializable = 1 in {
328338
let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
329339
defm V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
@@ -423,6 +433,8 @@ def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32
423433

424434
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
425435

436+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
437+
let True16Predicate = p in
426438
def : GCNPat<
427439
(i64 (DivergentUnaryFrag<sext> i16:$src)),
428440
(REG_SEQUENCE VReg_64,
@@ -432,6 +444,18 @@ def : GCNPat<
432444
), VGPR_32)), sub1)
433445
>;
434446

447+
// Divergent sext i16 -> i64 when True16Predicate is UseRealTrue16Insts.
//
// The 64-bit result is assembled from two 32-bit halves:
//   sub0: V_BFE_I32_e64 applied to the widened source with operands (0, 0x10).
//   sub1: that same BFE value passed through V_ASHRREV_I32_e32 with 0x1f.
//
// In this mode $src is a VGPR_16, so every V_BFE_I32_e64 use must receive it
// widened into a VGPR_32 via REG_SEQUENCE (lo16 = $src, hi16 = IMPLICIT_DEF).
// Both halves use the same widened operand; passing the raw $src to the BFE
// that feeds sub1 would hand a 16-bit register to a 32-bit operand.
let True16Predicate = UseRealTrue16Insts in
def : GCNPat<
  (i64 (DivergentUnaryFrag<sext> i16:$src)),
  (REG_SEQUENCE VReg_64,
    (i32 (V_BFE_I32_e64
      (REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (i16 (IMPLICIT_DEF)), hi16),
      (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10)))), sub0,
    (i32 (COPY_TO_REGCLASS
      (V_ASHRREV_I32_e32 (S_MOV_B32 (i32 0x1f)),
        (i32 (V_BFE_I32_e64
          (REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (i16 (IMPLICIT_DEF)), hi16),
          (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10))))
      ), VGPR_32)), sub1)
>;
458+
435459
let SubtargetPredicate = isGFX8Plus, Uses = [MODE, M0, EXEC], OtherPredicates = [isNotGFX90APlus] in {
436460
def V_INTERP_P1_F32_e64 : VOP3Interp <"v_interp_p1_f32", VOP3_INTERP>;
437461
def V_INTERP_P2_F32_e64 : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>;

0 commit comments

Comments
 (0)