Skip to content

Commit e83a3b8

Browse files
authored
[AMDGPU] Introduce and use NotUseRealTrue16Insts. NFC. (#161373)
This removes ~2000 lines from both AMDGPUGenDAGISel.inc and AMDGPUGenGlobalISel.inc.
1 parent 1ff3e2e commit e83a3b8

File tree

8 files changed

+43
-67
lines changed

8 files changed

+43
-67
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2589,6 +2589,8 @@ def NotHasTrue16BitInsts : True16PredicateClass<"!Subtarget->hasTrue16BitInsts()
25892589
// only allow 32-bit registers in operands and use low halves thereof.
25902590
def UseRealTrue16Insts : True16PredicateClass<"Subtarget->useRealTrue16Insts()">,
25912591
AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>;
2592+
def NotUseRealTrue16Insts : True16PredicateClass<"!Subtarget->useRealTrue16Insts()">,
2593+
AssemblerPredicate<(not (all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts))>;
25922594
def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() && "
25932595
"!Subtarget->useRealTrue16Insts()">,
25942596
AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -969,10 +969,9 @@ multiclass DSReadPat_t16<DS_Pseudo inst, ValueType vt, string frag> {
969969
}
970970

971971
let OtherPredicates = [NotLDSRequiresM0Init] in {
972-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
973-
let True16Predicate = p in {
974-
def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
975-
}
972+
let True16Predicate = NotUseRealTrue16Insts in {
973+
def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
974+
}
976975
let True16Predicate = UseRealTrue16Insts in {
977976
def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
978977
}
@@ -1050,10 +1049,9 @@ multiclass DSWritePat_t16 <DS_Pseudo inst, ValueType vt, string frag> {
10501049
}
10511050

10521051
let OtherPredicates = [NotLDSRequiresM0Init] in {
1053-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
1054-
let True16Predicate = p in {
1055-
def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
1056-
}
1052+
let True16Predicate = NotUseRealTrue16Insts in {
1053+
def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
1054+
}
10571055
let True16Predicate = UseRealTrue16Insts in {
10581056
def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
10591057
}

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1982,8 +1982,7 @@ defm : FlatLoadPats <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
19821982
defm : FlatLoadPats <FLAT_LOAD_SSHORT, atomic_load_sext_16_flat, i32>;
19831983
defm : FlatLoadPats <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
19841984

1985-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
1986-
let True16Predicate = p in {
1985+
let True16Predicate = NotUseRealTrue16Insts in {
19871986
defm : FlatLoadPats <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
19881987
defm : FlatLoadPats <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
19891988
defm : FlatLoadPats <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
@@ -2127,8 +2126,7 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
21272126
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
21282127
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
21292128

2130-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
2131-
let True16Predicate = p in {
2129+
let True16Predicate = NotUseRealTrue16Insts in {
21322130
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
21332131
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
21342132
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
@@ -2187,8 +2185,7 @@ defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
21872185
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
21882186
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>;
21892187

2190-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
2191-
let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = p in {
2188+
let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = NotUseRealTrue16Insts in {
21922189
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
21932190
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>;
21942191
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>;
@@ -2356,8 +2353,7 @@ defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>;
23562353
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>;
23572354
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>;
23582355

2359-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
2360-
let True16Predicate = p in {
2356+
let True16Predicate = NotUseRealTrue16Insts in {
23612357
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>;
23622358
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>;
23632359
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 25 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1466,8 +1466,7 @@ class VOPSelectPat_t16 <ValueType vt> : GCNPat <
14661466

14671467
def : VOPSelectModsPat <i32>;
14681468
def : VOPSelectModsPat <f32>;
1469-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
1470-
let True16Predicate = p in {
1469+
let True16Predicate = NotUseRealTrue16Insts in {
14711470
def : VOPSelectPat <f16>;
14721471
def : VOPSelectPat <i16>;
14731472
} // End True16Predicate = NotUseRealTrue16Insts
@@ -2137,8 +2136,7 @@ def : GCNPat <
21372136
>;
21382137

21392138
foreach fp16vt = [f16, bf16] in {
2140-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
2141-
let SubtargetPredicate = p in {
2139+
let SubtargetPredicate = NotUseRealTrue16Insts in {
21422140
def : GCNPat <
21432141
(fabs (fp16vt VGPR_32:$src)),
21442142
(V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
@@ -2230,8 +2228,7 @@ def : GCNPat <
22302228
}
22312229

22322230
foreach fp16vt = [f16, bf16] in {
2233-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
2234-
let True16Predicate = p in {
2231+
let True16Predicate = NotUseRealTrue16Insts in {
22352232
def : GCNPat <
22362233
(fcopysign fp16vt:$src0, fp16vt:$src1),
22372234
(V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
@@ -2354,23 +2351,21 @@ def : GCNPat <
23542351
(S_MOV_B32 $ga)
23552352
>;
23562353

2357-
foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
2358-
let True16Predicate = pred in {
2359-
def : GCNPat <
2360-
(VGPRImm<(i16 imm)>:$imm),
2361-
(V_MOV_B32_e32 imm:$imm)
2362-
>;
2354+
let True16Predicate = NotUseRealTrue16Insts in {
2355+
def : GCNPat <
2356+
(VGPRImm<(i16 imm)>:$imm),
2357+
(V_MOV_B32_e32 imm:$imm)
2358+
>;
23632359

2364-
// FIXME: Workaround for ordering issue with peephole optimizer where
2365-
// a register class copy interferes with immediate folding. Should
2366-
// use s_mov_b32, which can be shrunk to s_movk_i32
2360+
// FIXME: Workaround for ordering issue with peephole optimizer where
2361+
// a register class copy interferes with immediate folding. Should
2362+
// use s_mov_b32, which can be shrunk to s_movk_i32
23672363

2368-
foreach vt = [f16, bf16] in {
2369-
def : GCNPat <
2370-
(VGPRImm<(vt fpimm)>:$imm),
2371-
(V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm)))
2372-
>;
2373-
}
2364+
foreach vt = [f16, bf16] in {
2365+
def : GCNPat <
2366+
(VGPRImm<(vt fpimm)>:$imm),
2367+
(V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm)))
2368+
>;
23742369
}
23752370
}
23762371

@@ -2859,8 +2854,7 @@ def : GCNPat<
28592854
(i32 (DivergentSextInreg<i1> i32:$src)),
28602855
(V_BFE_I32_e64 i32:$src, (i32 0), (i32 1))>;
28612856

2862-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
2863-
let True16Predicate = p in {
2857+
let True16Predicate = NotUseRealTrue16Insts in {
28642858
def : GCNPat <
28652859
(i16 (DivergentSextInreg<i1> i16:$src)),
28662860
(V_BFE_I32_e64 $src, (i32 0), (i32 1))
@@ -3205,8 +3199,7 @@ def : GCNPat<
32053199
}
32063200
} // AddedComplexity = 1
32073201

3208-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3209-
let True16Predicate = p in {
3202+
let True16Predicate = NotUseRealTrue16Insts in {
32103203
def : GCNPat<
32113204
(i32 (DivergentUnaryFrag<zext> i16:$src)),
32123205
(V_AND_B32_e64 (S_MOV_B32 (i32 0xffff)), $src)
@@ -3416,8 +3409,7 @@ def : GCNPat <
34163409

34173410
// Magic number: 1 | (0 << 8) | (12 << 16) | (12 << 24)
34183411
// The 12s emit 0s.
3419-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3420-
let True16Predicate = p in {
3412+
let True16Predicate = NotUseRealTrue16Insts in {
34213413
def : GCNPat <
34223414
(i16 (bswap i16:$a)),
34233415
(V_PERM_B32_e64 (i32 0), VSrc_b32:$a, (S_MOV_B32 (i32 0x0c0c0001)))
@@ -3670,8 +3662,7 @@ def : GCNPat <
36703662
(S_LSHL_B32 SReg_32:$src1, (i16 16))
36713663
>;
36723664

3673-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3674-
let True16Predicate = p in {
3665+
let True16Predicate = NotUseRealTrue16Insts in {
36753666
def : GCNPat <
36763667
(v2i16 (DivergentBinFrag<build_vector> (i16 0), (i16 VGPR_32:$src1))),
36773668
(v2i16 (V_LSHLREV_B32_e64 (i16 16), VGPR_32:$src1))
@@ -3707,8 +3698,7 @@ def : GCNPat <
37073698
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
37083699
>;
37093700

3710-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3711-
let True16Predicate = p in {
3701+
let True16Predicate = NotUseRealTrue16Insts in {
37123702
def : GCNPat <
37133703
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$src0), (Ty undef))),
37143704
(COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
@@ -3735,8 +3725,7 @@ def : GCNPat <
37353725
>;
37363726

37373727
let SubtargetPredicate = HasVOP3PInsts in {
3738-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3739-
let True16Predicate = p in
3728+
let True16Predicate = NotUseRealTrue16Insts in
37403729
def : GCNPat <
37413730
(v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src0), (i16 VGPR_32:$src1))),
37423731
(v2i16 (V_LSHL_OR_B32_e64 $src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), $src0))))
@@ -3766,8 +3755,7 @@ def : GCNPat <
37663755
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
37673756
>;
37683757

3769-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3770-
let True16Predicate = p in {
3758+
let True16Predicate = NotUseRealTrue16Insts in {
37713759
// Take the lower 16 bits from each VGPR_32 and concat them
37723760
def : GCNPat <
37733761
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$a), (Ty VGPR_32:$b))),
@@ -3838,8 +3826,7 @@ def : GCNPat <
38383826
>;
38393827

38403828
// Take the upper 16 bits from each VGPR_32 and concat them
3841-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3842-
let True16Predicate = p in
3829+
let True16Predicate = NotUseRealTrue16Insts in
38433830
def : GCNPat <
38443831
(vecTy (DivergentBinFrag<build_vector>
38453832
(Ty !if(!eq(Ty, i16),
@@ -3881,8 +3868,7 @@ def : GCNPat <
38813868
(v2i16 (S_PACK_HL_B32_B16 SReg_32:$src0, SReg_32:$src1))
38823869
>;
38833870

3884-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3885-
let True16Predicate = p in {
3871+
let True16Predicate = NotUseRealTrue16Insts in {
38863872
def : GCNPat <
38873873
(v2f16 (scalar_to_vector f16:$src0)),
38883874
(COPY $src0)

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1561,8 +1561,7 @@ def : GCNPat <
15611561

15621562
} // End OtherPredicates = [isGFX8Plus]
15631563

1564-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
1565-
let OtherPredicates = [isGFX8Plus, p] in {
1564+
let OtherPredicates = [isGFX8Plus, NotUseRealTrue16Insts] in {
15661565
def : GCNPat<
15671566
(i32 (anyext i16:$src)),
15681567
(COPY $src)

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1378,8 +1378,7 @@ class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
13781378
$src)
13791379
>;
13801380

1381-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
1382-
let True16Predicate = p in {
1381+
let True16Predicate = NotUseRealTrue16Insts in {
13831382
def : GCNPat <
13841383
(and i16:$src0, i16:$src1),
13851384
(V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -387,8 +387,7 @@ let SchedRW = [Write64Bit] in {
387387
} // End SchedRW = [Write64Bit]
388388
} // End isReMaterializable = 1
389389

390-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
391-
let True16Predicate = p in
390+
let True16Predicate = NotUseRealTrue16Insts in
392391
def : GCNPat<
393392
(i32 (DivergentUnaryFrag<sext> i16:$src)),
394393
(i32 (V_BFE_I32_e64 i16:$src, (i32 0), (i32 0x10)))
@@ -501,8 +500,7 @@ def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32
501500

502501
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
503502

504-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
505-
let True16Predicate = p in
503+
let True16Predicate = NotUseRealTrue16Insts in
506504
def : GCNPat<
507505
(i64 (DivergentUnaryFrag<sext> i16:$src)),
508506
(REG_SEQUENCE VReg_64,

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -402,8 +402,7 @@ defm V_FMA_MIX_F16_t16 : VOP3_VOP3PInst_t16<"v_fma_mix_f16_t16", VOP3P_Mix_Profi
402402

403403
defm : MadFmaMixFP32Pats<fma, V_FMA_MIX_F32>;
404404

405-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
406-
let True16Predicate = p in
405+
let True16Predicate = NotUseRealTrue16Insts in
407406
defm : MadFmaMixFP16Pats<fma, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
408407
let True16Predicate = UseRealTrue16Insts in
409408
defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_F16_t16>;
@@ -428,8 +427,7 @@ defm V_FMA_MIX_BF16_t16 : VOP3_VOP3PInst_t16<"v_fma_mix_bf16_t16", VOP3P_Mix_Pro
428427
} // End isCommutable = 1
429428

430429
defm : MadFmaMixFP32Pats<fma, V_FMA_MIX_F32_BF16, bf16>;
431-
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
432-
let True16Predicate = p in
430+
let True16Predicate = NotUseRealTrue16Insts in
433431
defm : MadFmaMixFP16Pats<fma, V_FMA_MIXLO_BF16, V_FMA_MIXHI_BF16, bf16, v2bf16>;
434432
let True16Predicate = UseRealTrue16Insts in
435433
defm : MadFmaMixFP16Pats_t16<fma, V_FMA_MIX_BF16_t16>;

0 commit comments

Comments
 (0)