Skip to content

Commit 40ab0e6

Browse files
committed
remove v_maximum/minimum_f16
1 parent da5222c commit 40ab0e6

File tree

8 files changed: 213 additions (+213) and 439 deletions (-439).

8 files changed

+213
-439
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 5 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -5483,12 +5483,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
54835483
case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64;
54845484
case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64;
54855485
case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64;
5486-
case AMDGPU::S_MINIMUM_F16:
5487-
return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
5488-
: AMDGPU::V_MINIMUM_F16_fake16_e64;
5489-
case AMDGPU::S_MAXIMUM_F16:
5490-
return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
5491-
: AMDGPU::V_MAXIMUM_F16_fake16_e64;
5486+
case AMDGPU::S_MINIMUM_F16: return AMDGPU::V_MINIMUM_F16_e64;
5487+
case AMDGPU::S_MAXIMUM_F16: return AMDGPU::V_MAXIMUM_F16_e64;
54925488
case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
54935489
case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
54945490
case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
@@ -7452,7 +7448,9 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
74527448
return;
74537449
}
74547450
case AMDGPU::S_MINIMUM_F32:
7455-
case AMDGPU::S_MAXIMUM_F32: {
7451+
case AMDGPU::S_MAXIMUM_F32:
7452+
case AMDGPU::S_MINIMUM_F16:
7453+
case AMDGPU::S_MAXIMUM_F16: {
74567454
const DebugLoc &DL = Inst.getDebugLoc();
74577455
Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
74587456
MachineInstr *NewInstr = BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
@@ -7469,26 +7467,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
74697467
Inst.eraseFromParent();
74707468
return;
74717469
}
7472-
case AMDGPU::S_MINIMUM_F16:
7473-
case AMDGPU::S_MAXIMUM_F16: {
7474-
const DebugLoc &DL = Inst.getDebugLoc();
7475-
Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
7476-
? &AMDGPU::VGPR_16RegClass
7477-
: &AMDGPU::VGPR_32RegClass);
7478-
MachineInstr *NewInstr = BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
7479-
.addImm(0) // src0_modifiers
7480-
.add(Inst.getOperand(1))
7481-
.addImm(0) // src1_modifiers
7482-
.add(Inst.getOperand(2))
7483-
.addImm(0) // clamp
7484-
.addImm(0) // omod
7485-
.addImm(0); // opsel0
7486-
MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst);
7487-
legalizeOperands(*NewInstr, MDT);
7488-
addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
7489-
Inst.eraseFromParent();
7490-
return;
7491-
}
74927470
}
74937471

74947472
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -170,8 +170,8 @@ defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs
170170
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
171171
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
172172
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fmaximum>>;
173-
defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, DivergentBinFrag<fminimum>>;
174-
defm V_MAXIMUM_F16 : VOP3Inst_t16 <"v_maximum_f16", VOP_F16_F16_F16, DivergentBinFrag<fmaximum>>;
173+
defm V_MINIMUM_F16 : VOP3Inst <"v_minimum_f16", VOP3_Profile<VOP_F16_F16_F16>, DivergentBinFrag<fminimum>>;
174+
defm V_MAXIMUM_F16 : VOP3Inst <"v_maximum_f16", VOP3_Profile<VOP_F16_F16_F16>, DivergentBinFrag<fmaximum>>;
175175

176176
let SchedRW = [WriteDoubleAdd] in {
177177
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
@@ -1612,8 +1612,8 @@ defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>;
16121612
defm V_MAXIMUM_F64 : VOP3Only_Real_Base_gfx12<0x342>;
16131613
defm V_MINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x365>;
16141614
defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>;
1615-
defm V_MINIMUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x367, "v_minimum_f16">;
1616-
defm V_MAXIMUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x368, "v_maximum_f16">;
1615+
defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x367>;
1616+
defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x368>;
16171617

16181618
defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
16191619
defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;

llvm/test/MC/AMDGPU/gfx12_asm_vop3.s

Lines changed: 64 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -6289,101 +6289,101 @@ v_maximum_f32 v5, -src_scc, |vcc_lo|
62896289
v_maximum_f32 v255, -|0xaf123456|, -|vcc_hi|
62906290
// GFX12: v_maximum_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x66,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
62916291

6292-
v_minimum_f16 v5.l, v1.l, v2.l
6293-
// GFX12: v_minimum_f16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x67,0xd7,0x01,0x05,0x02,0x00]
6292+
v_minimum_f16 v5, v1, v2
6293+
// GFX12: v_minimum_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x67,0xd7,0x01,0x05,0x02,0x00]
62946294

6295-
v_minimum_f16 v5.l, v255.l, v255.l
6296-
// GFX12: v_minimum_f16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x67,0xd7,0xff,0xff,0x03,0x00]
6295+
v_minimum_f16 v5, v255, v255
6296+
// GFX12: v_minimum_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x67,0xd7,0xff,0xff,0x03,0x00]
62976297

6298-
v_minimum_f16 v5.l, s1, s2
6299-
// GFX12: v_minimum_f16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x67,0xd7,0x01,0x04,0x00,0x00]
6298+
v_minimum_f16 v5, s1, s2
6299+
// GFX12: v_minimum_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x67,0xd7,0x01,0x04,0x00,0x00]
63006300

6301-
v_minimum_f16 v5.l, s105, s105
6302-
// GFX12: v_minimum_f16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x67,0xd7,0x69,0xd2,0x00,0x00]
6301+
v_minimum_f16 v5, s105, s105
6302+
// GFX12: v_minimum_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x67,0xd7,0x69,0xd2,0x00,0x00]
63036303

6304-
v_minimum_f16 v5.l, vcc_lo, ttmp15
6305-
// GFX12: v_minimum_f16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x67,0xd7,0x6a,0xf6,0x00,0x00]
6304+
v_minimum_f16 v5, vcc_lo, ttmp15
6305+
// GFX12: v_minimum_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x67,0xd7,0x6a,0xf6,0x00,0x00]
63066306

6307-
v_minimum_f16 v5.l, vcc_hi, 0xaf12
6308-
// GFX12: v_minimum_f16 v5.l, vcc_hi, 0xaf12 ; encoding: [0x05,0x00,0x67,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00]
6307+
v_minimum_f16 v5, vcc_hi, 0xaf12
6308+
// GFX12: v_minimum_f16 v5, vcc_hi, 0xaf12 ; encoding: [0x05,0x00,0x67,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00]
63096309

6310-
v_minimum_f16 v5.l, ttmp15, src_scc
6311-
// GFX12: v_minimum_f16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x67,0xd7,0x7b,0xfa,0x01,0x00]
6310+
v_minimum_f16 v5, ttmp15, src_scc
6311+
// GFX12: v_minimum_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x67,0xd7,0x7b,0xfa,0x01,0x00]
63126312

6313-
v_minimum_f16 v5.l, m0, 0.5
6314-
// GFX12: v_minimum_f16 v5.l, m0, 0.5 ; encoding: [0x05,0x00,0x67,0xd7,0x7d,0xe0,0x01,0x00]
6313+
v_minimum_f16 v5, m0, 0.5
6314+
// GFX12: v_minimum_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x67,0xd7,0x7d,0xe0,0x01,0x00]
63156315

6316-
v_minimum_f16 v5.l, exec_lo, -1
6317-
// GFX12: v_minimum_f16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x67,0xd7,0x7e,0x82,0x01,0x00]
6316+
v_minimum_f16 v5, exec_lo, -1
6317+
// GFX12: v_minimum_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x67,0xd7,0x7e,0x82,0x01,0x00]
63186318

6319-
v_minimum_f16 v5.l, |exec_hi|, null
6320-
// GFX12: v_minimum_f16 v5.l, |exec_hi|, null ; encoding: [0x05,0x01,0x67,0xd7,0x7f,0xf8,0x00,0x00]
6319+
v_minimum_f16 v5, |exec_hi|, null
6320+
// GFX12: v_minimum_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x67,0xd7,0x7f,0xf8,0x00,0x00]
63216321

6322-
v_minimum_f16 v5.l, null, exec_lo
6323-
// GFX12: v_minimum_f16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x67,0xd7,0x7c,0xfc,0x00,0x00]
6322+
v_minimum_f16 v5, null, exec_lo
6323+
// GFX12: v_minimum_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x67,0xd7,0x7c,0xfc,0x00,0x00]
63246324

6325-
v_minimum_f16 v5.l, -1, exec_hi
6326-
// GFX12: v_minimum_f16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x67,0xd7,0xc1,0xfe,0x00,0x00]
6325+
v_minimum_f16 v5, -1, exec_hi
6326+
// GFX12: v_minimum_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x67,0xd7,0xc1,0xfe,0x00,0x00]
63276327

6328-
v_minimum_f16 v5.l, 0.5, -m0
6329-
// GFX12: v_minimum_f16 v5.l, 0.5, -m0 ; encoding: [0x05,0x00,0x67,0xd7,0xf0,0xfa,0x00,0x40]
6328+
v_minimum_f16 v5, 0.5, -m0
6329+
// GFX12: v_minimum_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x67,0xd7,0xf0,0xfa,0x00,0x40]
63306330

6331-
v_minimum_f16 v5.l, -src_scc, |vcc_lo|
6332-
// GFX12: v_minimum_f16 v5.l, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x67,0xd7,0xfd,0xd4,0x00,0x20]
6331+
v_minimum_f16 v5, -src_scc, |vcc_lo|
6332+
// GFX12: v_minimum_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x67,0xd7,0xfd,0xd4,0x00,0x20]
63336333

6334-
v_minimum_f16 v255.l, -|0xaf12|, -|vcc_hi|
6335-
// GFX12: v_minimum_f16 v255.l, -|0xaf12|, -|vcc_hi| ; encoding: [0xff,0x03,0x67,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00]
6334+
v_minimum_f16 v255, -|0xaf12|, -|vcc_hi|
6335+
// GFX12: v_minimum_f16 v255, -|0xaf12|, -|vcc_hi| ; encoding: [0xff,0x03,0x67,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00]
63366336

6337-
v_minimum_f16 v205.l, v201.l, v200.l
6338-
// GFX12: v_minimum_f16 v205.l, v201.l, v200.l ; encoding: [0xcd,0x00,0x67,0xd7,0xc9,0x91,0x03,0x00]
6337+
v_minimum_f16 v205, v201, v200
6338+
// GFX12: v_minimum_f16 v205, v201, v200 ; encoding: [0xcd,0x00,0x67,0xd7,0xc9,0x91,0x03,0x00]
63396339

6340-
v_maximum_f16 v5.l, v1.l, v2.l
6341-
// GFX12: v_maximum_f16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x00]
6340+
v_maximum_f16 v5, v1, v2
6341+
// GFX12: v_maximum_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x00]
63426342

6343-
v_maximum_f16 v5.l, v255.l, v255.l
6344-
// GFX12: v_maximum_f16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x68,0xd7,0xff,0xff,0x03,0x00]
6343+
v_maximum_f16 v5, v255, v255
6344+
// GFX12: v_maximum_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x68,0xd7,0xff,0xff,0x03,0x00]
63456345

6346-
v_maximum_f16 v5.l, s1, s2
6347-
// GFX12: v_maximum_f16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x04,0x00,0x00]
6346+
v_maximum_f16 v5, s1, s2
6347+
// GFX12: v_maximum_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x04,0x00,0x00]
63486348

6349-
v_maximum_f16 v5.l, s105, s105
6350-
// GFX12: v_maximum_f16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x68,0xd7,0x69,0xd2,0x00,0x00]
6349+
v_maximum_f16 v5, s105, s105
6350+
// GFX12: v_maximum_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x68,0xd7,0x69,0xd2,0x00,0x00]
63516351

6352-
v_maximum_f16 v5.l, vcc_lo, ttmp15
6353-
// GFX12: v_maximum_f16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x68,0xd7,0x6a,0xf6,0x00,0x00]
6352+
v_maximum_f16 v5, vcc_lo, ttmp15
6353+
// GFX12: v_maximum_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x68,0xd7,0x6a,0xf6,0x00,0x00]
63546354

6355-
v_maximum_f16 v5.l, vcc_hi, 0xaf12
6356-
// GFX12: v_maximum_f16 v5.l, vcc_hi, 0xaf12 ; encoding: [0x05,0x00,0x68,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00]
6355+
v_maximum_f16 v5, vcc_hi, 0xaf12
6356+
// GFX12: v_maximum_f16 v5, vcc_hi, 0xaf12 ; encoding: [0x05,0x00,0x68,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00]
63576357

6358-
v_maximum_f16 v5.l, ttmp15, src_scc
6359-
// GFX12: v_maximum_f16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x68,0xd7,0x7b,0xfa,0x01,0x00]
6358+
v_maximum_f16 v5, ttmp15, src_scc
6359+
// GFX12: v_maximum_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x68,0xd7,0x7b,0xfa,0x01,0x00]
63606360

6361-
v_maximum_f16 v5.l, m0, 0.5
6362-
// GFX12: v_maximum_f16 v5.l, m0, 0.5 ; encoding: [0x05,0x00,0x68,0xd7,0x7d,0xe0,0x01,0x00]
6361+
v_maximum_f16 v5, m0, 0.5
6362+
// GFX12: v_maximum_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x68,0xd7,0x7d,0xe0,0x01,0x00]
63636363

6364-
v_maximum_f16 v5.l, exec_lo, -1
6365-
// GFX12: v_maximum_f16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x68,0xd7,0x7e,0x82,0x01,0x00]
6364+
v_maximum_f16 v5, exec_lo, -1
6365+
// GFX12: v_maximum_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x68,0xd7,0x7e,0x82,0x01,0x00]
63666366

6367-
v_maximum_f16 v5.l, |exec_hi|, null
6368-
// GFX12: v_maximum_f16 v5.l, |exec_hi|, null ; encoding: [0x05,0x01,0x68,0xd7,0x7f,0xf8,0x00,0x00]
6367+
v_maximum_f16 v5, |exec_hi|, null
6368+
// GFX12: v_maximum_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x68,0xd7,0x7f,0xf8,0x00,0x00]
63696369

6370-
v_maximum_f16 v5.l, null, exec_lo
6371-
// GFX12: v_maximum_f16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x68,0xd7,0x7c,0xfc,0x00,0x00]
6370+
v_maximum_f16 v5, null, exec_lo
6371+
// GFX12: v_maximum_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x68,0xd7,0x7c,0xfc,0x00,0x00]
63726372

6373-
v_maximum_f16 v5.l, -1, exec_hi
6374-
// GFX12: v_maximum_f16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x68,0xd7,0xc1,0xfe,0x00,0x00]
6373+
v_maximum_f16 v5, -1, exec_hi
6374+
// GFX12: v_maximum_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x68,0xd7,0xc1,0xfe,0x00,0x00]
63756375

6376-
v_maximum_f16 v5.l, 0.5, -m0
6377-
// GFX12: v_maximum_f16 v5.l, 0.5, -m0 ; encoding: [0x05,0x00,0x68,0xd7,0xf0,0xfa,0x00,0x40]
6376+
v_maximum_f16 v5, 0.5, -m0
6377+
// GFX12: v_maximum_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x68,0xd7,0xf0,0xfa,0x00,0x40]
63786378

6379-
v_maximum_f16 v5.l, -src_scc, |vcc_lo|
6380-
// GFX12: v_maximum_f16 v5.l, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x68,0xd7,0xfd,0xd4,0x00,0x20]
6379+
v_maximum_f16 v5, -src_scc, |vcc_lo|
6380+
// GFX12: v_maximum_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x68,0xd7,0xfd,0xd4,0x00,0x20]
63816381

6382-
v_maximum_f16 v255.l, -|0xaf12|, -|vcc_hi|
6383-
// GFX12: v_maximum_f16 v255.l, -|0xaf12|, -|vcc_hi| ; encoding: [0xff,0x03,0x68,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00]
6382+
v_maximum_f16 v255, -|0xaf12|, -|vcc_hi|
6383+
// GFX12: v_maximum_f16 v255, -|0xaf12|, -|vcc_hi| ; encoding: [0xff,0x03,0x68,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00]
63846384

6385-
v_maximum_f16 v205.l, v201.l, v200.l
6386-
// GFX12: v_maximum_f16 v205.l, v201.l, v200.l ; encoding: [0xcd,0x00,0x68,0xd7,0xc9,0x91,0x03,0x00]
6385+
v_maximum_f16 v205, v201, v200
6386+
// GFX12: v_maximum_f16 v205, v201, v200 ; encoding: [0xcd,0x00,0x68,0xd7,0xc9,0x91,0x03,0x00]
63876387

63886388
v_minimum_f64 v[5:6], v[1:2], v[3:4]
63896389
// GFX12: v_minimum_f64 v[5:6], v[1:2], v[3:4] ; encoding: [0x05,0x00,0x41,0xd7,0x01,0x07,0x02,0x00]

0 commit comments

Comments
 (0)