Skip to content

Commit 3fe8172

Browse files
committed
true16 for v_cmp_class_f16
1 parent 5e26ff3 commit 3fe8172

29 files changed

+1940 -850 lines changed

llvm/lib/Target/AMDGPU/VOPCInstructions.td

Lines changed: 35 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -870,25 +870,40 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType
870870

871871
multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
872872
def NAME : VOPC_Class_Profile<sched, f16>;
873-
def _t16 : VOPC_Class_Profile<sched, f16, i16> {
873+
def _t16 : VOPC_Class_Profile_Base<sched, f16, f16> {
874874
let IsTrue16 = 1;
875875
let IsRealTrue16 = 1;
876-
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
877-
let Src1RC64 = VSrc_b32;
878-
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
879-
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
880-
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
881-
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
882-
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
883-
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
876+
let HasOpSel = 1;
877+
let HasModifiers = 1; // All instructions at least have OpSel
878+
let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
879+
let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
880+
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
881+
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
882+
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
883+
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
884+
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
885+
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
886+
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
887+
let Src0VOP3DPP = VGPRSrc_16;
888+
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
889+
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
890+
891+
let DstRC64 = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 1/*IsVOP3Encoding*/>.ret;
892+
let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
893+
let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
894+
let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
895+
let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
896+
let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
897+
let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
898+
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
899+
let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
900+
let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
884901
}
885902
def _fake16 : VOPC_Class_Profile_Base<sched, f16, f16> {
886903
let IsTrue16 = 1;
887904
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
888-
let DstRC64 = getVALUDstForVT<DstVT>.ret;
889905
let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
890906
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
891-
let Src1RC64 = VSrc_b32;
892907
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
893908
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
894909
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
@@ -898,6 +913,14 @@ multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
898913
let Src0VOP3DPP = VGPRSrc_32;
899914
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
900915
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
916+
917+
let DstRC64 = getVALUDstForVT<DstVT>.ret;
918+
let Src0RC64 = getVOP3SrcForVT<Src0VT, 0/*IsTrue16*/>.ret;
919+
let Src1RC64 = getVOP3SrcForVT<Src1VT, 0/*IsTrue16*/>.ret;
920+
let Src2RC64 = getVOP3SrcForVT<Src2VT, 0/*IsTrue16*/>.ret;
921+
let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
922+
let Src1Mod = getSrcMod<Src1VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
923+
let Src2Mod = getSrcMod<Src2VT, 0/*IsTrue16*/, 1/*IsFake16*/>.ret;
901924
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
902925
let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
903926
let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
@@ -1843,7 +1866,7 @@ defm V_CMP_NE_U64 : VOPC_Real_gfx11_gfx12<0x05d>;
18431866
defm V_CMP_GE_U64 : VOPC_Real_gfx11_gfx12<0x05e>;
18441867
defm V_CMP_T_U64 : VOPC_Real_gfx11<0x05f>;
18451868

1846-
defm V_CMP_CLASS_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
1869+
defm V_CMP_CLASS_F16 : VOPC_Real_t16_and_fake16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
18471870
defm V_CMP_CLASS_F32 : VOPC_Real_gfx11_gfx12<0x07e>;
18481871
defm V_CMP_CLASS_F64 : VOPC_Real_gfx11_gfx12<0x07f>;
18491872

0 commit comments

Comments (0)