Skip to content

Commit 8d3b933

Browse files
committed
[AMDGPU][True16][MC] update VOPC profile with latest vop3 true16, use
f16 for fake16 format
1 parent fd8d433 commit 8d3b933

11 files changed

+156
-89
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7343,14 +7343,16 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
73437343
auto NewInstr =
73447344
BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode), CondReg)
73457345
.setMIFlags(Inst.getFlags());
7346-
if (AMDGPU::getNamedOperandIdx(NewOpcode,
7347-
AMDGPU::OpName::src0_modifiers) >= 0) {
7346+
if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
7347+
0) {
73487348
NewInstr
73497349
.addImm(0) // src0_modifiers
73507350
.add(Inst.getOperand(0)) // src0
73517351
.addImm(0) // src1_modifiers
73527352
.add(Inst.getOperand(1)) // src1
73537353
.addImm(0); // clamp
7354+
if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::op_sel))
7355+
NewInstr.addImm(0); // op_sel0
73547356
} else {
73557357
NewInstr
73567358
.add(Inst.getOperand(0))

llvm/lib/Target/AMDGPU/VOPCInstructions.td

Lines changed: 74 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,8 @@ class VOPC_Real <VOPC_Pseudo ps, int EncodingFamily, string asm_name = ps.Pseudo
192192

193193
// copy relevant pseudo op flags
194194
let SubtargetPredicate = ps.SubtargetPredicate;
195+
let True16Predicate = ps.True16Predicate;
196+
let OtherPredicates = ps.OtherPredicates;
195197
let AsmMatchConverter = ps.AsmMatchConverter;
196198
let Constraints = ps.Constraints;
197199
let DisableEncoding = ps.DisableEncoding;
@@ -314,7 +316,7 @@ multiclass VOPC_Pseudos <string opName,
314316
let isCommutable = 1;
315317
}
316318

317-
def _e64 : VOP3_Pseudo<opName, P, getVOPCPat64<cond, P>.ret>,
319+
def _e64 : VOP3_Pseudo<opName, P, getVOPCPat64<cond, P>.ret, /*IsVOP3P*/false, P.HasOpSel>,
318320
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>,
319321
VCMPXNoSDstTable<1, opName#"_e64">,
320322
VCMPVCMPXTable<opName#"_e64"> {
@@ -373,7 +375,7 @@ multiclass VOPCX_Pseudos <string opName,
373375
let IsVCMPX = 1;
374376
}
375377

376-
def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst>,
378+
def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst, [], /*IsVOP3P*/false, P_NoSDst.HasOpSel>,
377379
Commutable_REV<revOp#"_nosdst_e64", !eq(revOp, opName)>,
378380
VCMPXNoSDstTable<0, opName#"_e64">,
379381
VCMPVCMPXTable<!subst("v_cmpx", "v_cmp", opName#"_e64")> {
@@ -799,11 +801,22 @@ defm V_CMPX_T_U64 : VOPCX_I64 <"v_cmpx_t_u64">;
799801
// Class instructions
800802
//===----------------------------------------------------------------------===//
801803

802-
class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType src1VT = i32> :
804+
class VOPC_Class_Profile_Base<list<SchedReadWrite> sched, ValueType src0VT, ValueType src1VT = i32> :
803805
VOPC_Profile<sched, src0VT, src1VT> {
806+
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
807+
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
808+
Clamp:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel);
809+
810+
let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel";
811+
let HasClamp = 0;
812+
let HasOMod = 0;
813+
}
814+
815+
class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType src1VT = i32> :
816+
VOPC_Class_Profile_Base<sched, src0VT, src1VT> {
804817
let AsmDPP = "$src0_modifiers, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
805818
let AsmDPP16 = AsmDPP#"$fi";
806-
let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
819+
let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
807820
let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
808821
// DPP8 forbids modifiers and can inherit from VOPC_Profile
809822

@@ -812,15 +825,7 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType
812825
let InsVOP3Base = !con(InsPartVOP3DPP, !if(HasOpSel, (ins op_sel0:$op_sel),
813826
(ins)));
814827
let AsmVOP3Base = "$sdst, $src0_modifiers, $src1";
815-
816-
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
817-
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
818-
Clamp:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel);
819-
820-
let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel";
821828
let HasSrc1Mods = 0;
822-
let HasClamp = 0;
823-
let HasOMod = 0;
824829
}
825830

826831
multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
@@ -837,16 +842,26 @@ multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> {
837842
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
838843
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
839844
}
840-
def _fake16 : VOPC_Class_Profile<sched, f16, i16> {
845+
def _fake16 : VOPC_Class_Profile_Base<sched, f16, f16> {
841846
let IsTrue16 = 1;
847+
let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
848+
let DstRC64 = getVALUDstForVT<DstVT>.ret;
849+
let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
842850
let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
843851
let Src1RC64 = VSrc_b32;
844852
let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
845853
let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
846854
let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
847-
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
848-
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
849-
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
855+
let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
856+
let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
857+
let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
858+
let Src0VOP3DPP = VGPRSrc_32;
859+
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
860+
let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
861+
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
862+
let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
863+
let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
864+
850865
}
851866
}
852867

@@ -889,17 +904,34 @@ multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> {
889904
}
890905
}
891906

892-
class getVOPCClassPat64 <VOPProfile P> {
893-
list<dag> ret =
894-
[(set i1:$sdst,
907+
multiclass VOPCClassPat64<string inst_name> {
908+
defvar inst = !cast<VOP_Pseudo>(inst_name#"_e64");
909+
defvar P = inst.Pfl;
910+
def : GCNPat <
911+
(i1:$sdst
895912
(AMDGPUfp_class
896913
(P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)),
897-
i32:$src1))];
914+
P.Src1VT:$src1)),
915+
(inst i32:$src0_modifiers, P.Src0VT:$src0, P.Src1VT:$src1)
916+
>;
898917
}
899918

919+
multiclass VOPCClassPat64_fake16<string inst_name> {
920+
defvar inst = !cast<VOP_Pseudo>(inst_name#"_fake16_e64");
921+
defvar P = inst.Pfl;
922+
def : GCNPat <
923+
(i1:$sdst
924+
(AMDGPUfp_class
925+
(P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)),
926+
i32:$src1)),
927+
(inst i32:$src0_modifiers, P.Src0VT:$src0,
928+
0 /*src1_modifiers*/, VGPR_32:$src1)
929+
>;
930+
}
900931

901-
// Special case for class instructions which only have modifiers on
902-
// the 1st source operand.
932+
// cmp_class ignores the FP mode and faithfully reports the unmodified
933+
// source value.
934+
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
903935
multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
904936
bit DefVcc = 1> {
905937
def _e32 : VOPC_Pseudo <opName, p>,
@@ -910,7 +942,7 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
910942
let isConvergent = DefExec;
911943
}
912944

913-
def _e64 : VOP3_Pseudo<opName, p, getVOPCClassPat64<p>.ret>,
945+
def _e64 : VOP3_Pseudo<opName, p, [], 0/*IsVOP3P*/, p.HasOpSel>,
914946
VCMPXNoSDstTable<1, opName#"_e64"> {
915947
let Defs = !if(DefExec, [EXEC], []);
916948
let SchedRW = p.Schedule;
@@ -957,7 +989,7 @@ multiclass VOPCX_Class_Pseudos <string opName,
957989
let SubtargetPredicate = HasNoSdstCMPX;
958990
}
959991

960-
def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst>,
992+
def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst, [], 0/*IsVOP3P*/, P_NoSDst.HasOpSel>,
961993
VCMPXNoSDstTable<0, opName#"_e64"> {
962994
let Defs = [EXEC];
963995
let SchedRW = P_NoSDst.Schedule;
@@ -990,6 +1022,7 @@ multiclass VOPCX_Class_Pseudos <string opName,
9901022
} // end SubtargetPredicate = isGFX11Plus
9911023
}
9921024
} // End SubtargetPredicate = HasSdstCMPX
1025+
} // End ReadsModeReg = 0, mayRaiseFPException = 0
9931026

9941027
defm VOPC_I1_F16_I16 : VOPC_Class_Profile_t16<[Write32Bit]>;
9951028
def VOPC_I1_F32_I32 : VOPC_Class_Profile<[Write32Bit], f32>;
@@ -1002,12 +1035,14 @@ def VOPC_F64_I32 : VOPC_Class_NoSdst_Profile<[Write64Bit], f64>;
10021035
multiclass VOPC_CLASS_F16 <string opName> {
10031036
let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
10041037
defm NAME : VOPC_Class_Pseudos <opName, VOPC_I1_F16_I16, 0>;
1038+
defm : VOPCClassPat64<NAME>;
10051039
}
1006-
let OtherPredicates = [UseRealTrue16Insts] in {
1040+
let True16Predicate = UseRealTrue16Insts in {
10071041
defm _t16 : VOPC_Class_Pseudos <opName#"_t16", VOPC_I1_F16_I16_t16, 0>;
10081042
}
1009-
let OtherPredicates = [UseFakeTrue16Insts] in {
1043+
let True16Predicate = UseFakeTrue16Insts in {
10101044
defm _fake16 : VOPC_Class_Pseudos <opName#"_fake16", VOPC_I1_F16_I16_fake16, 0>;
1045+
defm : VOPCClassPat64_fake16<NAME>;
10111046
}
10121047
}
10131048

@@ -1023,29 +1058,29 @@ multiclass VOPCX_CLASS_F16 <string opName> {
10231058
}
10241059
}
10251060

1026-
multiclass VOPC_CLASS_F32 <string opName> :
1027-
VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 0>;
1061+
multiclass VOPC_CLASS_F32 <string opName> {
1062+
defm NAME : VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 0>;
1063+
defm : VOPCClassPat64<NAME>;
1064+
}
10281065

10291066
multiclass VOPCX_CLASS_F32 <string opName> :
10301067
VOPCX_Class_Pseudos <opName, VOPC_I1_F32_I32, VOPC_F32_I32>;
10311068

1032-
multiclass VOPC_CLASS_F64 <string opName> :
1033-
VOPC_Class_Pseudos <opName, VOPC_I1_F64_I32, 0>;
1069+
multiclass VOPC_CLASS_F64 <string opName> {
1070+
defm NAME : VOPC_Class_Pseudos <opName, VOPC_I1_F64_I32, 0>;
1071+
defm : VOPCClassPat64<NAME>;
1072+
}
10341073

10351074
multiclass VOPCX_CLASS_F64 <string opName> :
10361075
VOPCX_Class_Pseudos <opName, VOPC_I1_F64_I32, VOPC_F64_I32>;
10371076

1038-
// cmp_class ignores the FP mode and faithfully reports the unmodified
1039-
// source value.
1040-
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
10411077
defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">;
10421078
defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">;
10431079
defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">;
10441080
defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <"v_cmpx_class_f64">;
10451081

10461082
defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">;
10471083
defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
1048-
} // End ReadsModeReg = 0, mayRaiseFPException = 0
10491084

10501085
//===----------------------------------------------------------------------===//
10511086
// V_ICMPIntrinsic Pattern.
@@ -1283,11 +1318,13 @@ class VOPC_DPP16<bits<8> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
12831318
: VOPC_DPP_Base<op, opName, ps.Pfl> {
12841319
let AssemblerPredicate = HasDPP16;
12851320
let SubtargetPredicate = HasDPP16;
1321+
let True16Predicate = ps.True16Predicate;
12861322
let hasSideEffects = ps.hasSideEffects;
12871323
let Defs = ps.Defs;
12881324
let SchedRW = ps.SchedRW;
12891325
let Uses = ps.Uses;
12901326
let OtherPredicates = ps.OtherPredicates;
1327+
let True16Predicate = ps.True16Predicate;
12911328
let Constraints = ps.Constraints;
12921329
}
12931330

@@ -1303,6 +1340,7 @@ class VOPC_DPP8<bits<8> op, VOPC_Pseudo ps, string opName = ps.OpName>
13031340
let SchedRW = ps.SchedRW;
13041341
let Uses = ps.Uses;
13051342
let OtherPredicates = ps.OtherPredicates;
1343+
let True16Predicate = ps.True16Predicate;
13061344
let Constraints = "";
13071345
}
13081346

@@ -1333,6 +1371,7 @@ class VOPC64_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
13331371
: VOPC64_DPP_Base<op, opName, ps.Pfl> {
13341372
let AssemblerPredicate = HasDPP16;
13351373
let SubtargetPredicate = HasDPP16;
1374+
let True16Predicate = ps.True16Predicate;
13361375
let hasSideEffects = ps.hasSideEffects;
13371376
let Defs = ps.Defs;
13381377
let SchedRW = ps.SchedRW;
@@ -1375,6 +1414,7 @@ class VOPC64_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
13751414
let SchedRW = ps.SchedRW;
13761415
let Uses = ps.Uses;
13771416
let OtherPredicates = ps.OtherPredicates;
1417+
let True16Predicate = ps.True16Predicate;
13781418
}
13791419

13801420
class VOPC64_DPP8_Dst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1876,12 +1876,18 @@ class ClassPat<Instruction inst, ValueType vt> : GCNPat <
18761876
(inst i32:$src0_mods, vt:$src0, (V_MOV_B32_e32 timm:$mask))
18771877
>;
18781878

1879+
class ClassPat_t16<Instruction inst, ValueType vt> : GCNPat <
1880+
(is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),
1881+
(inst i32:$src0_mods, vt:$src0, SRCMODS.NONE, (V_MOV_B32_e32 timm:$mask))
1882+
>;
1883+
18791884
def : ClassPat<V_CMP_CLASS_F16_e64, f16> {
1880-
let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts];
1885+
let OtherPredicates = [Has16BitInsts];
1886+
let True16Predicate = NotHasTrue16BitInsts;
18811887
}
18821888

1883-
def : ClassPat<V_CMP_CLASS_F16_t16_e64, f16> {
1884-
let OtherPredicates = [HasTrue16BitInsts];
1889+
def : ClassPat_t16<V_CMP_CLASS_F16_fake16_e64, f16> {
1890+
let True16Predicate = UseFakeTrue16Insts;
18851891
}
18861892

18871893
def : ClassPat<V_CMP_CLASS_F32_e64, f32>;

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,15 @@ body: |
2424
; WAVE32-NEXT: {{ $}}
2525
; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
2626
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
27-
; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
27+
; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
2828
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
29+
;
2930
; WAVE64-LABEL: name: class_s16_vcc_sv
3031
; WAVE64: liveins: $sgpr0, $vgpr0
3132
; WAVE64-NEXT: {{ $}}
3233
; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
3334
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
34-
; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
35+
; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
3536
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
3637
%0:sgpr(s32) = COPY $sgpr0
3738
%1:vgpr(s32) = COPY $vgpr0
@@ -54,14 +55,15 @@ body: |
5455
; WAVE32-NEXT: {{ $}}
5556
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
5657
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
57-
; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
58+
; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
5859
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
60+
;
5961
; WAVE64-LABEL: name: class_s16_vcc_vs
6062
; WAVE64: liveins: $sgpr0, $vgpr0
6163
; WAVE64-NEXT: {{ $}}
6264
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
6365
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
64-
; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
66+
; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
6567
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
6668
%0:vgpr(s32) = COPY $vgpr0
6769
%1:sgpr(s32) = COPY $sgpr0
@@ -84,14 +86,15 @@ body: |
8486
; WAVE32-NEXT: {{ $}}
8587
; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
8688
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
87-
; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
89+
; WAVE32-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
8890
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
91+
;
8992
; WAVE64-LABEL: name: class_s16_vcc_vv
9093
; WAVE64: liveins: $vgpr0, $vgpr1
9194
; WAVE64-NEXT: {{ $}}
9295
; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
9396
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
94-
; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec
97+
; WAVE64-NEXT: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_CLASS_F16_e64 0, [[COPY]], 0, [[COPY1]], implicit $exec
9598
; WAVE64-NEXT: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]]
9699
%0:vgpr(s32) = COPY $vgpr0
97100
%1:vgpr(s32) = COPY $vgpr1

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ body: |
3030
; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
3131
; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
3232
; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
33-
; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
34-
; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_t16_e64_]]
33+
; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
34+
; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
3535
%0:vgpr(s32) = COPY $vgpr0
3636
%1:vgpr(s32) = COPY $vgpr1
3737
%2:vgpr(s16) = G_FPTRUNC %0
@@ -68,8 +68,8 @@ body: |
6868
; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
6969
; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
7070
; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
71-
; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_t16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
72-
; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_t16_e64_]]
71+
; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
72+
; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
7373
%0:vgpr(s32) = COPY $vgpr0
7474
%1:vgpr(s32) = COPY $vgpr1
7575
%2:vgpr(s16) = G_FPTRUNC %0

0 commit comments

Comments
 (0)