@@ -901,198 +901,6 @@ unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
901 901   return 0;
902 902 }
903 903
904- // Helper function for matchFpTruncFpTrunc.
905- // Checks that the given definition belongs to an FPTRUNC and that the source is
906- // not an integer, as no rounding is necessary due to the range of values
907- bool isFPTruncFromDouble(MachineRegisterInfo &MRI, MachineInstr *MaybeFpTrunc) {
908-   if (!MaybeFpTrunc || MaybeFpTrunc->getOpcode() != TargetOpcode::G_FPTRUNC)
909-     return false;
910-
911-   // Check the source is 64 bits as we only want to match a very specific
912-   // pattern.
913-   Register FpTruncSrc = MaybeFpTrunc->getOperand(1).getReg();
914-   LLT SrcTy = MRI.getType(FpTruncSrc);
915-   if (SrcTy.getScalarSizeInBits() != 64)
916-     return false;
917-
918-   // Need to check the float didn't come from an int, as no rounding is
919-   // necessary in that case.
920-   MachineInstr *FpTruncSrcDef = getDefIgnoringCopies(FpTruncSrc, MRI);
921-   if (FpTruncSrcDef->getOpcode() == TargetOpcode::G_SITOFP ||
922-       FpTruncSrcDef->getOpcode() == TargetOpcode::G_UITOFP)
923-     return false;
924-
925-   return true;
926- }
927-
928- // To avoid double-rounding issues we need to lower FPTRUNC(FPTRUNC) to a
929- // round-to-odd truncate followed by a normal truncate. When truncating an FP
930- // value that came from an integer this is not a problem, as the range of
931- // values representable in the integer is smaller.
932- bool matchFpTruncFpTrunc(MachineInstr &MI, MachineRegisterInfo &MRI) {
933-   assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC && "Expected G_FPTRUNC");
934-
935-   // Check the destination is 16 bits as we only want to match a very specific
936-   // pattern.
937-   Register Dst = MI.getOperand(0).getReg();
938-   LLT DstTy = MRI.getType(Dst);
939-   if (DstTy.getScalarSizeInBits() != 16)
940-     return false;
941-
942-   Register Src = MI.getOperand(1).getReg();
943-
944-   MachineInstr *ParentDef = getDefIgnoringCopies(Src, MRI);
945-   if (!ParentDef)
946-     return false;
947-
948-   MachineInstr *FpTruncDef;
949-   switch (ParentDef->getOpcode()) {
950-   default:
951-     return false;
952-   case TargetOpcode::G_CONCAT_VECTORS: {
953-     // Expecting exactly two FPTRUNCs.
954-     if (ParentDef->getNumOperands() != 3)
955-       return false;
956-
957-     // All operands need to be FPTRUNC.
958-     for (unsigned OpIdx = 1, NumOperands = ParentDef->getNumOperands();
959-          OpIdx != NumOperands; ++OpIdx) {
960-       Register FpTruncDst = ParentDef->getOperand(OpIdx).getReg();
961-       FpTruncDef = getDefIgnoringCopies(FpTruncDst, MRI);
962-
963-       if (!isFPTruncFromDouble(MRI, FpTruncDef))
964-         return false;
965-     }
966-
967-     return true;
968-   }
969-   // This is to match cases in which vectors are widened to a larger size.
970-   case TargetOpcode::G_INSERT_VECTOR_ELT: {
971-     Register VecExtractDst = ParentDef->getOperand(2).getReg();
972-     MachineInstr *VecExtractDef = getDefIgnoringCopies(VecExtractDst, MRI);
973-
974-     if (!VecExtractDef ||
975-         VecExtractDef->getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
976-       return false;
977-
978-     Register FpTruncDst = VecExtractDef->getOperand(1).getReg();
979-     FpTruncDef = getDefIgnoringCopies(FpTruncDst, MRI);
980-     break;
981-   }
982-   case TargetOpcode::G_FPTRUNC: {
983-     Register FpTruncDst = ParentDef->getOperand(1).getReg();
984-     FpTruncDef = getDefIgnoringCopies(FpTruncDst, MRI);
985-     break;
986-   }
987-   }
988-
989-   if (!isFPTruncFromDouble(MRI, FpTruncDef))
990-     return false;
991-
992-   return true;
993- }
994-
995- void applyFpTruncFpTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
996-                          MachineIRBuilder &B) {
997-   assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC && "Expected G_FPTRUNC");
998-   Register Dst = MI.getOperand(0).getReg();
999-   Register Src = MI.getOperand(1).getReg();
1000-
1001-   MachineInstr *ParentDef = getDefIgnoringCopies(Src, MRI);
1002-   if (!ParentDef)
1003-     return;
1004-
1005-   LLT V2F32 = LLT::fixed_vector(2, LLT::scalar(32));
1006-   LLT V4F32 = LLT::fixed_vector(4, LLT::scalar(32));
1007-   LLT V4F16 = LLT::fixed_vector(4, LLT::scalar(16));
1008-
1009-   B.setInstrAndDebugLoc(MI);
1010-
1011-   switch (ParentDef->getOpcode()) {
1012-   default:
1013-     return;
1014-   case TargetOpcode::G_INSERT_VECTOR_ELT: {
1015-     Register VecExtractDst = ParentDef->getOperand(2).getReg();
1016-     MachineInstr *VecExtractDef = getDefIgnoringCopies(VecExtractDst, MRI);
1017-
1018-     Register FpTruncDst = VecExtractDef->getOperand(1).getReg();
1019-     MachineInstr *FpTruncDef = getDefIgnoringCopies(FpTruncDst, MRI);
1020-
1021-     Register FpTruncSrc = FpTruncDef->getOperand(1).getReg();
1022-     MRI.setRegClass(FpTruncSrc, &AArch64::FPR128RegClass);
1023-
1024-     Register Fp32 = MRI.createGenericVirtualRegister(V2F32);
1025-     MRI.setRegClass(Fp32, &AArch64::FPR64RegClass);
1026-
1027-     B.buildInstr(AArch64::FCVTXNv2f32, {Fp32}, {FpTruncSrc});
1028-
1029-     // Only 4f32 -> 4f16 is legal, so we need to mimic that situation.
1030-     Register Fp32Padding = B.buildUndef(V2F32).getReg(0);
1031-     MRI.setRegClass(Fp32Padding, &AArch64::FPR64RegClass);
1032-
1033-     Register Fp32Full = MRI.createGenericVirtualRegister(V4F32);
1034-     MRI.setRegClass(Fp32Full, &AArch64::FPR128RegClass);
1035-     B.buildConcatVectors(Fp32Full, {Fp32, Fp32Padding});
1036-
1037-     Register Fp16 = MRI.createGenericVirtualRegister(V4F16);
1038-     MRI.setRegClass(Fp16, &AArch64::FPR64RegClass);
1039-     B.buildFPTrunc(Fp16, Fp32Full);
1040-
1041-     MRI.replaceRegWith(Dst, Fp16);
1042-     MI.eraseFromParent();
1043-     break;
1044-   }
1045-   case TargetOpcode::G_CONCAT_VECTORS: {
1046-     // Get the two FPTRUNCs that are being concatenated.
1047-     Register FpTrunc1Dst = ParentDef->getOperand(1).getReg();
1048-     Register FpTrunc2Dst = ParentDef->getOperand(2).getReg();
1049-
1050-     MachineInstr *FpTrunc1Def = getDefIgnoringCopies(FpTrunc1Dst, MRI);
1051-     MachineInstr *FpTrunc2Def = getDefIgnoringCopies(FpTrunc2Dst, MRI);
1052-
1053-     // Make the registers 128-bit to store the two doubles.
1054-     Register LoFp64 = FpTrunc1Def->getOperand(1).getReg();
1055-     MRI.setRegClass(LoFp64, &AArch64::FPR128RegClass);
1056-     Register HiFp64 = FpTrunc2Def->getOperand(1).getReg();
1057-     MRI.setRegClass(HiFp64, &AArch64::FPR128RegClass);
1058-
1059-     // Convert the lower half.
1060-     Register LoFp32 = MRI.createGenericVirtualRegister(V2F32);
1061-     MRI.setRegClass(LoFp32, &AArch64::FPR64RegClass);
1062-     B.buildInstr(AArch64::FCVTXNv2f32, {LoFp32}, {LoFp64});
1063-
1064-     // Create a register for the high half to use.
1065-     Register AccUndef = MRI.createGenericVirtualRegister(V4F32);
1066-     MRI.setRegClass(AccUndef, &AArch64::FPR128RegClass);
1067-     B.buildUndef(AccUndef);
1068-
1069-     Register Acc = MRI.createGenericVirtualRegister(V4F32);
1070-     MRI.setRegClass(Acc, &AArch64::FPR128RegClass);
1071-     B.buildInstr(TargetOpcode::INSERT_SUBREG)
1072-         .addDef(Acc)
1073-         .addUse(AccUndef)
1074-         .addUse(LoFp32)
1075-         .addImm(AArch64::dsub);
1076-
1077-     // Convert the high half.
1078-     Register AccOut = MRI.createGenericVirtualRegister(V4F32);
1079-     MRI.setRegClass(AccOut, &AArch64::FPR128RegClass);
1080-     B.buildInstr(AArch64::FCVTXNv4f32)
1081-         .addDef(AccOut)
1082-         .addUse(Acc)
1083-         .addUse(HiFp64);
1084-
1085-     Register Fp16 = MRI.createGenericVirtualRegister(V4F16);
1086-     MRI.setRegClass(Fp16, &AArch64::FPR64RegClass);
1087-     B.buildFPTrunc(Fp16, AccOut);
1088-
1089-     MRI.replaceRegWith(Dst, Fp16);
1090-     MI.eraseFromParent();
1091-     break;
1092-   }
1093-   }
1094- }
1095-
1096 904 /// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
1097 905 /// instruction \p MI.
1098 906 bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
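
For context on the double-rounding hazard the removed combine was addressing (see the comment above matchFpTruncFpTrunc): narrowing f64 to f16 through f32 rounds twice, and the two roundings can compound into a result that differs from a single f64 -> f16 rounding, whereas a round-to-odd first step (FCVTXN) preserves the information the second rounding needs. Below is a minimal standalone sketch of the hazard, not part of this patch; it assumes a host compiler and target with _Float16 support (e.g. Clang on AArch64).

#include <cstdio>

int main() {
  // 1.0 + 2^-11 + 2^-25 lies just above the f16 rounding boundary at
  // 1.0 + 2^-11, but a first narrowing to f32 rounds it down onto that
  // boundary, so the subsequent f32 -> f16 tie breaks the other way.
  double D = 1.0 + 0x1p-11 + 0x1p-25;

  _Float16 Direct = (_Float16)D;          // one rounding:  f64 -> f16
  _Float16 TwoStep = (_Float16)(float)D;  // two roundings: f64 -> f32 -> f16

  std::printf("direct:   %a\n", (double)Direct);  // 0x1.004p+0, i.e. 1 + 2^-10
  std::printf("two-step: %a\n", (double)TwoStep); // 0x1p+0, i.e. 1.0
  return 0;
}

With a round-to-odd intermediate step the f32 value keeps its low bit set (1 + 2^-11 + 2^-23), so the final f16 rounding again yields 1 + 2^-10 and matches the single-step result, which is why the removed lowering used FCVTXN for the first truncation.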