5353#include "llvm/CodeGen/SelectionDAGNodes.h"
5454#include "llvm/CodeGen/TargetCallingConv.h"
5555#include "llvm/CodeGen/TargetInstrInfo.h"
56+ #include "llvm/CodeGen/TargetLowering.h"
5657#include "llvm/CodeGen/TargetOpcodes.h"
5758#include "llvm/CodeGen/ValueTypes.h"
5859#include "llvm/CodeGenTypes/MachineValueType.h"
@@ -1268,6 +1269,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
12681269 for (auto Op : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
12691270 ISD::STRICT_UINT_TO_FP})
12701271 for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
1272+ // and this is where v2i64 is marked custom
12711273 setOperationAction(Op, VT, Custom);
12721274
12731275 if (Subtarget->hasFullFP16()) {
@@ -1285,6 +1287,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
12851287 } else {
12861288 // when AArch64 doesn't have fullfp16 support, promote the input
12871289 // to i32 first.
1290+ // this is where the promotion from v8i8 to v8i32 is dictated.
12881291 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
12891292 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
12901293 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
@@ -5095,6 +5098,31 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
50955098 uint64_t VTSize = VT.getFixedSizeInBits();
50965099 uint64_t InVTSize = InVT.getFixedSizeInBits();
50975100 if (VTSize < InVTSize) {
5101+ bool IsTargetf32Orf64 = VT.getVectorElementType() == MVT::f32 ||
5102+ VT.getVectorElementType() == MVT::f64;
5103+ bool IsTargetf16 = false;
5104+ // Check whether Op's single-use chain (CONCAT_VECTORS -> FP_ROUND) ends in an f16 vector.
5105+ if (Op.hasOneUse() &&
5106+ Op->user_begin()->getOpcode() == ISD::CONCAT_VECTORS &&
5107+ Op->user_begin()->hasOneUse() &&
5108+ Op->user_begin()->user_begin()->getOpcode() == ISD::FP_ROUND &&
5109+ Op->user_begin()->user_begin()->getValueType(0).isVector() &&
5110+ Op->user_begin()
5111+ ->user_begin()
5112+ ->getValueType(0)
5113+ .getVectorElementType() == MVT::f16) {
5114+ IsTargetf16 = true;
5115+ }
5116+
5117+ if (IsTargetf32Orf64 && !IsTargetf16) {
5118+ // AArch64 has no single vector instruction that converts an
5119+ // integer to floating point AND narrows it at the same time.
5120+ // The extra rounding step when the target is f32/f64 causes subtle
5121+ // differences from platforms that do have such instructions.
5122+ // Conversion to f16, however, is fine.
5123+ return SDValue();
5124+ }
5125+
50985126 MVT CastVT =
50995127 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
51005128 InVT.getVectorNumElements());
0 commit comments