From 491755c243f06e12b9ee134334aeda3416839a76 Mon Sep 17 00:00:00 2001 From: Chauhan Jaydeep Ashwinbhai Date: Thu, 2 Jan 2025 23:22:45 +0800 Subject: [PATCH 1/6] Map vector saturated converts to public intrinsics --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 33 +++ llvm/lib/Target/X86/X86ISelLowering.cpp | 65 ++++- llvm/lib/Target/X86/X86ISelLowering.h | 4 + llvm/lib/Target/X86/X86InstrAVX10.td | 56 ++++ .../CodeGen/X86/avx10_2_512fptosi_satcvtds.ll | 122 +++++++++ .../CodeGen/X86/avx10_2fptosi_satcvtds.ll | 244 ++++++++++++++++++ 6 files changed, 522 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 9b340a778b36a..0dbae94d3f58e 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -5456,6 +5456,39 @@ void X86DAGToDAGISel::Select(SDNode *Node) { break; } + case X86ISD::FP_TO_SINT_SAT_CUSTOM: + case X86ISD::FP_TO_UINT_SAT_CUSTOM: + if (Subtarget->hasAVX10_2()) { + bool IsSigned = Node->getOpcode() == X86ISD::FP_TO_SINT_SAT_CUSTOM; + SDValue Op = Node->getOperand(0); + EVT VT = Node->getValueType(0); + EVT OpVT = Op.getValueType(); + MachineSDNode *MachineNode; + + if (VT == MVT::v4i32 && OpVT == MVT::v4f32) { + if (IsSigned) + MachineNode = CurDAG->getMachineNode(X86::VCVTTPD2DQSZ128rr, dl, + MVT::v4i32, Op); + else + MachineNode = CurDAG->getMachineNode(X86::VCVTTPD2UDQSZ128rr, dl, + MVT::v4i32, Op); + } + + if ((VT == MVT::v2i64 && OpVT == MVT::v2f64)) { + if (IsSigned) + MachineNode = CurDAG->getMachineNode(X86::VCVTTPS2QQSZ128rr, dl, + MVT::v2i64, Op); + else + MachineNode = CurDAG->getMachineNode(X86::VCVTTPS2UQQSZ128rr, dl, + MVT::v2i64, Op); + } + + SDValue NewNode = SDValue(MachineNode, 0); + ReplaceNode(Node, NewNode.getNode()); + return; + } + break; + case X86ISD::ANDNP: if (tryVPTERNLOG(Node)) return; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a0514e93d6598..3364043cda056 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -341,8 +341,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } if (Subtarget.hasAVX10_2()) { - setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Legal); + setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v2i32, Custom); + setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v2i32, Custom); + for (MVT VT : {MVT::i32, MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, + MVT::v4i64, MVT::v8i64}) { + setOperationAction(ISD::FP_TO_UINT_SAT, VT, Legal); + setOperationAction(ISD::FP_TO_SINT_SAT, VT, Legal); + } if (Subtarget.is64Bit()) { setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Legal); setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Legal); @@ -2656,6 +2661,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP, ISD::STRICT_UINT_TO_FP, + ISD::FP_TO_SINT_SAT, + ISD::FP_TO_UINT_SAT, ISD::SETCC, ISD::MUL, ISD::XOR, @@ -33665,6 +33672,30 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } return; } + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: { + if (!Subtarget.hasAVX10_2()) + return; + + bool IsSigned = Opc == ISD::FP_TO_SINT_SAT; + EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + EVT OpVT = Op.getValueType(); + SDValue V4I32; + + if (VT == MVT::v2i32 && OpVT == MVT::v2f64) { + SDValue V4f32 = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Op); + if (IsSigned) + V4I32 = + DAG.getNode(X86ISD::FP_TO_SINT_SAT_CUSTOM, dl, MVT::v4i32, V4f32); + else + V4I32 = + DAG.getNode(X86ISD::FP_TO_UINT_SAT_CUSTOM, dl, MVT::v4i32, V4f32); + Results.push_back(V4I32); + return; + } + break; + } case ISD::FP_TO_SINT: case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: @@ -34645,6 +34676,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VPERMV3) NODE_NAME_CASE(VPERMI) NODE_NAME_CASE(VPTERNLOG) + NODE_NAME_CASE(FP_TO_SINT_SAT_CUSTOM) + NODE_NAME_CASE(FP_TO_UINT_SAT_CUSTOM) NODE_NAME_CASE(VFIXUPIMM) NODE_NAME_CASE(VFIXUPIMM_SAE) NODE_NAME_CASE(VFIXUPIMMS) @@ -56202,6 +56235,32 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Custom handling for VCVTTPS2QQS/VCVTTPS2UQQS +static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + if (!Subtarget.hasAVX10_2()) + return SDValue(); + + bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT; + EVT SrcVT = N->getOperand(0).getValueType(); + EVT DstVT = N->getValueType(0); + SDLoc dl(N); + + if (SrcVT == MVT::v2f32 && DstVT == MVT::v2i64) { + // Convert v2f32 to v2f64 + SDValue V2F64 = + DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, N->getOperand(0)); + + // Select the FP_TO_SINT_SAT_CUSTOM/FP_TO_UINT_SAT_CUSTOM node + if (IsSigned) + return DAG.getNode(X86ISD::FP_TO_SINT_SAT_CUSTOM, dl, MVT::v2i64, V2F64); + else + return DAG.getNode(X86ISD::FP_TO_UINT_SAT_CUSTOM, dl, MVT::v2i64, V2F64); + } + + return SDValue(); +} + static bool needCarryOrOverflowFlag(SDValue Flags) { assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!"); @@ -59315,6 +59374,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::INTRINSIC_WO_CHAIN: return combineINTRINSIC_WO_CHAIN(N, DAG, DCI); case ISD::INTRINSIC_W_CHAIN: return combineINTRINSIC_W_CHAIN(N, DAG, DCI); case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI); + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget); // clang-format on } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 2b7a8eaf249d8..0c04cf122bddd 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -908,6 +908,10 @@ namespace llvm { // Load x87 FPU environment from memory. FLDENVm, + // Custom handling for FP_TO_xINT_SAT + FP_TO_SINT_SAT_CUSTOM, + FP_TO_UINT_SAT_CUSTOM, + /// This instruction implements FP_TO_SINT with the /// integer destination in memory and a FP reg source. This corresponds /// to the X86::FIST*m instructions and the rounding mode change stuff. It diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index 3bc64eda01a9c..e373111fe6c00 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -831,6 +831,62 @@ let Predicates = [HasAVX10_2] in { // patterns have been disabled with null_frag. // Patterns VCVTTPD2DQSZ128 +// VCVTTPD2DQS +def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)), + (VCVTTPD2DQSZ256rr VR256X:$src)>; +def : Pat<(v8i32(fp_to_sint_sat(v8f64 VR512:$src), i32)), + (VCVTTPD2DQSZrr VR512:$src)>; + +// VCVTTPD2QQS +def : Pat<(v2i64(fp_to_sint_sat(v2f64 VR128X:$src), i64)), + (VCVTTPD2QQSZ128rr VR128X:$src)>; +def : Pat<(v4i64(fp_to_sint_sat(v4f64 VR256X:$src), i64)), + (VCVTTPD2QQSZ256rr VR256X:$src)>; +def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)), + (VCVTTPD2QQSZrr VR512:$src)>; + +// VCVTTPD2UDQS +def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)), + (VCVTTPD2UDQSZ256rr VR256X:$src)>; +def : Pat<(v8i32(fp_to_uint_sat(v8f64 VR512:$src), i32)), + (VCVTTPD2UDQSZrr VR512:$src)>; + +// VCVTTPD2UQQS +def : Pat<(v2i64(fp_to_uint_sat(v2f64 VR128X:$src), i64)), + (VCVTTPD2UQQSZ128rr VR128X:$src)>; +def : Pat<(v4i64(fp_to_uint_sat(v4f64 VR256X:$src), i64)), + (VCVTTPD2UQQSZ256rr VR256X:$src)>; +def : Pat<(v8i64(fp_to_uint_sat(v8f64 VR512:$src), i64)), + (VCVTTPD2UQQSZrr VR512:$src)>; + +// VCVTTPS2DQS +def : Pat<(v4i32(fp_to_sint_sat(v4f32 VR128X:$src), i32)), + (VCVTTPS2DQSZ128rr VR128X:$src)>; +def : Pat<(v8i32(fp_to_sint_sat(v8f32 VR256X:$src), i32)), + (VCVTTPS2DQSZ256rr VR256X:$src)>; +def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)), + (VCVTTPS2DQSZrr VR512:$src)>; + +// VCVTTPS2QQS +def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)), + (VCVTTPS2QQSZ256rr VR128X:$src)>; +def : Pat<(v8i64(fp_to_sint_sat(v8f32 VR256X:$src), i64)), + (VCVTTPS2QQSZrr VR256X:$src)>; + +// VCVTTPS2UDQS +def : Pat<(v4i32(fp_to_uint_sat(v4f32 VR128X:$src), i32)), + (VCVTTPS2UDQSZ128rr VR128X:$src)>; +def : Pat<(v8i32(fp_to_uint_sat(v8f32 VR256X:$src), i32)), + (VCVTTPS2UDQSZ256rr VR256X:$src)>; +def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)), + (VCVTTPS2UDQSZrr VR512:$src)>; + +// VCVTTPS2UQQS +def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)), + (VCVTTPS2UQQSZ256rr VR128X:$src)>; +def : Pat<(v8i64(fp_to_uint_sat(v8f32 VR256X:$src), i64)), + (VCVTTPS2UQQSZrr VR256X:$src)>; + def : Pat<(v4i32 (X86cvttp2sis (v2f64 VR128X:$src))), (VCVTTPD2DQSZ128rr VR128X:$src)>; def : Pat<(v4i32 (X86cvttp2sis (loadv2f64 addr:$src))), diff --git a/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll new file mode 100644 index 0000000000000..70465a28bad6a --- /dev/null +++ b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64 + +; VCVTTPD2DQS +define <8 x i32> @test_signed_v8i32_v8f64(<8 x double> %f) nounwind { +; X86-LABEL: test_signed_v8i32_v8f64: +; X86: # %bb.0: +; X86-NEXT: vcvttpd2dqs %zmm0, %ymm0 +; X86-NEXT: retl +; +; X64-LABEL: test_signed_v8i32_v8f64: +; X64: # %bb.0: +; X64-NEXT: vcvttpd2dqs %zmm0, %ymm0 +; X64-NEXT: retq + %x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> %f) + ret <8 x i32> %x +} + +; VCVTTPD2QQS +define <8 x i64> @test_signed_v8i64_v8f64(<8 x double> %f) nounwind { +; X86-LABEL: test_signed_v8i64_v8f64: +; X86: # %bb.0: +; X86-NEXT: vcvttpd2qqs %zmm0, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_signed_v8i64_v8f64: +; X64: # %bb.0: +; X64-NEXT: vcvttpd2qqs %zmm0, %zmm0 +; X64-NEXT: retq + %x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> %f) + ret <8 x i64> %x +} + +; VCVTTPD2UDQS +define <8 x i32> @test_unsigned_v8i32_v8f64(<8 x double> %f) nounwind { +; X86-LABEL: test_unsigned_v8i32_v8f64: +; X86: # %bb.0: +; X86-NEXT: vcvttpd2udqs %zmm0, %ymm0 +; X86-NEXT: retl +; +; X64-LABEL: test_unsigned_v8i32_v8f64: +; X64: # %bb.0: +; X64-NEXT: vcvttpd2udqs %zmm0, %ymm0 +; X64-NEXT: retq + %x = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> %f) + ret <8 x i32> %x +} + +; VCVTTPD2UQQS +define <8 x i64> @test_unsigned_v8i64_v8f64(<8 x double> %f) nounwind { +; X86-LABEL: test_unsigned_v8i64_v8f64: +; X86: # %bb.0: +; X86-NEXT: vcvttpd2uqqs %zmm0, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_unsigned_v8i64_v8f64: +; X64: # %bb.0: +; X64-NEXT: vcvttpd2uqqs %zmm0, %zmm0 +; X64-NEXT: retq + %x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> %f) + ret <8 x i64> %x +} + +; VCVTTPS2DQS +define <16 x i32> @test_signed_v16i32_v16f32(<16 x float> %f) nounwind { +; X86-LABEL: test_signed_v16i32_v16f32: +; X86: # %bb.0: +; X86-NEXT: vcvttps2dqs %zmm0, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_signed_v16i32_v16f32: +; X64: # %bb.0: +; X64-NEXT: vcvttps2dqs %zmm0, %zmm0 +; X64-NEXT: retq + %x = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> %f) + ret <16 x i32> %x +} + +; VCVTTPS2UDQS +define <16 x i32> @test_unsigned_v16i32_v16f32(<16 x float> %f) nounwind { +; X86-LABEL: test_unsigned_v16i32_v16f32: +; X86: # %bb.0: +; X86-NEXT: vcvttps2udqs %zmm0, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_unsigned_v16i32_v16f32: +; X64: # %bb.0: +; X64-NEXT: vcvttps2udqs %zmm0, %zmm0 +; X64-NEXT: retq + %x = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> %f) + ret <16 x i32> %x +} +; VCVTTPS2QQS +define <8 x i64> @test_signed_v8i64_v8f32(<8 x float> %f) nounwind { +; X86-LABEL: test_signed_v8i64_v8f32: +; X86: # %bb.0: +; X86-NEXT: vcvttps2qqs %ymm0, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_signed_v8i64_v8f32: +; X64: # %bb.0: +; X64-NEXT: vcvttps2qqs %ymm0, %zmm0 +; X64-NEXT: retq + %x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> %f) + ret <8 x i64> %x +} + +; VCVTTPS2UQQS +define <8 x i64> @test_unsigned_v8i64_v8f32(<8 x float> %f) nounwind { +; X86-LABEL: test_unsigned_v8i64_v8f32: +; X86: # %bb.0: +; X86-NEXT: vcvttps2uqqs %ymm0, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_unsigned_v8i64_v8f32: +; X64: # %bb.0: +; X64-NEXT: vcvttps2uqqs %ymm0, %zmm0 +; X64-NEXT: retq + %x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> %f) + ret <8 x i64> %x +} diff --git a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll index 494e4bc8e068e..0c731a09f8dbd 100644 --- a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll +++ b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll @@ -112,3 +112,247 @@ define i64 @test_signed_i64_f64(double %f) nounwind { %x = call i64 @llvm.fptosi.sat.i64.f64(double %f) ret i64 %x } + +; VCVTTPD2DQS +define <2 x i32> @test_signed_v2i32_v2f64(<2 x double> %d) nounwind { +; X86-LABEL: test_signed_v2i32_v2f64: +; X86: # %bb.0: +; X86-NEXT: vcvtpd2ps %xmm0, %xmm0 +; X86-NEXT: vcvttpd2dqs %xmm0, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_signed_v2i32_v2f64: +; X64: # %bb.0: +; X64-NEXT: vcvtpd2ps %xmm0, %xmm0 +; X64-NEXT: vcvttpd2dqs %xmm0, %xmm0 +; X64-NEXT: retq + %x = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %d) + ret <2 x i32> %x +} + +define <4 x i32> @test_signed_v4i32_v4f64(<4 x double> %f) nounwind { +; X86-LABEL: test_signed_v4i32_v4f64: +; X86: # %bb.0: +; X86-NEXT: vcvttpd2dqs %ymm0, %xmm0 +; X86-NEXT: vzeroupper +; X86-NEXT: retl +; +; X64-LABEL: test_signed_v4i32_v4f64: +; X64: # %bb.0: +; X64-NEXT: vcvttpd2dqs %ymm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq + %x = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> %f) + ret <4 x i32> %x +} + +; VCVTTPD2QQS +define <2 x i64> @test_signed_v2i64_v2f64(<2 x double> %f) nounwind { +; X86-LABEL: test_signed_v2i64_v2f64: +; X86: # %bb.0: +; X86-NEXT: vcvttpd2qqs %xmm0, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_signed_v2i64_v2f64: +; X64: # %bb.0: +; X64-NEXT: vcvttpd2qqs %xmm0, %xmm0 +; X64-NEXT: retq + %x = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %f) + ret <2 x i64> %x +} + +define <4 x i64> @test_signed_v4i64_v4f64(<4 x double> %f) nounwind { +; X86-LABEL: test_signed_v4i64_v4f64: +; X86: # %bb.0: +; X86-NEXT: vcvttpd2qqs %ymm0, %ymm0 +; X86-NEXT: retl +; +; X64-LABEL: test_signed_v4i64_v4f64: +; X64: # %bb.0: +; X64-NEXT: vcvttpd2qqs %ymm0, %ymm0 +; X64-NEXT: retq + %x = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> %f) + ret <4 x i64> %x +} + +; VCVTTPD2UDQS +define <2 x i32> @test_unsigned_v2i32_v2f64(<2 x double> %d) nounwind { +; X86-LABEL: test_unsigned_v2i32_v2f64: +; X86: # %bb.0: +; X86-NEXT: vcvtpd2ps %xmm0, %xmm0 +; X86-NEXT: vcvttpd2udqs %xmm0, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_unsigned_v2i32_v2f64: +; X64: # %bb.0: +; X64-NEXT: vcvtpd2ps %xmm0, %xmm0 +; X64-NEXT: vcvttpd2udqs %xmm0, %xmm0 +; X64-NEXT: retq + %x = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %d) + ret <2 x i32> %x +} + +define <4 x i32> @test_unsigned_v4i32_v4f64(<4 x double> %f) nounwind { +; X86-LABEL: test_unsigned_v4i32_v4f64: +; X86: # %bb.0: +; X86-NEXT: vcvttpd2udqs %ymm0, %xmm0 +; X86-NEXT: vzeroupper +; X86-NEXT: retl +; +; X64-LABEL: test_unsigned_v4i32_v4f64: +; X64: # %bb.0: +; X64-NEXT: vcvttpd2udqs %ymm0, %xmm0 +; X64-NEXT: vzeroupper +; X64-NEXT: retq + %x = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> %f) + ret <4 x i32> %x +} + +; VCVTTPD2UQQS +define <2 x i64> @test_unsigned_v2i64_v2f64(<2 x double> %f) nounwind { +; X86-LABEL: test_unsigned_v2i64_v2f64: +; X86: # %bb.0: +; X86-NEXT: vcvttpd2uqqs %xmm0, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_unsigned_v2i64_v2f64: +; X64: # %bb.0: +; X64-NEXT: vcvttpd2uqqs %xmm0, %xmm0 +; X64-NEXT: retq + %x = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %f) + ret <2 x i64> %x +} + +define <4 x i64> @test_unsigned_v4i64_v4f64(<4 x double> %f) nounwind { +; X86-LABEL: test_unsigned_v4i64_v4f64: +; X86: # %bb.0: +; X86-NEXT: vcvttpd2uqqs %ymm0, %ymm0 +; X86-NEXT: retl +; +; X64-LABEL: test_unsigned_v4i64_v4f64: +; X64: # %bb.0: +; X64-NEXT: vcvttpd2uqqs %ymm0, %ymm0 +; X64-NEXT: retq + %x = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> %f) + ret <4 x i64> %x +} + +; VCVTTPS2DQS +define <4 x i32> @test_signed_v4i32_v4f32(<4 x float> %f) nounwind { +; X86-LABEL: test_signed_v4i32_v4f32: +; X86: # %bb.0: +; X86-NEXT: vcvttps2dqs %xmm0, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_signed_v4i32_v4f32: +; X64: # %bb.0: +; X64-NEXT: vcvttps2dqs %xmm0, %xmm0 +; X64-NEXT: retq + %x = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %f) + ret <4 x i32> %x +} + +define <8 x i32> @test_signed_v8i32_v8f32(<8 x float> %f) nounwind { +; X86-LABEL: test_signed_v8i32_v8f32: +; X86: # %bb.0: +; X86-NEXT: vcvttps2dqs %ymm0, %ymm0 +; X86-NEXT: retl +; +; X64-LABEL: test_signed_v8i32_v8f32: +; X64: # %bb.0: +; X64-NEXT: vcvttps2dqs %ymm0, %ymm0 +; X64-NEXT: retq + %x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> %f) + ret <8 x i32> %x +} + +; VCVTTPS2UDQS +define <4 x i32> @test_unsigned_v4i32_v4f32(<4 x float> %f) nounwind { +; X86-LABEL: test_unsigned_v4i32_v4f32: +; X86: # %bb.0: +; X86-NEXT: vcvttps2udqs %xmm0, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_unsigned_v4i32_v4f32: +; X64: # %bb.0: +; X64-NEXT: vcvttps2udqs %xmm0, %xmm0 +; X64-NEXT: retq + %x = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %f) + ret <4 x i32> %x +} + +define <8 x i32> @test_unsigned_v8i32_v8f32(<8 x float> %f) nounwind { +; X86-LABEL: test_unsigned_v8i32_v8f32: +; X86: # %bb.0: +; X86-NEXT: vcvttps2udqs %ymm0, %ymm0 +; X86-NEXT: retl +; +; X64-LABEL: test_unsigned_v8i32_v8f32: +; X64: # %bb.0: +; X64-NEXT: vcvttps2udqs %ymm0, %ymm0 +; X64-NEXT: retq + %x = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> %f) + ret <8 x i32> %x +} + +; VCVTTPS2QQS +define <2 x i64> @test_signed_v2i64_v2f32(<2 x float> %f) nounwind { +; X86-LABEL: test_signed_v2i64_v2f32: +; X86: # %bb.0: +; X86-NEXT: vcvtps2pd %xmm0, %xmm0 +; X86-NEXT: vcvttps2qqs %xmm0, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_signed_v2i64_v2f32: +; X64: # %bb.0: +; X64-NEXT: vcvtps2pd %xmm0, %xmm0 +; X64-NEXT: vcvttps2qqs %xmm0, %xmm0 +; X64-NEXT: retq + %x = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %f) + ret <2 x i64> %x +} + +define <4 x i64> @test_signed_v4i64_v4f32(<4 x float> %f) nounwind { +; X86-LABEL: test_signed_v4i64_v4f32: +; X86: # %bb.0: +; X86-NEXT: vcvttps2qqs %xmm0, %ymm0 +; X86-NEXT: retl +; +; X64-LABEL: test_signed_v4i64_v4f32: +; X64: # %bb.0: +; X64-NEXT: vcvttps2qqs %xmm0, %ymm0 +; X64-NEXT: retq + %x = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> %f) + ret <4 x i64> %x +} + +; VCVTTPS2UQQS +define <2 x i64> @test_unsigned_v2i64_v2f32(<2 x float> %f) nounwind { +; X86-LABEL: test_unsigned_v2i64_v2f32: +; X86: # %bb.0: +; X86-NEXT: vcvtps2pd %xmm0, %xmm0 +; X86-NEXT: vcvttps2uqqs %xmm0, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_unsigned_v2i64_v2f32: +; X64: # %bb.0: +; X64-NEXT: vcvtps2pd %xmm0, %xmm0 +; X64-NEXT: vcvttps2uqqs %xmm0, %xmm0 +; X64-NEXT: retq + %x = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> %f) + ret <2 x i64> %x +} + +define <4 x i64> @test_unsigned_v4i64_v4f32(<4 x float> %f) nounwind { +; X86-LABEL: test_unsigned_v4i64_v4f32: +; X86: # %bb.0: +; X86-NEXT: vcvttps2uqqs %xmm0, %ymm0 +; X86-NEXT: retl +; +; X64-LABEL: test_unsigned_v4i64_v4f32: +; X64: # %bb.0: +; X64-NEXT: vcvttps2uqqs %xmm0, %ymm0 +; X64-NEXT: retq + %x = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> %f) + ret <4 x i64> %x +} From a51c29dde73000309155a3ecb2500d8488b300ab Mon Sep 17 00:00:00 2001 From: Chauhan Jaydeep Ashwinbhai Date: Fri, 3 Jan 2025 16:19:12 +0800 Subject: [PATCH 2/6] Addressed the review comments1 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 58 +++-- llvm/lib/Target/X86/X86ISelLowering.cpp | 25 +- llvm/lib/Target/X86/X86ISelLowering.h | 4 +- .../CodeGen/X86/avx10_2_512fptosi_satcvtds.ll | 111 +++------ .../CodeGen/X86/avx10_2fptosi_satcvtds.ll | 230 ++++++------------ 5 files changed, 152 insertions(+), 276 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 0dbae94d3f58e..5e736a9bbb7ac 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -5456,39 +5456,37 @@ void X86DAGToDAGISel::Select(SDNode *Node) { break; } - case X86ISD::FP_TO_SINT_SAT_CUSTOM: - case X86ISD::FP_TO_UINT_SAT_CUSTOM: - if (Subtarget->hasAVX10_2()) { - bool IsSigned = Node->getOpcode() == X86ISD::FP_TO_SINT_SAT_CUSTOM; - SDValue Op = Node->getOperand(0); - EVT VT = Node->getValueType(0); - EVT OpVT = Op.getValueType(); - MachineSDNode *MachineNode; - - if (VT == MVT::v4i32 && OpVT == MVT::v4f32) { - if (IsSigned) - MachineNode = CurDAG->getMachineNode(X86::VCVTTPD2DQSZ128rr, dl, - MVT::v4i32, Op); - else - MachineNode = CurDAG->getMachineNode(X86::VCVTTPD2UDQSZ128rr, dl, - MVT::v4i32, Op); - } - - if ((VT == MVT::v2i64 && OpVT == MVT::v2f64)) { - if (IsSigned) - MachineNode = CurDAG->getMachineNode(X86::VCVTTPS2QQSZ128rr, dl, - MVT::v2i64, Op); - else - MachineNode = CurDAG->getMachineNode(X86::VCVTTPS2UQQSZ128rr, dl, - MVT::v2i64, Op); - } + case X86ISD::FP_TO_SINT_SAT: + case X86ISD::FP_TO_UINT_SAT: { + assert(Subtarget->hasAVX10_2() && "Unsupported node"); + bool IsSigned = Node->getOpcode() == X86ISD::FP_TO_SINT_SAT; + SDValue Op = Node->getOperand(0); + EVT VT = Node->getValueType(0); + EVT OpVT = Op.getValueType(); + MachineSDNode *MachineNode; + + if (VT == MVT::v4i32 && OpVT == MVT::v4f32) { + if (IsSigned) + MachineNode = + CurDAG->getMachineNode(X86::VCVTTPD2DQSZ128rr, dl, MVT::v4i32, Op); + else + MachineNode = + CurDAG->getMachineNode(X86::VCVTTPD2UDQSZ128rr, dl, MVT::v4i32, Op); + } - SDValue NewNode = SDValue(MachineNode, 0); - ReplaceNode(Node, NewNode.getNode()); - return; + if ((VT == MVT::v2i64 && OpVT == MVT::v2f64)) { + if (IsSigned) + MachineNode = + CurDAG->getMachineNode(X86::VCVTTPS2QQSZ128rr, dl, MVT::v2i64, Op); + else + MachineNode = + CurDAG->getMachineNode(X86::VCVTTPS2UQQSZ128rr, dl, MVT::v2i64, Op); } - break; + SDValue NewNode = SDValue(MachineNode, 0); + ReplaceNode(Node, NewNode.getNode()); + return; + } case X86ISD::ANDNP: if (tryVPTERNLOG(Node)) return; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3364043cda056..a009706daede4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -344,10 +344,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v2i32, Custom); setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v2i32, Custom); for (MVT VT : {MVT::i32, MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, - MVT::v4i64, MVT::v8i64}) { + MVT::v4i64}) { setOperationAction(ISD::FP_TO_UINT_SAT, VT, Legal); setOperationAction(ISD::FP_TO_SINT_SAT, VT, Legal); } + if (Subtarget.hasAVX10_2_512()) { + setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v8i64, Legal); + setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v8i64, Legal); + } if (Subtarget.is64Bit()) { setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Legal); setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Legal); @@ -33686,11 +33690,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, if (VT == MVT::v2i32 && OpVT == MVT::v2f64) { SDValue V4f32 = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Op); if (IsSigned) - V4I32 = - DAG.getNode(X86ISD::FP_TO_SINT_SAT_CUSTOM, dl, MVT::v4i32, V4f32); + V4I32 = DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v4i32, V4f32); else - V4I32 = - DAG.getNode(X86ISD::FP_TO_UINT_SAT_CUSTOM, dl, MVT::v4i32, V4f32); + V4I32 = DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v4i32, V4f32); Results.push_back(V4I32); return; } @@ -34676,8 +34678,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VPERMV3) NODE_NAME_CASE(VPERMI) NODE_NAME_CASE(VPTERNLOG) - NODE_NAME_CASE(FP_TO_SINT_SAT_CUSTOM) - NODE_NAME_CASE(FP_TO_UINT_SAT_CUSTOM) + NODE_NAME_CASE(FP_TO_SINT_SAT) + NODE_NAME_CASE(FP_TO_UINT_SAT) NODE_NAME_CASE(VFIXUPIMM) NODE_NAME_CASE(VFIXUPIMM_SAE) NODE_NAME_CASE(VFIXUPIMMS) @@ -56251,13 +56253,12 @@ static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG, SDValue V2F64 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, N->getOperand(0)); - // Select the FP_TO_SINT_SAT_CUSTOM/FP_TO_UINT_SAT_CUSTOM node + // Select the FP_TO_SINT_SAT/FP_TO_UINT_SAT node if (IsSigned) - return DAG.getNode(X86ISD::FP_TO_SINT_SAT_CUSTOM, dl, MVT::v2i64, V2F64); - else - return DAG.getNode(X86ISD::FP_TO_UINT_SAT_CUSTOM, dl, MVT::v2i64, V2F64); - } + return DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v2i64, V2F64); + return DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v2i64, V2F64); + } return SDValue(); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 0c04cf122bddd..eaedaa0b88d22 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -909,8 +909,8 @@ namespace llvm { FLDENVm, // Custom handling for FP_TO_xINT_SAT - FP_TO_SINT_SAT_CUSTOM, - FP_TO_UINT_SAT_CUSTOM, + FP_TO_SINT_SAT, + FP_TO_UINT_SAT, /// This instruction implements FP_TO_SINT with the /// integer destination in memory and a FP reg source. This corresponds diff --git a/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll index 70465a28bad6a..d7ad7b048c6d6 100644 --- a/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll +++ b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll @@ -1,122 +1,85 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64 ; VCVTTPD2DQS define <8 x i32> @test_signed_v8i32_v8f64(<8 x double> %f) nounwind { -; X86-LABEL: test_signed_v8i32_v8f64: -; X86: # %bb.0: -; X86-NEXT: vcvttpd2dqs %zmm0, %ymm0 -; X86-NEXT: retl -; -; X64-LABEL: test_signed_v8i32_v8f64: -; X64: # %bb.0: -; X64-NEXT: vcvttpd2dqs %zmm0, %ymm0 -; X64-NEXT: retq +; CHECK-LABEL: test_signed_v8i32_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttpd2dqs %zmm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> %f) ret <8 x i32> %x } ; VCVTTPD2QQS define <8 x i64> @test_signed_v8i64_v8f64(<8 x double> %f) nounwind { -; X86-LABEL: test_signed_v8i64_v8f64: -; X86: # %bb.0: -; X86-NEXT: vcvttpd2qqs %zmm0, %zmm0 -; X86-NEXT: retl -; -; X64-LABEL: test_signed_v8i64_v8f64: -; X64: # %bb.0: -; X64-NEXT: vcvttpd2qqs %zmm0, %zmm0 -; X64-NEXT: retq +; CHECK-LABEL: test_signed_v8i64_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttpd2qqs %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> %f) ret <8 x i64> %x } ; VCVTTPD2UDQS define <8 x i32> @test_unsigned_v8i32_v8f64(<8 x double> %f) nounwind { -; X86-LABEL: test_unsigned_v8i32_v8f64: -; X86: # %bb.0: -; X86-NEXT: vcvttpd2udqs %zmm0, %ymm0 -; X86-NEXT: retl -; -; X64-LABEL: test_unsigned_v8i32_v8f64: -; X64: # %bb.0: -; X64-NEXT: vcvttpd2udqs %zmm0, %ymm0 -; X64-NEXT: retq +; CHECK-LABEL: test_unsigned_v8i32_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttpd2udqs %zmm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> %f) ret <8 x i32> %x } ; VCVTTPD2UQQS define <8 x i64> @test_unsigned_v8i64_v8f64(<8 x double> %f) nounwind { -; X86-LABEL: test_unsigned_v8i64_v8f64: -; X86: # %bb.0: -; X86-NEXT: vcvttpd2uqqs %zmm0, %zmm0 -; X86-NEXT: retl -; -; X64-LABEL: test_unsigned_v8i64_v8f64: -; X64: # %bb.0: -; X64-NEXT: vcvttpd2uqqs %zmm0, %zmm0 -; X64-NEXT: retq +; CHECK-LABEL: test_unsigned_v8i64_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttpd2uqqs %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> %f) ret <8 x i64> %x } ; VCVTTPS2DQS define <16 x i32> @test_signed_v16i32_v16f32(<16 x float> %f) nounwind { -; X86-LABEL: test_signed_v16i32_v16f32: -; X86: # %bb.0: -; X86-NEXT: vcvttps2dqs %zmm0, %zmm0 -; X86-NEXT: retl -; -; X64-LABEL: test_signed_v16i32_v16f32: -; X64: # %bb.0: -; X64-NEXT: vcvttps2dqs %zmm0, %zmm0 -; X64-NEXT: retq +; CHECK-LABEL: test_signed_v16i32_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttps2dqs %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> %f) ret <16 x i32> %x } ; VCVTTPS2UDQS define <16 x i32> @test_unsigned_v16i32_v16f32(<16 x float> %f) nounwind { -; X86-LABEL: test_unsigned_v16i32_v16f32: -; X86: # %bb.0: -; X86-NEXT: vcvttps2udqs %zmm0, %zmm0 -; X86-NEXT: retl -; -; X64-LABEL: test_unsigned_v16i32_v16f32: -; X64: # %bb.0: -; X64-NEXT: vcvttps2udqs %zmm0, %zmm0 -; X64-NEXT: retq +; CHECK-LABEL: test_unsigned_v16i32_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttps2udqs %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> %f) ret <16 x i32> %x } ; VCVTTPS2QQS define <8 x i64> @test_signed_v8i64_v8f32(<8 x float> %f) nounwind { -; X86-LABEL: test_signed_v8i64_v8f32: -; X86: # %bb.0: -; X86-NEXT: vcvttps2qqs %ymm0, %zmm0 -; X86-NEXT: retl -; -; X64-LABEL: test_signed_v8i64_v8f32: -; X64: # %bb.0: -; X64-NEXT: vcvttps2qqs %ymm0, %zmm0 -; X64-NEXT: retq +; CHECK-LABEL: test_signed_v8i64_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttps2qqs %ymm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> %f) ret <8 x i64> %x } ; VCVTTPS2UQQS define <8 x i64> @test_unsigned_v8i64_v8f32(<8 x float> %f) nounwind { -; X86-LABEL: test_unsigned_v8i64_v8f32: -; X86: # %bb.0: -; X86-NEXT: vcvttps2uqqs %ymm0, %zmm0 -; X86-NEXT: retl -; -; X64-LABEL: test_unsigned_v8i64_v8f32: -; X64: # %bb.0: -; X64-NEXT: vcvttps2uqqs %ymm0, %zmm0 -; X64-NEXT: retq +; CHECK-LABEL: test_unsigned_v8i64_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttps2uqqs %ymm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> %f) ret <8 x i64> %x } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; X64: {{.*}} +; X86: {{.*}} diff --git a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll index 0c731a09f8dbd..a975a23917064 100644 --- a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll +++ b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64 ; ; 32-bit float to signed integer @@ -115,244 +115,158 @@ define i64 @test_signed_i64_f64(double %f) nounwind { ; VCVTTPD2DQS define <2 x i32> @test_signed_v2i32_v2f64(<2 x double> %d) nounwind { -; X86-LABEL: test_signed_v2i32_v2f64: -; X86: # %bb.0: -; X86-NEXT: vcvtpd2ps %xmm0, %xmm0 -; X86-NEXT: vcvttpd2dqs %xmm0, %xmm0 -; X86-NEXT: retl -; -; X64-LABEL: test_signed_v2i32_v2f64: -; X64: # %bb.0: -; X64-NEXT: vcvtpd2ps %xmm0, %xmm0 -; X64-NEXT: vcvttpd2dqs %xmm0, %xmm0 -; X64-NEXT: retq +; CHECK-LABEL: test_signed_v2i32_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0 +; CHECK-NEXT: vcvttpd2dqs %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %d) ret <2 x i32> %x } define <4 x i32> @test_signed_v4i32_v4f64(<4 x double> %f) nounwind { -; X86-LABEL: test_signed_v4i32_v4f64: -; X86: # %bb.0: -; X86-NEXT: vcvttpd2dqs %ymm0, %xmm0 -; X86-NEXT: vzeroupper -; X86-NEXT: retl -; -; X64-LABEL: test_signed_v4i32_v4f64: -; X64: # %bb.0: -; X64-NEXT: vcvttpd2dqs %ymm0, %xmm0 -; X64-NEXT: vzeroupper -; X64-NEXT: retq +; CHECK-LABEL: test_signed_v4i32_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttpd2dqs %ymm0, %xmm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: ret{{[l|q]}} %x = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> %f) ret <4 x i32> %x } ; VCVTTPD2QQS define <2 x i64> @test_signed_v2i64_v2f64(<2 x double> %f) nounwind { -; X86-LABEL: test_signed_v2i64_v2f64: -; X86: # %bb.0: -; X86-NEXT: vcvttpd2qqs %xmm0, %xmm0 -; X86-NEXT: retl -; -; X64-LABEL: test_signed_v2i64_v2f64: -; X64: # %bb.0: -; X64-NEXT: vcvttpd2qqs %xmm0, %xmm0 -; X64-NEXT: retq +; CHECK-LABEL: test_signed_v2i64_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttpd2qqs %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %f) ret <2 x i64> %x } define <4 x i64> @test_signed_v4i64_v4f64(<4 x double> %f) nounwind { -; X86-LABEL: test_signed_v4i64_v4f64: -; X86: # %bb.0: -; X86-NEXT: vcvttpd2qqs %ymm0, %ymm0 -; X86-NEXT: retl -; -; X64-LABEL: test_signed_v4i64_v4f64: -; X64: # %bb.0: -; X64-NEXT: vcvttpd2qqs %ymm0, %ymm0 -; X64-NEXT: retq +; CHECK-LABEL: test_signed_v4i64_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttpd2qqs %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> %f) ret <4 x i64> %x } ; VCVTTPD2UDQS define <2 x i32> @test_unsigned_v2i32_v2f64(<2 x double> %d) nounwind { -; X86-LABEL: test_unsigned_v2i32_v2f64: -; X86: # %bb.0: -; X86-NEXT: vcvtpd2ps %xmm0, %xmm0 -; X86-NEXT: vcvttpd2udqs %xmm0, %xmm0 -; X86-NEXT: retl -; -; X64-LABEL: test_unsigned_v2i32_v2f64: -; X64: # %bb.0: -; X64-NEXT: vcvtpd2ps %xmm0, %xmm0 -; X64-NEXT: vcvttpd2udqs %xmm0, %xmm0 -; X64-NEXT: retq +; CHECK-LABEL: test_unsigned_v2i32_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0 +; CHECK-NEXT: vcvttpd2udqs %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %d) ret <2 x i32> %x } define <4 x i32> @test_unsigned_v4i32_v4f64(<4 x double> %f) nounwind { -; X86-LABEL: test_unsigned_v4i32_v4f64: -; X86: # %bb.0: -; X86-NEXT: vcvttpd2udqs %ymm0, %xmm0 -; X86-NEXT: vzeroupper -; X86-NEXT: retl -; -; X64-LABEL: test_unsigned_v4i32_v4f64: -; X64: # %bb.0: -; X64-NEXT: vcvttpd2udqs %ymm0, %xmm0 -; X64-NEXT: vzeroupper -; X64-NEXT: retq +; CHECK-LABEL: test_unsigned_v4i32_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttpd2udqs %ymm0, %xmm0 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: ret{{[l|q]}} %x = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> %f) ret <4 x i32> %x } ; VCVTTPD2UQQS define <2 x i64> @test_unsigned_v2i64_v2f64(<2 x double> %f) nounwind { -; X86-LABEL: test_unsigned_v2i64_v2f64: -; X86: # %bb.0: -; X86-NEXT: vcvttpd2uqqs %xmm0, %xmm0 -; X86-NEXT: retl -; -; X64-LABEL: test_unsigned_v2i64_v2f64: -; X64: # %bb.0: -; X64-NEXT: vcvttpd2uqqs %xmm0, %xmm0 -; X64-NEXT: retq +; CHECK-LABEL: test_unsigned_v2i64_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttpd2uqqs %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %f) ret <2 x i64> %x } define <4 x i64> @test_unsigned_v4i64_v4f64(<4 x double> %f) nounwind { -; X86-LABEL: test_unsigned_v4i64_v4f64: -; X86: # %bb.0: -; X86-NEXT: vcvttpd2uqqs %ymm0, %ymm0 -; X86-NEXT: retl -; -; X64-LABEL: test_unsigned_v4i64_v4f64: -; X64: # %bb.0: -; X64-NEXT: vcvttpd2uqqs %ymm0, %ymm0 -; X64-NEXT: retq +; CHECK-LABEL: test_unsigned_v4i64_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttpd2uqqs %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> %f) ret <4 x i64> %x } ; VCVTTPS2DQS define <4 x i32> @test_signed_v4i32_v4f32(<4 x float> %f) nounwind { -; X86-LABEL: test_signed_v4i32_v4f32: -; X86: # %bb.0: -; X86-NEXT: vcvttps2dqs %xmm0, %xmm0 -; X86-NEXT: retl -; -; X64-LABEL: test_signed_v4i32_v4f32: -; X64: # %bb.0: -; X64-NEXT: vcvttps2dqs %xmm0, %xmm0 -; X64-NEXT: retq +; CHECK-LABEL: test_signed_v4i32_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttps2dqs %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %f) ret <4 x i32> %x } define <8 x i32> @test_signed_v8i32_v8f32(<8 x float> %f) nounwind { -; X86-LABEL: test_signed_v8i32_v8f32: -; X86: # %bb.0: -; X86-NEXT: vcvttps2dqs %ymm0, %ymm0 -; X86-NEXT: retl -; -; X64-LABEL: test_signed_v8i32_v8f32: -; X64: # %bb.0: -; X64-NEXT: vcvttps2dqs %ymm0, %ymm0 -; X64-NEXT: retq +; CHECK-LABEL: test_signed_v8i32_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttps2dqs %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> %f) ret <8 x i32> %x } ; VCVTTPS2UDQS define <4 x i32> @test_unsigned_v4i32_v4f32(<4 x float> %f) nounwind { -; X86-LABEL: test_unsigned_v4i32_v4f32: -; X86: # %bb.0: -; X86-NEXT: vcvttps2udqs %xmm0, %xmm0 -; X86-NEXT: retl -; -; X64-LABEL: test_unsigned_v4i32_v4f32: -; X64: # %bb.0: -; X64-NEXT: vcvttps2udqs %xmm0, %xmm0 -; X64-NEXT: retq +; CHECK-LABEL: test_unsigned_v4i32_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttps2udqs %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %f) ret <4 x i32> %x } define <8 x i32> @test_unsigned_v8i32_v8f32(<8 x float> %f) nounwind { -; X86-LABEL: test_unsigned_v8i32_v8f32: -; X86: # %bb.0: -; X86-NEXT: vcvttps2udqs %ymm0, %ymm0 -; X86-NEXT: retl -; -; X64-LABEL: test_unsigned_v8i32_v8f32: -; X64: # %bb.0: -; X64-NEXT: vcvttps2udqs %ymm0, %ymm0 -; X64-NEXT: retq +; CHECK-LABEL: test_unsigned_v8i32_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttps2udqs %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> %f) ret <8 x i32> %x } ; VCVTTPS2QQS define <2 x i64> @test_signed_v2i64_v2f32(<2 x float> %f) nounwind { -; X86-LABEL: test_signed_v2i64_v2f32: -; X86: # %bb.0: -; X86-NEXT: vcvtps2pd %xmm0, %xmm0 -; X86-NEXT: vcvttps2qqs %xmm0, %xmm0 -; X86-NEXT: retl -; -; X64-LABEL: test_signed_v2i64_v2f32: -; X64: # %bb.0: -; X64-NEXT: vcvtps2pd %xmm0, %xmm0 -; X64-NEXT: vcvttps2qqs %xmm0, %xmm0 -; X64-NEXT: retq +; CHECK-LABEL: test_signed_v2i64_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0 +; CHECK-NEXT: vcvttps2qqs %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %f) ret <2 x i64> %x } define <4 x i64> @test_signed_v4i64_v4f32(<4 x float> %f) nounwind { -; X86-LABEL: test_signed_v4i64_v4f32: -; X86: # %bb.0: -; X86-NEXT: vcvttps2qqs %xmm0, %ymm0 -; X86-NEXT: retl -; -; X64-LABEL: test_signed_v4i64_v4f32: -; X64: # %bb.0: -; X64-NEXT: vcvttps2qqs %xmm0, %ymm0 -; X64-NEXT: retq +; CHECK-LABEL: test_signed_v4i64_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttps2qqs %xmm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> %f) ret <4 x i64> %x } ; VCVTTPS2UQQS define <2 x i64> @test_unsigned_v2i64_v2f32(<2 x float> %f) nounwind { -; X86-LABEL: test_unsigned_v2i64_v2f32: -; X86: # %bb.0: -; X86-NEXT: vcvtps2pd %xmm0, %xmm0 -; X86-NEXT: vcvttps2uqqs %xmm0, %xmm0 -; X86-NEXT: retl -; -; X64-LABEL: test_unsigned_v2i64_v2f32: -; X64: # %bb.0: -; X64-NEXT: vcvtps2pd %xmm0, %xmm0 -; X64-NEXT: vcvttps2uqqs %xmm0, %xmm0 -; X64-NEXT: retq +; CHECK-LABEL: test_unsigned_v2i64_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0 +; CHECK-NEXT: vcvttps2uqqs %xmm0, %xmm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> %f) ret <2 x i64> %x } define <4 x i64> @test_unsigned_v4i64_v4f32(<4 x float> %f) nounwind { -; X86-LABEL: test_unsigned_v4i64_v4f32: -; X86: # %bb.0: -; X86-NEXT: vcvttps2uqqs %xmm0, %ymm0 -; X86-NEXT: retl -; -; X64-LABEL: test_unsigned_v4i64_v4f32: -; X64: # %bb.0: -; X64-NEXT: vcvttps2uqqs %xmm0, %ymm0 -; X64-NEXT: retq +; CHECK-LABEL: test_unsigned_v4i64_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvttps2uqqs %xmm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} %x = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> %f) ret <4 x i64> %x } From 94234cbb9e13aa2628f1ee2dd1f15cf74297d750 Mon Sep 17 00:00:00 2001 From: Chauhan Jaydeep Ashwinbhai Date: Fri, 3 Jan 2025 20:52:50 +0800 Subject: [PATCH 3/6] Addressed the review comments2 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 31 -------------------- llvm/lib/Target/X86/X86InstrAVX10.td | 8 +++++ llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 7 +++++ 3 files changed, 15 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 5e736a9bbb7ac..9b340a778b36a 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -5456,37 +5456,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) { break; } - case X86ISD::FP_TO_SINT_SAT: - case X86ISD::FP_TO_UINT_SAT: { - assert(Subtarget->hasAVX10_2() && "Unsupported node"); - bool IsSigned = Node->getOpcode() == X86ISD::FP_TO_SINT_SAT; - SDValue Op = Node->getOperand(0); - EVT VT = Node->getValueType(0); - EVT OpVT = Op.getValueType(); - MachineSDNode *MachineNode; - - if (VT == MVT::v4i32 && OpVT == MVT::v4f32) { - if (IsSigned) - MachineNode = - CurDAG->getMachineNode(X86::VCVTTPD2DQSZ128rr, dl, MVT::v4i32, Op); - else - MachineNode = - CurDAG->getMachineNode(X86::VCVTTPD2UDQSZ128rr, dl, MVT::v4i32, Op); - } - - if ((VT == MVT::v2i64 && OpVT == MVT::v2f64)) { - if (IsSigned) - MachineNode = - CurDAG->getMachineNode(X86::VCVTTPS2QQSZ128rr, dl, MVT::v2i64, Op); - else - MachineNode = - CurDAG->getMachineNode(X86::VCVTTPS2UQQSZ128rr, dl, MVT::v2i64, Op); - } - - SDValue NewNode = SDValue(MachineNode, 0); - ReplaceNode(Node, NewNode.getNode()); - return; - } case X86ISD::ANDNP: if (tryVPTERNLOG(Node)) return; diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index 5b8806593fbb8..91ba80f4ea697 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -835,6 +835,8 @@ let Predicates = [HasAVX10_2] in { // Patterns VCVTTPD2DQSZ128 // VCVTTPD2DQS +def : Pat<(v4i32(X86fp2sisat(v4f32 VR128X:$src))), + (VCVTTPD2DQSZ128rr VR128X:$src)>; def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)), (VCVTTPD2DQSZ256rr VR256X:$src)>; def : Pat<(v8i32(fp_to_sint_sat(v8f64 VR512:$src), i32)), @@ -849,6 +851,8 @@ def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)), (VCVTTPD2QQSZrr VR512:$src)>; // VCVTTPD2UDQS +def : Pat<(v4i32(X86fp2uisat(v4f32 VR128X:$src))), + (VCVTTPD2UDQSZ128rr VR128X:$src)>; def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)), (VCVTTPD2UDQSZ256rr VR256X:$src)>; def : Pat<(v8i32(fp_to_uint_sat(v8f64 VR512:$src), i32)), @@ -871,6 +875,8 @@ def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)), (VCVTTPS2DQSZrr VR512:$src)>; // VCVTTPS2QQS +def : Pat<(v2i64(X86fp2sisat(v2f64 VR128X:$src))), + (VCVTTPS2QQSZ128rr VR128X:$src)>; def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)), (VCVTTPS2QQSZ256rr VR128X:$src)>; def : Pat<(v8i64(fp_to_sint_sat(v8f32 VR256X:$src), i64)), @@ -885,6 +891,8 @@ def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)), (VCVTTPS2UDQSZrr VR512:$src)>; // VCVTTPS2UQQS +def : Pat<(v2i64(X86fp2uisat(v2f64 VR128X:$src))), + (VCVTTPS2UQQSZ128rr VR128X:$src)>; def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)), (VCVTTPS2UQQSZ256rr VR128X:$src)>; def : Pat<(v8i64(fp_to_uint_sat(v8f32 VR256X:$src), i64)), diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index f6231b78f4c2e..af0267a7d32c3 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -390,6 +390,13 @@ def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>, SDTCisFP<0>, SDTCisVT<4, i32>]>; +def SDTFPToxIntSatOp + : SDTypeProfile<1, + 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisFP<1>]>; + +def X86fp2sisat : SDNode<"X86ISD::FP_TO_SINT_SAT", SDTFPToxIntSatOp>; +def X86fp2uisat : SDNode<"X86ISD::FP_TO_UINT_SAT", SDTFPToxIntSatOp>; + def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>, From 2a5149ba1ce7c7b6ed14b9bc22335488a6f6a5d1 Mon Sep 17 00:00:00 2001 From: Chauhan Jaydeep Ashwinbhai Date: Sat, 4 Jan 2025 17:51:54 +0800 Subject: [PATCH 4/6] Addressed the review comments3 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 22 ++++++++++--------- llvm/lib/Target/X86/X86InstrAVX10.td | 8 +++---- .../CodeGen/X86/avx10_2fptosi_satcvtds.ll | 4 ---- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a009706daede4..267d8701824de 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33685,15 +33685,14 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); EVT OpVT = Op.getValueType(); - SDValue V4I32; + SDValue Res; if (VT == MVT::v2i32 && OpVT == MVT::v2f64) { - SDValue V4f32 = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Op); if (IsSigned) - V4I32 = DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v4i32, V4f32); + Res = DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v4i32, Op); else - V4I32 = DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v4i32, V4f32); - Results.push_back(V4I32); + Res = DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v4i32, Op); + Results.push_back(Res); return; } break; @@ -56249,15 +56248,18 @@ static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG, SDLoc dl(N); if (SrcVT == MVT::v2f32 && DstVT == MVT::v2i64) { - // Convert v2f32 to v2f64 - SDValue V2F64 = - DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, N->getOperand(0)); + // Create an undefined value of type v2f32 + SDValue UndefV2F32Value = DAG.getUNDEF(MVT::v2f32); + + // Concatenate the original v2f32 input and undef v2f32 to create v4f32 + SDValue NewSrc = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, + N->getOperand(0), UndefV2F32Value); // Select the FP_TO_SINT_SAT/FP_TO_UINT_SAT node if (IsSigned) - return DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v2i64, V2F64); + return DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v2i64, NewSrc); - return DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v2i64, V2F64); + return DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v2i64, NewSrc); } return SDValue(); } diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index 91ba80f4ea697..127016184bc17 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -835,7 +835,7 @@ let Predicates = [HasAVX10_2] in { // Patterns VCVTTPD2DQSZ128 // VCVTTPD2DQS -def : Pat<(v4i32(X86fp2sisat(v4f32 VR128X:$src))), +def : Pat<(v4i32(X86fp2sisat(v2f64 VR128X:$src))), (VCVTTPD2DQSZ128rr VR128X:$src)>; def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)), (VCVTTPD2DQSZ256rr VR256X:$src)>; @@ -851,7 +851,7 @@ def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)), (VCVTTPD2QQSZrr VR512:$src)>; // VCVTTPD2UDQS -def : Pat<(v4i32(X86fp2uisat(v4f32 VR128X:$src))), +def : Pat<(v4i32(X86fp2uisat(v2f64 VR128X:$src))), (VCVTTPD2UDQSZ128rr VR128X:$src)>; def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)), (VCVTTPD2UDQSZ256rr VR256X:$src)>; @@ -875,7 +875,7 @@ def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)), (VCVTTPS2DQSZrr VR512:$src)>; // VCVTTPS2QQS -def : Pat<(v2i64(X86fp2sisat(v2f64 VR128X:$src))), +def : Pat<(v2i64(X86fp2sisat(v4f32 VR128X:$src))), (VCVTTPS2QQSZ128rr VR128X:$src)>; def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)), (VCVTTPS2QQSZ256rr VR128X:$src)>; @@ -891,7 +891,7 @@ def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)), (VCVTTPS2UDQSZrr VR512:$src)>; // VCVTTPS2UQQS -def : Pat<(v2i64(X86fp2uisat(v2f64 VR128X:$src))), +def : Pat<(v2i64(X86fp2uisat(v4f32 VR128X:$src))), (VCVTTPS2UQQSZ128rr VR128X:$src)>; def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)), (VCVTTPS2UQQSZ256rr VR128X:$src)>; diff --git a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll index a975a23917064..a2f167e94cc23 100644 --- a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll +++ b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll @@ -117,7 +117,6 @@ define i64 @test_signed_i64_f64(double %f) nounwind { define <2 x i32> @test_signed_v2i32_v2f64(<2 x double> %d) nounwind { ; CHECK-LABEL: test_signed_v2i32_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0 ; CHECK-NEXT: vcvttpd2dqs %xmm0, %xmm0 ; CHECK-NEXT: ret{{[l|q]}} %x = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %d) @@ -157,7 +156,6 @@ define <4 x i64> @test_signed_v4i64_v4f64(<4 x double> %f) nounwind { define <2 x i32> @test_unsigned_v2i32_v2f64(<2 x double> %d) nounwind { ; CHECK-LABEL: test_unsigned_v2i32_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0 ; CHECK-NEXT: vcvttpd2udqs %xmm0, %xmm0 ; CHECK-NEXT: ret{{[l|q]}} %x = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %d) @@ -235,7 +233,6 @@ define <8 x i32> @test_unsigned_v8i32_v8f32(<8 x float> %f) nounwind { define <2 x i64> @test_signed_v2i64_v2f32(<2 x float> %f) nounwind { ; CHECK-LABEL: test_signed_v2i64_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0 ; CHECK-NEXT: vcvttps2qqs %xmm0, %xmm0 ; CHECK-NEXT: ret{{[l|q]}} %x = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %f) @@ -255,7 +252,6 @@ define <4 x i64> @test_signed_v4i64_v4f32(<4 x float> %f) nounwind { define <2 x i64> @test_unsigned_v2i64_v2f32(<2 x float> %f) nounwind { ; CHECK-LABEL: test_unsigned_v2i64_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0 ; CHECK-NEXT: vcvttps2uqqs %xmm0, %xmm0 ; CHECK-NEXT: ret{{[l|q]}} %x = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> %f) From cb0ddf3dbad16a6646e24acb5ec294e197b5f6db Mon Sep 17 00:00:00 2001 From: Chauhan Jaydeep Ashwinbhai Date: Sat, 4 Jan 2025 19:00:10 +0800 Subject: [PATCH 5/6] Fixed the undef issue --- llvm/lib/Target/X86/X86ISelLowering.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 267d8701824de..bfde34537dc87 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -56248,12 +56248,11 @@ static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG, SDLoc dl(N); if (SrcVT == MVT::v2f32 && DstVT == MVT::v2i64) { - // Create an undefined value of type v2f32 - SDValue UndefV2F32Value = DAG.getUNDEF(MVT::v2f32); + SDValue V2F32Value = DAG.getUNDEF(SrcVT); - // Concatenate the original v2f32 input and undef v2f32 to create v4f32 + // Concatenate the original v2f32 input and V2F32Value to create v4f32 SDValue NewSrc = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, - N->getOperand(0), UndefV2F32Value); + N->getOperand(0), V2F32Value); // Select the FP_TO_SINT_SAT/FP_TO_UINT_SAT node if (IsSigned) From b3a52f13af959fa246fcdfe2ae78f53b30264d74 Mon Sep 17 00:00:00 2001 From: Chauhan Jaydeep Ashwinbhai Date: Sat, 4 Jan 2025 22:56:04 +0800 Subject: [PATCH 6/6] Fixed return --- llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index bfde34537dc87..07b9a30b57564 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33693,9 +33693,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, else Res = DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v4i32, Op); Results.push_back(Res); - return; } - break; + return; } case ISD::FP_TO_SINT: case ISD::STRICT_FP_TO_SINT: