Skip to content

Conversation

@AlexMaclean
Copy link
Member

This change rewrites LowerCall handling of byval arguments to vectorize the loads in addition to the stores. In addition various minor NFC updates and cleanups are made to reduce code duplication.

@llvmbot
Copy link
Member

llvmbot commented Jul 29, 2025

@llvm/pr-subscribers-backend-nvptx

Author: Alex MacLean (AlexMaclean)

Changes

This change rewrites LowerCall handling of byval arguments to vectorize the loads in addition to the stores. In addition various minor NFC updates and cleanups are made to reduce code duplication.


Patch is 41.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151070.diff

6 Files Affected:

  • (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+179-185)
  • (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (-6)
  • (added) llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll (+38)
  • (modified) llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll (+12-12)
  • (modified) llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll (+33-33)
  • (modified) llvm/test/CodeGen/NVPTX/param-vectorize-device.ll (+11-11)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index f79b8629f01e2..0a4726212bf12 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -382,6 +382,51 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
   }
 }
 
+static EVT getVectorizedVT(EVT VT, unsigned N, LLVMContext &C) {
+  if (N == 1)
+    return VT;
+
+  const unsigned PackingAmt = VT.isVector() ? VT.getVectorNumElements() : 1;
+  return EVT::getVectorVT(C, VT.getScalarType(), N * PackingAmt);
+}
+
+static SDValue getExtractVectorizedValue(SDValue V, unsigned I, EVT VT,
+                                         const SDLoc &dl, SelectionDAG &DAG) {
+  if (V.getValueType() == VT) {
+    assert(I == 0 && "Index must be 0 for scalar value");
+    return V;
+  }
+
+  if (!VT.isVector())
+    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, V,
+                       DAG.getVectorIdxConstant(I, dl));
+
+  return DAG.getNode(
+      ISD::EXTRACT_SUBVECTOR, dl, VT, V,
+      DAG.getVectorIdxConstant(I * VT.getVectorNumElements(), dl));
+}
+
+template <typename T>
+static inline SDValue getBuildVectorizedValue(T GetElement, unsigned N,
+                                              const SDLoc &dl,
+                                              SelectionDAG &DAG) {
+  if (N == 1)
+    return GetElement(0);
+
+  SmallVector<SDValue, 8> Values;
+  for (const unsigned I : llvm::seq(N)) {
+    SDValue Val = GetElement(I);
+    if (Val.getValueType().isVector())
+      DAG.ExtractVectorElements(Val, Values);
+    else
+      Values.push_back(Val);
+  }
+
+  EVT VT = EVT::getVectorVT(*DAG.getContext(), Values[0].getValueType(),
+                            Values.size());
+  return DAG.getBuildVector(VT, dl, Values);
+}
+
 /// PromoteScalarIntegerPTX
 /// Used to make sure the arguments/returns are suitable for passing
 /// and promote them to a larger size if they're not.
@@ -420,9 +465,10 @@ static EVT promoteScalarIntegerPTX(const EVT VT) {
 // parameter starting at index Idx using a single vectorized op of
 // size AccessSize. If so, it returns the number of param pieces
 // covered by the vector op. Otherwise, it returns 1.
-static unsigned CanMergeParamLoadStoresStartingAt(
+template <typename T>
+static unsigned canMergeParamLoadStoresStartingAt(
     unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs,
-    const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) {
+    const SmallVectorImpl<T> &Offsets, Align ParamAlignment) {
 
   // Can't vectorize if param alignment is not sufficient.
   if (ParamAlignment < AccessSize)
@@ -472,10 +518,11 @@ static unsigned CanMergeParamLoadStoresStartingAt(
 // of the same size as ValueVTs indicating how each piece should be
 // loaded/stored (i.e. as a scalar, or as part of a vector
 // load/store).
+template <typename T>
 static SmallVector<unsigned, 16>
 VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
-                     const SmallVectorImpl<uint64_t> &Offsets,
-                     Align ParamAlignment, bool IsVAArg = false) {
+                     const SmallVectorImpl<T> &Offsets, Align ParamAlignment,
+                     bool IsVAArg = false) {
   // Set vector size to match ValueVTs and mark all elements as
   // scalars by default.
 
@@ -486,7 +533,7 @@ VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
 
   const auto GetNumElts = [&](unsigned I) -> unsigned {
     for (const unsigned AccessSize : {16, 8, 4, 2}) {
-      const unsigned NumElts = CanMergeParamLoadStoresStartingAt(
+      const unsigned NumElts = canMergeParamLoadStoresStartingAt(
           I, AccessSize, ValueVTs, Offsets, ParamAlignment);
       assert((NumElts == 1 || NumElts == 2 || NumElts == 4) &&
              "Unexpected vectorization size");
@@ -1476,15 +1523,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     const SDValue ParamSymbol =
         getCallParamSymbol(DAG, IsVAArg ? FirstVAArg : ArgI, MVT::i32);
 
-    SmallVector<EVT, 16> VTs;
-    SmallVector<uint64_t, 16> Offsets;
-
     assert((!IsByVal || Arg.IndirectType) &&
            "byval arg must have indirect type");
     Type *ETy = (IsByVal ? Arg.IndirectType : Arg.Ty);
-    ComputePTXValueVTs(*this, DL, ETy, VTs, &Offsets, IsByVal ? 0 : VAOffset);
-    assert(VTs.size() == Offsets.size() && "Size mismatch");
-    assert((IsByVal || VTs.size() == ArgOuts.size()) && "Size mismatch");
 
     const Align ArgAlign = [&]() {
       if (IsByVal) {
@@ -1492,17 +1533,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
         // so we don't need to worry whether it's naturally aligned or not.
         // See TargetLowering::LowerCallTo().
         const Align InitialAlign = ArgOuts[0].Flags.getNonZeroByValAlign();
-        const Align ByValAlign = getFunctionByValParamAlign(
+        return getFunctionByValParamAlign(
             CB->getCalledFunction(), ETy, InitialAlign, DL);
-        if (IsVAArg)
-          VAOffset = alignTo(VAOffset, ByValAlign);
-        return ByValAlign;
       }
       return getArgumentAlignment(CB, Arg.Ty, ArgI + 1, DL);
     }();
 
-    const unsigned TypeSize = DL.getTypeAllocSize(ETy);
-    assert((!IsByVal || TypeSize == ArgOuts[0].Flags.getByValSize()) &&
+    const unsigned TySize = DL.getTypeAllocSize(ETy);
+    assert((!IsByVal || TySize == ArgOuts[0].Flags.getByValSize()) &&
            "type size mismatch");
 
     const SDValue ArgDeclare = [&]() {
@@ -1510,105 +1548,120 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
         return VADeclareParam;
 
       if (IsByVal || shouldPassAsArray(Arg.Ty))
-        return MakeDeclareArrayParam(ParamSymbol, ArgAlign, TypeSize);
+        return MakeDeclareArrayParam(ParamSymbol, ArgAlign, TySize);
 
       assert(ArgOuts.size() == 1 && "We must pass only one value as non-array");
       assert((ArgOuts[0].VT.isInteger() || ArgOuts[0].VT.isFloatingPoint()) &&
              "Only int and float types are supported as non-array arguments");
 
-      return MakeDeclareScalarParam(ParamSymbol, TypeSize);
+      return MakeDeclareScalarParam(ParamSymbol, TySize);
     }();
 
-    // PTX Interoperability Guide 3.3(A): [Integer] Values shorter
-    // than 32-bits are sign extended or zero extended, depending on
-    // whether they are signed or unsigned types. This case applies
-    // only to scalar parameters and not to aggregate values.
-    const bool ExtendIntegerParam =
-        Arg.Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Arg.Ty) < 32;
-
-    const auto GetStoredValue = [&](const unsigned I, EVT EltVT,
-                                    const MaybeAlign PartAlign) {
-      if (IsByVal) {
-        SDValue Ptr = ArgOutVals[0];
-        auto MPI = refinePtrAS(Ptr, DAG, DL, *this);
-        SDValue SrcAddr =
-            DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(Offsets[I]));
+    if (IsByVal) {
+      assert(ArgOutVals.size() == 1 && "We must pass only one value as byval");
+      SDValue SrcPtr = ArgOutVals[0];
+      auto PointerInfo = refinePtrAS(SrcPtr, DAG, DL, *this);
 
-        return DAG.getLoad(EltVT, dl, CallChain, SrcAddr, MPI, PartAlign);
-      }
-      SDValue StVal = ArgOutVals[I];
-      assert(promoteScalarIntegerPTX(StVal.getValueType()) ==
-                 StVal.getValueType() &&
-             "OutVal type should always be legal");
+      if (IsVAArg)
+        VAOffset = alignTo(VAOffset, ArgAlign);
 
-      const EVT VTI = promoteScalarIntegerPTX(VTs[I]);
-      const EVT StoreVT =
-          ExtendIntegerParam ? MVT::i32 : (VTI == MVT::i1 ? MVT::i8 : VTI);
+      SmallVector<EVT, 4> ValueVTs, MemVTs;
+      SmallVector<TypeSize, 4> Offsets;
+      ComputeValueVTs(*this, DL, ETy, ValueVTs, &MemVTs, &Offsets);
 
-      return correctParamType(StVal, StoreVT, ArgOuts[I].Flags, DAG, dl);
-    };
+      unsigned J = 0;
+      const auto VI = VectorizePTXValueVTs(MemVTs, Offsets, ArgAlign, IsVAArg);
+      for (const unsigned NumElts : VI) {
+        const Align Alignment = commonAlignment(ArgAlign, Offsets[J]);
+        const EVT LoadVT =
+            getVectorizedVT(MemVTs[J], NumElts, *DAG.getContext());
 
-    const auto VectorInfo =
-        VectorizePTXValueVTs(VTs, Offsets, ArgAlign, IsVAArg);
+        SDValue SrcAddr = DAG.getObjectPtrOffset(dl, SrcPtr, Offsets[J]);
+        SDValue Load =
+            DAG.getLoad(LoadVT, dl, CallChain, SrcAddr, PointerInfo, Alignment);
 
-    unsigned J = 0;
-    for (const unsigned NumElts : VectorInfo) {
-      const int CurOffset = Offsets[J];
-      const EVT EltVT = promoteScalarIntegerPTX(VTs[J]);
+        SDValue ByvalAddr = DAG.getObjectPtrOffset(
+            dl, ParamSymbol, Offsets[J].getWithIncrement(VAOffset));
 
-      if (IsVAArg && !IsByVal)
-        // Align each part of the variadic argument to their type.
-        VAOffset = alignTo(VAOffset, DAG.getEVTAlign(EltVT));
+        SDValue StoreParam =
+            DAG.getStore(ArgDeclare, dl, Load, ByvalAddr,
+                         MachinePointerInfo(ADDRESS_SPACE_PARAM), Alignment);
+        CallPrereqs.push_back(StoreParam);
 
-      assert((IsVAArg || VAOffset == 0) &&
-             "VAOffset must be 0 for non-VA args");
+        J += NumElts;
+      }
+      if (IsVAArg)
+        VAOffset += TySize;
+    } else {
+      SmallVector<EVT, 16> VTs;
+      SmallVector<uint64_t, 16> Offsets;
+      ComputePTXValueVTs(*this, DL, Arg.Ty, VTs, &Offsets, VAOffset);
+      assert(VTs.size() == Offsets.size() && "Size mismatch");
+      assert(VTs.size() == ArgOuts.size() && "Size mismatch");
+
+      // PTX Interoperability Guide 3.3(A): [Integer] Values shorter
+      // than 32-bits are sign extended or zero extended, depending on
+      // whether they are signed or unsigned types. This case applies
+      // only to scalar parameters and not to aggregate values.
+      const bool ExtendIntegerParam =
+          Arg.Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Arg.Ty) < 32;
+
+      const auto GetStoredValue = [&](const unsigned I) {
+        SDValue StVal = ArgOutVals[I];
+        assert(promoteScalarIntegerPTX(StVal.getValueType()) ==
+                   StVal.getValueType() &&
+               "OutVal type should always be legal");
+
+        const EVT VTI = promoteScalarIntegerPTX(VTs[I]);
+        const EVT StoreVT =
+            ExtendIntegerParam ? MVT::i32 : (VTI == MVT::i1 ? MVT::i8 : VTI);
+
+        return correctParamType(StVal, StoreVT, ArgOuts[I].Flags, DAG, dl);
+      };
+
+      unsigned J = 0;
+      const auto VI = VectorizePTXValueVTs(VTs, Offsets, ArgAlign, IsVAArg);
+      for (const unsigned NumElts : VI) {
+        const EVT EltVT = promoteScalarIntegerPTX(VTs[J]);
+
+        unsigned Offset;
+        if (IsVAArg) {
+          // TODO: We may need to support vector types that can be passed
+          // as scalars in variadic arguments.
+          assert(NumElts == 1 &&
+                 "Vectorization should be disabled for vaargs.");
+
+          // Align each part of the variadic argument to their type.
+          VAOffset = alignTo(VAOffset, DAG.getEVTAlign(EltVT));
+          Offset = VAOffset;
+
+          const EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;
+          VAOffset += DL.getTypeAllocSize(
+              TheStoreType.getTypeForEVT(*DAG.getContext()));
+        } else {
+          assert(VAOffset == 0 && "VAOffset must be 0 for non-VA args");
+          Offset = Offsets[J];
+        }
 
-      const unsigned Offset =
-          (VAOffset + ((IsVAArg && !IsByVal) ? 0 : CurOffset));
-      SDValue Ptr =
-          DAG.getObjectPtrOffset(dl, ParamSymbol, TypeSize::getFixed(Offset));
+        SDValue Ptr =
+            DAG.getObjectPtrOffset(dl, ParamSymbol, TypeSize::getFixed(Offset));
 
-      const MaybeAlign CurrentAlign = ExtendIntegerParam
-                                          ? MaybeAlign(std::nullopt)
-                                          : commonAlignment(ArgAlign, Offset);
+        const MaybeAlign CurrentAlign = ExtendIntegerParam
+                                            ? MaybeAlign(std::nullopt)
+                                            : commonAlignment(ArgAlign, Offset);
 
-      SDValue Val;
-      if (NumElts == 1) {
-        Val = GetStoredValue(J, EltVT, CurrentAlign);
-      } else {
-        SmallVector<SDValue, 8> StoreVals;
-        for (const unsigned K : llvm::seq(NumElts)) {
-          SDValue ValJ = GetStoredValue(J + K, EltVT, CurrentAlign);
-          if (ValJ.getValueType().isVector())
-            DAG.ExtractVectorElements(ValJ, StoreVals);
-          else
-            StoreVals.push_back(ValJ);
-        }
+        SDValue Val = getBuildVectorizedValue(
+            [&](unsigned K) { return GetStoredValue(J + K); }, NumElts, dl,
+            DAG);
 
-        EVT VT = EVT::getVectorVT(
-            *DAG.getContext(), StoreVals[0].getValueType(), StoreVals.size());
-        Val = DAG.getBuildVector(VT, dl, StoreVals);
-      }
+        SDValue StoreParam =
+            DAG.getStore(ArgDeclare, dl, Val, Ptr,
+                         MachinePointerInfo(ADDRESS_SPACE_PARAM), CurrentAlign);
+        CallPrereqs.push_back(StoreParam);
 
-      SDValue StoreParam =
-          DAG.getStore(ArgDeclare, dl, Val, Ptr,
-                       MachinePointerInfo(ADDRESS_SPACE_PARAM), CurrentAlign);
-      CallPrereqs.push_back(StoreParam);
-
-      // TODO: We may need to support vector types that can be passed
-      // as scalars in variadic arguments.
-      if (IsVAArg && !IsByVal) {
-        assert(NumElts == 1 &&
-               "Vectorization is expected to be disabled for variadics.");
-        const EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT;
-        VAOffset +=
-            DL.getTypeAllocSize(TheStoreType.getTypeForEVT(*DAG.getContext()));
+        J += NumElts;
       }
-
-      J += NumElts;
     }
-    if (IsVAArg && IsByVal)
-      VAOffset += TypeSize;
   }
 
   // Handle Result
@@ -1676,17 +1729,6 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     CallPrereqs.push_back(PrototypeDeclare);
   }
 
-  if (ConvertToIndirectCall) {
-    // Copy the function ptr to a ptx register and use the register to call the
-    // function.
-    const MVT DestVT = Callee.getValueType().getSimpleVT();
-    MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
-    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-    Register DestReg = MRI.createVirtualRegister(TLI.getRegClassFor(DestVT));
-    auto RegCopy = DAG.getCopyToReg(DAG.getEntryNode(), dl, DestReg, Callee);
-    Callee = DAG.getCopyFromReg(RegCopy, dl, DestReg, DestVT);
-  }
-
   const unsigned Proto = IsIndirectCall ? UniqueCallSite : 0;
   const unsigned NumArgs =
       std::min<unsigned>(CLI.NumFixedArgs + 1, Args.size());
@@ -1703,10 +1745,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   if (!Ins.empty()) {
     SmallVector<EVT, 16> VTs;
     SmallVector<uint64_t, 16> Offsets;
-    ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0);
+    ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
     assert(VTs.size() == Ins.size() && "Bad value decomposition");
 
     const Align RetAlign = getArgumentAlignment(CB, RetTy, 0, DL);
+    const SDValue RetSymbol = DAG.getExternalSymbol("retval0", MVT::i32);
 
     // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
     // 32-bits are sign extended or zero extended, depending on whether
@@ -1714,9 +1757,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     const bool ExtendIntegerRetVal =
         RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
 
-    const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
     unsigned I = 0;
-    for (const unsigned NumElts : VectorInfo) {
+    const auto VI = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
+    for (const unsigned NumElts : VI) {
       const MaybeAlign CurrentAlign =
           ExtendIntegerRetVal ? MaybeAlign(std::nullopt)
                               : commonAlignment(RetAlign, Offsets[I]);
@@ -1724,16 +1767,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       const EVT VTI = promoteScalarIntegerPTX(VTs[I]);
       const EVT LoadVT =
           ExtendIntegerRetVal ? MVT::i32 : (VTI == MVT::i1 ? MVT::i8 : VTI);
-
-      const unsigned PackingAmt =
-          LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1;
-
-      const EVT VecVT = NumElts == 1 ? LoadVT
-                                     : EVT::getVectorVT(*DAG.getContext(),
-                                                        LoadVT.getScalarType(),
-                                                        NumElts * PackingAmt);
-
-      const SDValue RetSymbol = DAG.getExternalSymbol("retval0", MVT::i32);
+      const EVT VecVT = getVectorizedVT(LoadVT, NumElts, *DAG.getContext());
       SDValue Ptr =
           DAG.getObjectPtrOffset(dl, RetSymbol, TypeSize::getFixed(Offsets[I]));
 
@@ -1742,17 +1776,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                       MachinePointerInfo(ADDRESS_SPACE_PARAM), CurrentAlign);
 
       LoadChains.push_back(R.getValue(1));
-
-      if (NumElts == 1)
-        ProxyRegOps.push_back(R);
-      else
-        for (const unsigned J : llvm::seq(NumElts)) {
-          SDValue Elt = DAG.getNode(
-              LoadVT.isVector() ? ISD::EXTRACT_SUBVECTOR
-                                : ISD::EXTRACT_VECTOR_ELT,
-              dl, LoadVT, R, DAG.getVectorIdxConstant(J * PackingAmt, dl));
-          ProxyRegOps.push_back(Elt);
-        }
+      for (const unsigned J : llvm::seq(NumElts))
+        ProxyRegOps.push_back(getExtractVectorizedValue(R, J, LoadVT, dl, DAG));
       I += NumElts;
     }
   }
@@ -3227,11 +3252,10 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
-  MachineFunction &MF = DAG.getMachineFunction();
   const DataLayout &DL = DAG.getDataLayout();
   auto PtrVT = getPointerTy(DAG.getDataLayout());
 
-  const Function *F = &MF.getFunction();
+  const Function &F = DAG.getMachineFunction().getFunction();
 
   SDValue Root = DAG.getRoot();
   SmallVector<SDValue, 16> OutChains;
@@ -3247,7 +3271,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
   // See similar issue in LowerCall.
 
   auto AllIns = ArrayRef(Ins);
-  for (const auto &Arg : F->args()) {
+  for (const auto &Arg : F.args()) {
     const auto ArgIns = AllIns.take_while(
         [&](auto I) { return I.OrigArgIndex == Arg.getArgNo(); });
     AllIns = AllIns.drop_front(ArgIns.size());
@@ -3287,7 +3311,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
       assert(ByvalIn.VT == PtrVT && "ByVal argument must be a pointer");
 
       SDValue P;
-      if (isKernelFunction(*F)) {
+      if (isKernelFunction(F)) {
         P = ArgSymbol;
         P.getNode()->setIROrder(Arg.getArgNo() + 1);
       } else {
@@ -3305,43 +3329,27 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
       assert(VTs.size() == Offsets.size() && "Size mismatch");
 
       const Align ArgAlign = getFunctionArgumentAlignment(
-          F, Ty, Arg.getArgNo() + AttributeList::FirstArgIndex, DL);
+          &F, Ty, Arg.getArgNo() + AttributeList::FirstArgIndex, DL);
 
-      const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign);
       unsigned I = 0;
-      for (const unsigned NumElts : VectorInfo) {
+      const auto VI = VectorizePTXValueVTs(VTs, Offsets, ArgAlign);
+      for (const unsigned NumElts : VI) {
         // i1 is loaded/stored as i8
         const EVT LoadVT = VTs[I] == MVT::i1 ? MVT::i8 : VTs[I];
-        // If the element is a packed type (ex. v2f16, v4i8, etc) holding
-        // multiple elements.
-        const unsigned PackingAmt =
-            LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1;
-
-        const EVT VecVT =
-            NumElts == 1
-                ? LoadVT
-                : EVT::getVectorVT(F->getContext(), LoadVT.getScalarType(),
-                                   NumElts * PackingAmt);
+        const EVT VecVT = getVectorizedVT(...
[truncated]

@github-actions
Copy link

github-actions bot commented Jul 29, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@AlexMaclean AlexMaclean force-pushed the dev/amaclean/byval-align branch from 3937042 to cdfc0e3 Compare July 29, 2025 17:26
Copy link
Member

@Artem-B Artem-B left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM overall, with a few style nits.

}
}

static EVT getVectorizedVT(EVT VT, unsigned N, LLVMContext &C) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could use a comment.

We return an EVT that can hold N VTs
If the VT is a vector, the resulting EVT is a flat vector with the same element type as VT's element type (as opposed to N-element vector of VT vectors).

If may be better for readability to restructure the code so we don't need to explain it as a comment:

   return VT.isVector()
        ? EVT::getVectorVT(C, VT.getScalarType(), VT.getVectorNumElements()  * N)
        : EVT::getVectorVT(C, VT, N);

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice. Switched to doing it like this.

}

template <typename T>
static inline SDValue getBuildVectorizedValue(T GetElement, unsigned N,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: callables are somewhat more readable when they are passed as the last argument. Otherwise subsequent function argument are somewhat lost in the noise of closing brackets of the passed lambda.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved.

Comment on lines +1632 to +1633
assert(NumElts == 1 &&
"Vectorization should be disabled for vaargs.");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we hit this code path now? How do we handle it before this patch?

If this is a constraint on the user input, the check should probably be done somewhere in the verifier, and failing in a way that does not rely on the assertions enabled/disabled.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This check was in here before, I just moved it around. All we're doing is confirming that we didn't attempt to vectorize the vaargs stores, we can still handle vector types so this is not a constraint on user input, just confirming we did the right thing internally.

StoreVals.push_back(ValJ);
}
SDValue Val = getBuildVectorizedValue(
[&](unsigned K) { return GetStoredValue(J + K); }, NumElts, dl,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I'd use [=] here to indicate that we're not intending to modify the captured value.

; CHECK-LABEL: .visible .func caller_St4x3(
; CHECK: .param .align 4 .b8 caller_St4x3_param_0[12],
; CHECK: .param .b64 caller_St4x3_param_1
; CHECK: )
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps it's good opportunity to convert it to automatic checks, too. We do want to see that the values are passed in correct order and not just in a right kind of register matched by a regex.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test seems to be checking for the defined function signature as well as some of the contents:

  ; CHECK-LABEL: .visible .func caller_St4x1(
  ; CHECK:               .param .align 4 .b8 caller_St4x1_param_0[4],
  ; CHECK:               .param .b64 caller_St4x1_param_1
  ; CHECK:       )

If we were to auto-generate the checks we'd miss this validation. Do you still think it makes sense to auto-generate?

@AlexMaclean AlexMaclean merged commit 66e8163 into llvm:main Aug 1, 2025
9 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Aug 1, 2025

LLVM Buildbot has detected a new failure on builder clang-hip-vega20 running on hip-vega20-0 while building llvm at step 3 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/123/builds/24548

Here is the relevant piece of the build log for the reference
Step 3 (annotate) failure: '../llvm-zorg/zorg/buildbot/builders/annotated/hip-build.sh --jobs=' (failure)
...
[57/59] Linking CXX executable External/HIP/math_h-hip-6.3.0
[58/59] Building CXX object External/HIP/CMakeFiles/TheNextWeek-hip-6.3.0.dir/workload/ray-tracing/TheNextWeek/main.cc.o
[59/59] Linking CXX executable External/HIP/TheNextWeek-hip-6.3.0
+ build_step 'Testing HIP test-suite'
+ echo '@@@BUILD_STEP Testing HIP test-suite@@@'
+ ninja check-hip-simple
@@@BUILD_STEP Testing HIP test-suite@@@
[0/1] cd /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP && /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/bin/llvm-lit -sv array-hip-6.3.0.test empty-hip-6.3.0.test with-fopenmp-hip-6.3.0.test saxpy-hip-6.3.0.test memmove-hip-6.3.0.test split-kernel-args-hip-6.3.0.test builtin-logb-scalbn-hip-6.3.0.test TheNextWeek-hip-6.3.0.test algorithm-hip-6.3.0.test cmath-hip-6.3.0.test complex-hip-6.3.0.test math_h-hip-6.3.0.test new-hip-6.3.0.test blender.test
-- Testing: 14 tests, 14 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80.. 90
FAIL: test-suite :: External/HIP/blender.test (14 of 14)
******************** TEST 'test-suite :: External/HIP/blender.test' FAILED ********************

/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/tools/timeit-target --timeout 7200 --limit-core 0 --limit-cpu 7200 --limit-file-size 209715200 --limit-rss-size 838860800 --append-exitstatus --redirect-output /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/blender.test.out --redirect-input /dev/null --summary /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/blender.test.time /bin/bash test_blender.sh
/bin/bash verify_blender.sh /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/blender.test.out
Begin Blender test.
TEST_SUITE_HIP_ROOT=/opt/botworker/llvm/External/hip
Render /opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo_release.blend
Blender 4.1.1 (hash e1743a0317bc built 2024-04-15 23:47:45)
Read blend: "/opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo_release.blend"
Could not open as Ogawa file from provided streams.
Unable to open /opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.004", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.002", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.003", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.001", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
Could not open as Ogawa file from provided streams.
Unable to open /opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.003", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.001", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.002", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.004", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
I0801 22:07:09.132745 2693369 device.cpp:39] HIPEW initialization succeeded
I0801 22:07:09.134615 2693369 device.cpp:45] Found HIPCC hipcc
I0801 22:07:09.197247 2693369 device.cpp:207] Device has compute preemption or is not used for display.
I0801 22:07:09.197312 2693369 device.cpp:211] Added device "" with id "HIP__0000:a3:00".
I0801 22:07:09.197398 2693369 device.cpp:568] Mapped host memory limit set to 536,444,985,344 bytes. (499.60G)
I0801 22:07:09.197654 2693369 device_impl.cpp:63] Using AVX2 CPU kernels.
Fra:1 Mem:524.00M (Peak 524.70M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Eyepiece_rim
Fra:1 Mem:524.00M (Peak 524.70M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Rivets.014
Fra:1 Mem:524.00M (Peak 524.70M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Rivets.019
Fra:1 Mem:524.14M (Peak 524.70M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Rivets.023
Fra:1 Mem:524.25M (Peak 524.70M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Curve_Cables.004
Fra:1 Mem:525.03M (Peak 525.03M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Curve_Wires
Fra:1 Mem:525.02M (Peak 525.03M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Rivets.025
Fra:1 Mem:525.10M (Peak 525.10M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Rivets.026
Fra:1 Mem:525.20M (Peak 525.21M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Rivets.039
Step 12 (Testing HIP test-suite) failure: Testing HIP test-suite (failure)
@@@BUILD_STEP Testing HIP test-suite@@@
[0/1] cd /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP && /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/bin/llvm-lit -sv array-hip-6.3.0.test empty-hip-6.3.0.test with-fopenmp-hip-6.3.0.test saxpy-hip-6.3.0.test memmove-hip-6.3.0.test split-kernel-args-hip-6.3.0.test builtin-logb-scalbn-hip-6.3.0.test TheNextWeek-hip-6.3.0.test algorithm-hip-6.3.0.test cmath-hip-6.3.0.test complex-hip-6.3.0.test math_h-hip-6.3.0.test new-hip-6.3.0.test blender.test
-- Testing: 14 tests, 14 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80.. 90
FAIL: test-suite :: External/HIP/blender.test (14 of 14)
******************** TEST 'test-suite :: External/HIP/blender.test' FAILED ********************

/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/tools/timeit-target --timeout 7200 --limit-core 0 --limit-cpu 7200 --limit-file-size 209715200 --limit-rss-size 838860800 --append-exitstatus --redirect-output /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/blender.test.out --redirect-input /dev/null --summary /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/blender.test.time /bin/bash test_blender.sh
/bin/bash verify_blender.sh /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/blender.test.out
Begin Blender test.
TEST_SUITE_HIP_ROOT=/opt/botworker/llvm/External/hip
Render /opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo_release.blend
Blender 4.1.1 (hash e1743a0317bc built 2024-04-15 23:47:45)
Read blend: "/opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo_release.blend"
Could not open as Ogawa file from provided streams.
Unable to open /opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.004", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.002", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.003", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.001", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
Could not open as Ogawa file from provided streams.
Unable to open /opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.003", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.001", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.002", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.004", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
I0801 22:07:09.132745 2693369 device.cpp:39] HIPEW initialization succeeded
I0801 22:07:09.134615 2693369 device.cpp:45] Found HIPCC hipcc
I0801 22:07:09.197247 2693369 device.cpp:207] Device has compute preemption or is not used for display.
I0801 22:07:09.197312 2693369 device.cpp:211] Added device "" with id "HIP__0000:a3:00".
I0801 22:07:09.197398 2693369 device.cpp:568] Mapped host memory limit set to 536,444,985,344 bytes. (499.60G)
I0801 22:07:09.197654 2693369 device_impl.cpp:63] Using AVX2 CPU kernels.
Fra:1 Mem:524.00M (Peak 524.70M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Eyepiece_rim
Fra:1 Mem:524.00M (Peak 524.70M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Rivets.014
Fra:1 Mem:524.00M (Peak 524.70M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Rivets.019
Fra:1 Mem:524.14M (Peak 524.70M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Rivets.023
Fra:1 Mem:524.25M (Peak 524.70M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Curve_Cables.004
Fra:1 Mem:525.03M (Peak 525.03M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Curve_Wires
Fra:1 Mem:525.02M (Peak 525.03M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Rivets.025
Fra:1 Mem:525.10M (Peak 525.10M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Rivets.026
Fra:1 Mem:525.20M (Peak 525.21M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Rivets.039
Fra:1 Mem:525.25M (Peak 525.25M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Curve_Connectors.001
Fra:1 Mem:526.21M (Peak 526.21M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Curve_wires
Fra:1 Mem:526.49M (Peak 526.49M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | ENV-fog
Fra:1 Mem:526.46M (Peak 526.49M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Ground
Fra:1 Mem:550.37M (Peak 550.43M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Curve_Connectors.002
Fra:1 Mem:550.48M (Peak 550.48M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Curve_Connectors.004
Fra:1 Mem:550.48M (Peak 550.48M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Curve_Connectors.007

@llvm-ci
Copy link
Collaborator

llvm-ci commented Aug 1, 2025

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-sie-win running on sie-win-worker while building llvm at step 7 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/46/builds/21082

Here is the relevant piece of the build log for the reference
Step 7 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'lld :: COFF/import_weak_alias.test' FAILED ********************
Exit Code: 3221225477

Command Output (stdout):
--
# RUN: at line 3
split-file Z:\b\llvm-clang-x86_64-sie-win\llvm-project\lld\test\COFF\import_weak_alias.test Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.dir
# executed command: split-file 'Z:\b\llvm-clang-x86_64-sie-win\llvm-project\lld\test\COFF\import_weak_alias.test' 'Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.dir'
# note: command had no output on stdout or stderr
# RUN: at line 4
z:\b\llvm-clang-x86_64-sie-win\build\bin\llvm-mc.exe --filetype=obj -triple=x86_64-windows-msvc Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.dir/foo.s -o Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.foo.obj
# executed command: 'z:\b\llvm-clang-x86_64-sie-win\build\bin\llvm-mc.exe' --filetype=obj -triple=x86_64-windows-msvc 'Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.dir/foo.s' -o 'Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.foo.obj'
# note: command had no output on stdout or stderr
# RUN: at line 5
z:\b\llvm-clang-x86_64-sie-win\build\bin\llvm-mc.exe --filetype=obj -triple=x86_64-windows-msvc Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.dir/qux.s -o Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.qux.obj
# executed command: 'z:\b\llvm-clang-x86_64-sie-win\build\bin\llvm-mc.exe' --filetype=obj -triple=x86_64-windows-msvc 'Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.dir/qux.s' -o 'Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.qux.obj'
# note: command had no output on stdout or stderr
# RUN: at line 6
z:\b\llvm-clang-x86_64-sie-win\build\bin\lld-link.exe Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.qux.obj Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.foo.obj -out:Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.dll -dll
# executed command: 'z:\b\llvm-clang-x86_64-sie-win\build\bin\lld-link.exe' 'Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.qux.obj' 'Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.foo.obj' '-out:Z:\b\llvm-clang-x86_64-sie-win\build\tools\lld\test\COFF\Output\import_weak_alias.test.tmp.dll' -dll
# .---command stderr------------
# | PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
# | Stack dump:
# | 0.	Program arguments: z:\\b\\llvm-clang-x86_64-sie-win\\build\\bin\\lld-link.exe Z:\\b\\llvm-clang-x86_64-sie-win\\build\\tools\\lld\\test\\COFF\\Output\\import_weak_alias.test.tmp.qux.obj Z:\\b\\llvm-clang-x86_64-sie-win\\build\\tools\\lld\\test\\COFF\\Output\\import_weak_alias.test.tmp.foo.obj -out:Z:\\b\\llvm-clang-x86_64-sie-win\\build\\tools\\lld\\test\\COFF\\Output\\import_weak_alias.test.tmp.dll -dll
# | Exception Code: 0xC0000005
# | #0 0x00007ff8e5361b39 (C:\Windows\System32\KERNELBASE.dll+0x41b39)
# | #1 0x00007ff794ffbb18 (z:\b\llvm-clang-x86_64-sie-win\build\bin\lld-link.exe+0xcbb18)
# | #2 0x00007ff7950832db (z:\b\llvm-clang-x86_64-sie-win\build\bin\lld-link.exe+0x1532db)
# | #3 0x00007ff794fdd9aa (z:\b\llvm-clang-x86_64-sie-win\build\bin\lld-link.exe+0xad9aa)
# | #4 0x00007ff794fdda14 (z:\b\llvm-clang-x86_64-sie-win\build\bin\lld-link.exe+0xada14)
# | #5 0x00007ff797750454 (z:\b\llvm-clang-x86_64-sie-win\build\bin\lld-link.exe+0x2820454)
# | #6 0x00007ff8e7947ac4 (C:\Windows\System32\KERNEL32.DLL+0x17ac4)
# | #7 0x00007ff8e879a8c1 (C:\Windows\SYSTEM32\ntdll.dll+0x5a8c1)
# `-----------------------------
# error: command failed with exit status: 0xc0000005

--

********************


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants