
Commit 4e275f7

[Arm64EC][clang] Implement varargs support in clang. (#152411)
The clang side of the calling-convention code for arm64 vs. arm64ec is close enough that the difference isn't noticeable in most cases, but the rule for choosing whether to pass a struct directly or indirectly is significantly different. (Adapted from my old patch https://reviews.llvm.org/D125419 .) Fixes #89615.
1 parent b1c4b55 commit 4e275f7
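
To make the difference concrete, here is a hedged sketch (the struct and function names are illustrative, not from the patch): a 16-byte struct is passed directly to a variadic function under the AArch64 Windows rules, but Arm64EC follows the x64 rule that anything other than 1, 2, 4, or 8 bytes goes by reference.

typedef struct { float x[4]; } S16; // 16 bytes
void f(int n, ...);
void g(S16 s) { f(1, s); }
// aarch64-windows: s is a composite <= 16 bytes, so it is passed directly.
// arm64ec-windows: 16 is not 1/2/4/8 bytes, so s is passed by reference
// (a pointer to a temporary copy), as the new test at the end of this
// diff checks for the 16-byte struct C.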

File tree

5 files changed: +126, -12 lines changed

clang/lib/CodeGen/ABIInfo.cpp

Lines changed: 5 additions & 0 deletions
@@ -251,6 +251,11 @@ llvm::Value *ABIInfo::createCoercedLoad(Address SrcAddr, const ABIArgInfo &AI,
 void ABIInfo::createCoercedStore(llvm::Value *Val, Address DstAddr,
                                  const ABIArgInfo &AI, bool DestIsVolatile,
                                  CodeGenFunction &CGF) const {}
+
+ABIArgInfo ABIInfo::classifyArgForArm64ECVarArg(QualType Ty) const {
+  llvm_unreachable("Only implemented for x86");
+}
+
 // Pin the vtable to this file.
 SwiftABIInfo::~SwiftABIInfo() = default;

clang/lib/CodeGen/ABIInfo.h

Lines changed: 4 additions & 0 deletions
@@ -138,6 +138,10 @@ class ABIInfo {
   virtual void createCoercedStore(llvm::Value *Val, Address DstAddr,
                                   const ABIArgInfo &AI, bool DestIsVolatile,
                                   CodeGenFunction &CGF) const;
+
+  /// Used by Arm64EC calling convention code to call into x86 calling
+  /// convention code for varargs functions.
+  virtual ABIArgInfo classifyArgForArm64ECVarArg(QualType Ty) const;
 };

 /// Target specific hooks for defining how a type should be passed or returned

clang/lib/CodeGen/Targets/AArch64.cpp

Lines changed: 32 additions & 12 deletions
@@ -24,9 +24,16 @@ namespace {
 class AArch64ABIInfo : public ABIInfo {
   AArch64ABIKind Kind;

+  std::unique_ptr<TargetCodeGenInfo> WinX86_64CodegenInfo;
+
 public:
-  AArch64ABIInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
-      : ABIInfo(CGT), Kind(Kind) {}
+  AArch64ABIInfo(CodeGenModule &CGM, AArch64ABIKind Kind)
+      : ABIInfo(CGM.getTypes()), Kind(Kind) {
+    if (getTarget().getTriple().isWindowsArm64EC()) {
+      WinX86_64CodegenInfo =
+          createWinX86_64TargetCodeGenInfo(CGM, X86AVXABILevel::None);
+    }
+  }

   bool isSoftFloat() const { return Kind == AArch64ABIKind::AAPCSSoft; }

@@ -119,9 +126,9 @@ class AArch64SwiftABIInfo : public SwiftABIInfo {

 class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
 public:
-  AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
-      : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {
-    SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT);
+  AArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind Kind)
+      : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGM, Kind)) {
+    SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGM.getTypes());
   }

   StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
@@ -200,8 +207,8 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {

 class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
 public:
-  WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind K)
-      : AArch64TargetCodeGenInfo(CGT, K) {}
+  WindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind K)
+      : AArch64TargetCodeGenInfo(CGM, K) {}

   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override;
@@ -368,6 +375,12 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
                                                 unsigned &NPRN) const {
   Ty = useFirstFieldIfTransparentUnion(Ty);

+  if (IsVariadicFn && getTarget().getTriple().isWindowsArm64EC()) {
+    // Arm64EC varargs functions use the x86_64 classification rules,
+    // not the AArch64 ABI rules.
+    return WinX86_64CodegenInfo->getABIInfo().classifyArgForArm64ECVarArg(Ty);
+  }
+
   // Handle illegal vector types here.
   if (isIllegalVectorType(Ty))
     return coerceIllegalVector(Ty, NSRN, NPRN);
@@ -1153,9 +1166,16 @@ RValue AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                    QualType Ty, AggValueSlot Slot) const {
   bool IsIndirect = false;

-  // Composites larger than 16 bytes are passed by reference.
-  if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
-    IsIndirect = true;
+  if (getTarget().getTriple().isWindowsArm64EC()) {
+    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
+    // not 1, 2, 4, or 8 bytes, must be passed by reference."
+    uint64_t Width = getContext().getTypeSize(Ty);
+    IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
+  } else {
+    // Composites larger than 16 bytes are passed by reference.
+    if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
+      IsIndirect = true;
+  }

   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                           CGF.getContext().getTypeInfoInChars(Ty),
@@ -1347,11 +1367,11 @@ void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr,
 std::unique_ptr<TargetCodeGenInfo>
 CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
                                         AArch64ABIKind Kind) {
-  return std::make_unique<AArch64TargetCodeGenInfo>(CGM.getTypes(), Kind);
+  return std::make_unique<AArch64TargetCodeGenInfo>(CGM, Kind);
 }

 std::unique_ptr<TargetCodeGenInfo>
 CodeGen::createWindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM,
                                                AArch64ABIKind K) {
-  return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM.getTypes(), K);
+  return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM, K);
 }
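
The EmitMSVAArg check above works on the bit width returned by getTypeSize, so "1, 2, 4, or 8 bytes" becomes "a power of two no larger than 64 bits". A hedged walk-through (the struct and function names below are illustrative, not from the patch):

// IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width), with Width in bits:
//   1/2/4/8-byte types -> 8/16/32/64 bits -> power of two, <= 64 -> direct
//   6-byte struct      -> 48 bits  -> not a power of two -> by reference
//   16-byte struct     -> 128 bits -> greater than 64    -> by reference
typedef struct { char x[6]; } B6;
char first_byte(__builtin_va_list args) {
  // On arm64ec the va_arg slot holds a pointer; the 6 bytes are copied out
  // of the pointed-to temporary before use.
  return __builtin_va_arg(args, B6).x[0];
}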

clang/lib/CodeGen/Targets/X86.cpp

Lines changed: 6 additions & 0 deletions
@@ -1417,6 +1417,12 @@ class WinX86_64ABIInfo : public ABIInfo {
     return isX86VectorCallAggregateSmallEnough(NumMembers);
   }

+  ABIArgInfo classifyArgForArm64ECVarArg(QualType Ty) const override {
+    unsigned FreeSSERegs = 0;
+    return classify(Ty, FreeSSERegs, /*IsReturnType=*/false,
+                    /*IsVectorCall=*/false, /*IsRegCall=*/false);
+  }
+
 private:
   ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
                       bool IsVectorCall, bool IsRegCall) const;
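
Calling classify with FreeSSERegs = 0 assumes no vector registers are available, which matches how the x64 convention treats arguments passed through the variadic part of a call. For aggregates the practical effect is the familiar MS x64 size rule; a small sketch under that assumption (hypothetical names, not from the patch):

typedef struct { char c[4]; } S4; // 4 bytes: direct, coerced to an i32
typedef struct { char c[5]; } S5; // 5 bytes: not 1/2/4/8, passed indirectly
void h(int n, ...);
void use(S4 a, S5 b) { h(2, a, b); } // a in an integer slot, b by reference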
Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs --global-value-regex "f"
+// RUN: %clang_cc1 -opaque-pointers -triple arm64ec-windows-msvc -emit-llvm -o - %s | FileCheck %s
+
+typedef struct { float x[2]; } A;
+typedef struct { char x[6]; } B;
+typedef struct { float x[4]; } C;
+void f(A a, ...) {
+  __builtin_va_list args;
+  __builtin_va_start(args, a);
+  float x = __builtin_va_arg(args, A).x[0];
+  float y = __builtin_va_arg(args, B).x[0];
+  float z = __builtin_va_arg(args, C).x[0];
+}
+void g(A a, B b, C c) { f(a, b, c); }
+
+// CHECK-LABEL: @f(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 4
+// CHECK-NEXT: [[ARGS:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[X:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_A]], align 4
+// CHECK-NEXT: [[Y:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[REF_TMP2:%.*]] = alloca [[STRUCT_B:%.*]], align 1
+// CHECK-NEXT: [[Z:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[REF_TMP7:%.*]] = alloca [[STRUCT_C:%.*]], align 4
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: store i64 [[A_COERCE:%.*]], ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARGS]])
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[ARGS]], align 8
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[ARGS]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP]], ptr align 8 [[ARGP_CUR]], i64 8, i1 false)
+// CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x float], ptr [[X1]], i64 0, i64 0
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: store float [[TMP0]], ptr [[X]], align 4
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[ARGS]], align 8
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i64 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[ARGS]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR3]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[REF_TMP2]], ptr align 1 [[TMP1]], i64 6, i1 false)
+// CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[REF_TMP2]], i32 0, i32 0
+// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [6 x i8], ptr [[X5]], i64 0, i64 0
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX6]], align 1
+// CHECK-NEXT: [[CONV:%.*]] = sitofp i8 [[TMP2]] to float
+// CHECK-NEXT: store float [[CONV]], ptr [[Y]], align 4
+// CHECK-NEXT: [[ARGP_CUR8:%.*]] = load ptr, ptr [[ARGS]], align 8
+// CHECK-NEXT: [[ARGP_NEXT9:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR8]], i64 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT9]], ptr [[ARGS]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR8]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP7]], ptr align 4 [[TMP3]], i64 16, i1 false)
+// CHECK-NEXT: [[X10:%.*]] = getelementptr inbounds nuw [[STRUCT_C]], ptr [[REF_TMP7]], i32 0, i32 0
+// CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [4 x float], ptr [[X10]], i64 0, i64 0
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
+// CHECK-NEXT: store float [[TMP4]], ptr [[Z]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: @g(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 4
+// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_B:%.*]], align 1
+// CHECK-NEXT: [[C:%.*]] = alloca [[STRUCT_C:%.*]], align 4
+// CHECK-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_B]], align 1
+// CHECK-NEXT: [[BYVAL_TEMP4:%.*]] = alloca [[STRUCT_C]], align 4
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: store [2 x float] [[A_COERCE:%.*]], ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT: [[COERCE_VAL_II:%.*]] = trunc i64 [[B_COERCE:%.*]] to i48
+// CHECK-NEXT: store i48 [[COERCE_VAL_II]], ptr [[COERCE_DIVE1]], align 1
+// CHECK-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_C]], ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: store [4 x float] [[C_COERCE:%.*]], ptr [[COERCE_DIVE2]], align 4
+// CHECK-NEXT: [[COERCE_DIVE3:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[COERCE_DIVE3]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[BYVAL_TEMP]], ptr align 1 [[B]], i64 6, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[BYVAL_TEMP4]], ptr align 4 [[C]], i64 16, i1 false)
+// CHECK-NEXT: call void (i64, ...) @f(i64 [[TMP0]], ptr dead_on_return noundef [[BYVAL_TEMP]], ptr dead_on_return noundef [[BYVAL_TEMP4]])
+// CHECK-NEXT: ret void
+//
