diff --git a/clang/lib/CodeGen/ABIInfo.cpp b/clang/lib/CodeGen/ABIInfo.cpp
index 3ef430e19ebd3..822c3803a33e7 100644
--- a/clang/lib/CodeGen/ABIInfo.cpp
+++ b/clang/lib/CodeGen/ABIInfo.cpp
@@ -244,6 +244,10 @@ ABIInfo::getOptimalVectorMemoryType(llvm::FixedVectorType *T,
   return T;
 }
 
+ABIArgInfo ABIInfo::classifyArgForArm64ECVarArg(QualType Ty) const {
+  llvm_unreachable("Only implemented for x86");
+}
+
 // Pin the vtable to this file.
 SwiftABIInfo::~SwiftABIInfo() = default;
diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h
index 9c7029c99bd44..6f07a82a39fd7 100644
--- a/clang/lib/CodeGen/ABIInfo.h
+++ b/clang/lib/CodeGen/ABIInfo.h
@@ -132,6 +132,10 @@ class ABIInfo {
   virtual llvm::FixedVectorType *
   getOptimalVectorMemoryType(llvm::FixedVectorType *T,
                              const LangOptions &Opt) const;
+
+  /// Used by Arm64EC calling convention code to call into x86 calling
+  /// convention code for varargs functions.
+  virtual ABIArgInfo classifyArgForArm64ECVarArg(QualType Ty) const;
 };
 
 /// Target specific hooks for defining how a type should be passed or returned
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index b82c46966cf0b..fe562f37e30bc 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -24,9 +24,16 @@ namespace {
 
 class AArch64ABIInfo : public ABIInfo {
   AArch64ABIKind Kind;
+  std::unique_ptr<TargetCodeGenInfo> WinX86_64CodegenInfo;
+
 public:
-  AArch64ABIInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
-      : ABIInfo(CGT), Kind(Kind) {}
+  AArch64ABIInfo(CodeGenModule &CGM, AArch64ABIKind Kind)
+      : ABIInfo(CGM.getTypes()), Kind(Kind) {
+    if (getTarget().getTriple().isWindowsArm64EC()) {
+      WinX86_64CodegenInfo =
+          createWinX86_64TargetCodeGenInfo(CGM, X86AVXABILevel::None);
+    }
+  }
 
   bool isSoftFloat() const { return Kind == AArch64ABIKind::AAPCSSoft; }
 
@@ -119,9 +126,9 @@ class AArch64SwiftABIInfo : public SwiftABIInfo {
 
 class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
 public:
-  AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
-      : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {
-    SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT);
+  AArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind Kind)
+      : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGM, Kind)) {
+    SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGM.getTypes());
   }
 
   StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
@@ -200,8 +207,8 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
 
 class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
 public:
-  WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind K)
-      : AArch64TargetCodeGenInfo(CGT, K) {}
+  WindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind K)
+      : AArch64TargetCodeGenInfo(CGM, K) {}
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override;
@@ -368,6 +375,12 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
                                                 unsigned &NPRN) const {
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
+  if (IsVariadicFn && getTarget().getTriple().isWindowsArm64EC()) {
+    // Arm64EC varargs functions use the x86_64 classification rules,
+    // not the AArch64 ABI rules.
+    return WinX86_64CodegenInfo->getABIInfo().classifyArgForArm64ECVarArg(Ty);
+  }
+
   // Handle illegal vector types here.
   if (isIllegalVectorType(Ty))
     return coerceIllegalVector(Ty, NSRN, NPRN);
@@ -1151,9 +1164,16 @@ RValue AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                    QualType Ty, AggValueSlot Slot) const {
   bool IsIndirect = false;
 
-  // Composites larger than 16 bytes are passed by reference.
-  if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
-    IsIndirect = true;
+  if (getTarget().getTriple().isWindowsArm64EC()) {
+    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
+    // not 1, 2, 4, or 8 bytes, must be passed by reference."
+    uint64_t Width = getContext().getTypeSize(Ty);
+    IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
+  } else {
+    // Composites larger than 16 bytes are passed by reference.
+    if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
+      IsIndirect = true;
+  }
 
   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                           CGF.getContext().getTypeInfoInChars(Ty),
@@ -1345,11 +1365,11 @@ void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr,
 std::unique_ptr<TargetCodeGenInfo>
 CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
                                         AArch64ABIKind Kind) {
-  return std::make_unique<AArch64TargetCodeGenInfo>(CGM.getTypes(), Kind);
+  return std::make_unique<AArch64TargetCodeGenInfo>(CGM, Kind);
 }
 
 std::unique_ptr<TargetCodeGenInfo>
 CodeGen::createWindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM,
                                                AArch64ABIKind K) {
-  return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM.getTypes(), K);
+  return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM, K);
 }
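A note on the `EmitMSVAArg` hunk above: on Arm64EC, the va_arg path now applies the MS x64 indirectness rule rather than the AAPCS one. Below is a minimal plain-C restatement of that predicate, for illustration only; the helper name and the `main` driver are invented here, while the patch itself relies on `llvm::isPowerOf2_64`:

```c
#include <stdbool.h>
#include <stdint.h>

/* Illustrative restatement of the Arm64EC/MS-x64 rule used in EmitMSVAArg:
 * an argument is passed by reference if it is wider than 8 bytes or its
 * size is not a power of two (i.e. not 1, 2, 4, or 8 bytes). */
static bool isIndirectMSX64(uint64_t WidthInBits) {
  bool IsPowerOfTwo =
      WidthInBits != 0 && (WidthInBits & (WidthInBits - 1)) == 0;
  return WidthInBits > 64 || !IsPowerOfTwo;
}

int main(void) {
  /* Widths of the structs in the new test below:
   *   A = { float x[2] } ->  64 bits, power of two -> direct
   *   B = { char  x[6] } ->  48 bits, not a power  -> indirect
   *   C = { float x[4] } -> 128 bits, too wide     -> indirect */
  return (!isIndirectMSX64(64) && isIndirectMSX64(48) && isIndirectMSX64(128))
             ? 0
             : 1;
}
```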
diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp
index abb91486e7ee6..f473e9d7665ac 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -1409,6 +1409,12 @@ class WinX86_64ABIInfo : public ABIInfo {
     return isX86VectorCallAggregateSmallEnough(NumMembers);
   }
 
+  ABIArgInfo classifyArgForArm64ECVarArg(QualType Ty) const override {
+    unsigned FreeSSERegs = 0;
+    return classify(Ty, FreeSSERegs, /*IsReturnType=*/false,
+                    /*IsVectorCall=*/false, /*IsRegCall=*/false);
+  }
+
 private:
   ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
                       bool IsVectorCall, bool IsRegCall) const;
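The `WinX86_64ABIInfo` override above runs the regular Win64 `classify` with `FreeSSERegs = 0`, the same starting value the plain (non-vectorcall, non-regcall) Win64 path uses. To observe the end-to-end effect, a standalone translation unit such as the following, which is not part of the patch and uses made-up names, can be compiled the same way as the test below (cc1 with `-triple arm64ec-windows-msvc -emit-llvm`):

```c
/* Standalone repro, separate from the test added below; every name here is
 * arbitrary. With this patch, variadic arguments on arm64ec-windows-msvc are
 * classified with the MS x64 rules rather than the AArch64 ones. */
typedef struct { double d[2]; } Big; /* 16 bytes: too wide, passed indirectly */
typedef struct { char c[3]; } Odd;   /*  3 bytes: not a power of two, indirect */
typedef struct { int i; } Small;     /*  4 bytes: fits a slot, passed by value */

void sink(int n, ...);

void demo(Big b, Odd o, Small s) {
  /* Under the previous (AArch64) rules, b would still have been passed
   * directly, since only composites larger than 16 bytes go by reference;
   * with the x64 rules, b and o are copied to temporaries and pointers to
   * the copies are passed instead. */
  sink(3, b, o, s);
}
```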
diff --git a/clang/test/CodeGen/arm64ec-varargs.c b/clang/test/CodeGen/arm64ec-varargs.c
new file mode 100644
index 0000000000000..ef2c4dc5703b1
--- /dev/null
+++ b/clang/test/CodeGen/arm64ec-varargs.c
@@ -0,0 +1,79 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs --global-value-regex "f"
+// RUN: %clang_cc1 -opaque-pointers -triple arm64ec-windows-msvc -emit-llvm -o - %s | FileCheck %s
+
+typedef struct { float x[2]; } A;
+typedef struct { char x[6]; } B;
+typedef struct { float x[4]; } C;
+void f(A a, ...) {
+  __builtin_va_list args;
+  __builtin_va_start(args, a);
+  float x = __builtin_va_arg(args, A).x[0];
+  float y = __builtin_va_arg(args, B).x[0];
+  float z = __builtin_va_arg(args, C).x[0];
+}
+void g(A a, B b, C c) { f(a, b, c); }
+
+// CHECK-LABEL: @f(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 4
+// CHECK-NEXT: [[ARGS:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[X:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_A]], align 4
+// CHECK-NEXT: [[Y:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[REF_TMP2:%.*]] = alloca [[STRUCT_B:%.*]], align 1
+// CHECK-NEXT: [[Z:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[REF_TMP7:%.*]] = alloca [[STRUCT_C:%.*]], align 4
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: store i64 [[A_COERCE:%.*]], ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARGS]])
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[ARGS]], align 8
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[ARGS]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP]], ptr align 8 [[ARGP_CUR]], i64 8, i1 false)
+// CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x float], ptr [[X1]], i64 0, i64 0
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: store float [[TMP0]], ptr [[X]], align 4
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[ARGS]], align 8
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i64 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[ARGS]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR3]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[REF_TMP2]], ptr align 1 [[TMP1]], i64 6, i1 false)
+// CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[REF_TMP2]], i32 0, i32 0
+// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [6 x i8], ptr [[X5]], i64 0, i64 0
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX6]], align 1
+// CHECK-NEXT: [[CONV:%.*]] = sitofp i8 [[TMP2]] to float
+// CHECK-NEXT: store float [[CONV]], ptr [[Y]], align 4
+// CHECK-NEXT: [[ARGP_CUR8:%.*]] = load ptr, ptr [[ARGS]], align 8
+// CHECK-NEXT: [[ARGP_NEXT9:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR8]], i64 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT9]], ptr [[ARGS]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR8]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP7]], ptr align 4 [[TMP3]], i64 16, i1 false)
+// CHECK-NEXT: [[X10:%.*]] = getelementptr inbounds nuw [[STRUCT_C]], ptr [[REF_TMP7]], i32 0, i32 0
+// CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [4 x float], ptr [[X10]], i64 0, i64 0
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
+// CHECK-NEXT: store float [[TMP4]], ptr [[Z]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: @g(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 4
+// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_B:%.*]], align 1
+// CHECK-NEXT: [[C:%.*]] = alloca [[STRUCT_C:%.*]], align 4
+// CHECK-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_B]], align 1
+// CHECK-NEXT: [[BYVAL_TEMP4:%.*]] = alloca [[STRUCT_C]], align 4
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: store [2 x float] [[A_COERCE:%.*]], ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT: [[COERCE_VAL_II:%.*]] = trunc i64 [[B_COERCE:%.*]] to i48
+// CHECK-NEXT: store i48 [[COERCE_VAL_II]], ptr [[COERCE_DIVE1]], align 1
+// CHECK-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_C]], ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: store [4 x float] [[C_COERCE:%.*]], ptr [[COERCE_DIVE2]], align 4
+// CHECK-NEXT: [[COERCE_DIVE3:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[COERCE_DIVE3]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[BYVAL_TEMP]], ptr align 1 [[B]], i64 6, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[BYVAL_TEMP4]], ptr align 4 [[C]], i64 16, i1 false)
+// CHECK-NEXT: call void (i64, ...) @f(i64 [[TMP0]], ptr dead_on_return noundef [[BYVAL_TEMP]], ptr dead_on_return noundef [[BYVAL_TEMP4]])
+// CHECK-NEXT: ret void
+//
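For cross-checking the `@f` block above: each va_arg advances the cursor by one 8-byte slot, then either copies the value out of the slot (the direct case, struct `A`) or loads a pointer from the slot and copies the payload through it (the indirect case, structs `B` and `C`). Here is a rough C rendering of the two shapes, assuming a plain `char *` cursor in place of the IR's `args` slot; this sketch is illustrative only, not code from the patch:

```c
#include <string.h>

typedef struct { float x[2]; } A; /* 8 bytes: stored in the vararg slot itself */
typedef struct { char x[6]; } B;  /* 6 bytes: the slot holds a pointer instead */

/* Direct case: mirrors the memcpy from [[ARGP_CUR]] in the @f checks. */
static A next_A(char **args) {
  A tmp;
  memcpy(&tmp, *args, sizeof tmp); /* copy straight out of the 8-byte slot */
  *args += 8;                      /* every vararg slot is 8 bytes wide */
  return tmp;
}

/* Indirect case: mirrors the `load ptr` followed by the 6-byte memcpy. */
static B next_B(char **args) {
  B tmp;
  void *p;
  memcpy(&p, *args, sizeof p); /* the slot holds a pointer... */
  memcpy(&tmp, p, sizeof tmp); /* ...to the out-of-line payload */
  *args += 8;
  return tmp;
}
```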