
Commit 4e275f7

[Arm64EC][clang] Implement varargs support in clang. (#152411)
The clang side of the calling-convention code for arm64 vs. arm64ec is close enough that the difference isn't noticeable in most cases, but the rule for choosing whether to pass a struct directly or indirectly is significantly different. (Adapted from my old patch https://reviews.llvm.org/D125419 .) Fixes #89615.
1 parent b1c4b55 commit 4e275f7
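
To make the difference concrete, here is a hedged sketch (the struct and function names are illustrative, not from the patch): a 16-byte struct is passed directly to a variadic function under the AArch64 Windows rules, but Arm64EC follows the x64 rule that anything other than 1, 2, 4, or 8 bytes goes by reference.

typedef struct { float x[4]; } S16; // 16 bytes
void f(int n, ...);
void g(S16 s) { f(1, s); }
// aarch64-windows: s is a composite <= 16 bytes, so it is passed directly.
// arm64ec-windows: 16 is not 1/2/4/8 bytes, so s is passed by reference
// (a pointer to a temporary copy), as the new test at the end of this
// diff checks for the 16-byte struct C.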

File tree

5 files changed: +126, -12 lines changed

clang/lib/CodeGen/ABIInfo.cpp

Lines changed: 5 additions & 0 deletions
@@ -251,6 +251,11 @@ llvm::Value *ABIInfo::createCoercedLoad(Address SrcAddr, const ABIArgInfo &AI,
 void ABIInfo::createCoercedStore(llvm::Value *Val, Address DstAddr,
                                  const ABIArgInfo &AI, bool DestIsVolatile,
                                  CodeGenFunction &CGF) const {}
+
+ABIArgInfo ABIInfo::classifyArgForArm64ECVarArg(QualType Ty) const {
+  llvm_unreachable("Only implemented for x86");
+}
+
 // Pin the vtable to this file.
 SwiftABIInfo::~SwiftABIInfo() = default;

clang/lib/CodeGen/ABIInfo.h

Lines changed: 4 additions & 0 deletions
@@ -138,6 +138,10 @@ class ABIInfo {
   virtual void createCoercedStore(llvm::Value *Val, Address DstAddr,
                                   const ABIArgInfo &AI, bool DestIsVolatile,
                                   CodeGenFunction &CGF) const;
+
+  /// Used by Arm64EC calling convention code to call into x86 calling
+  /// convention code for varargs functions.
+  virtual ABIArgInfo classifyArgForArm64ECVarArg(QualType Ty) const;
 };

 /// Target specific hooks for defining how a type should be passed or returned

clang/lib/CodeGen/Targets/AArch64.cpp

Lines changed: 32 additions & 12 deletions
@@ -24,9 +24,16 @@ namespace {
 class AArch64ABIInfo : public ABIInfo {
   AArch64ABIKind Kind;

+  std::unique_ptr<TargetCodeGenInfo> WinX86_64CodegenInfo;
+
 public:
-  AArch64ABIInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
-      : ABIInfo(CGT), Kind(Kind) {}
+  AArch64ABIInfo(CodeGenModule &CGM, AArch64ABIKind Kind)
+      : ABIInfo(CGM.getTypes()), Kind(Kind) {
+    if (getTarget().getTriple().isWindowsArm64EC()) {
+      WinX86_64CodegenInfo =
+          createWinX86_64TargetCodeGenInfo(CGM, X86AVXABILevel::None);
+    }
+  }

   bool isSoftFloat() const { return Kind == AArch64ABIKind::AAPCSSoft; }

@@ -119,9 +126,9 @@ class AArch64SwiftABIInfo : public SwiftABIInfo {

 class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
 public:
-  AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
-      : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {
-    SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT);
+  AArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind Kind)
+      : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGM, Kind)) {
+    SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGM.getTypes());
   }

   StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
@@ -200,8 +207,8 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {

 class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
 public:
-  WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind K)
-      : AArch64TargetCodeGenInfo(CGT, K) {}
+  WindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind K)
+      : AArch64TargetCodeGenInfo(CGM, K) {}

   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override;
@@ -368,6 +375,12 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
                                                 unsigned &NPRN) const {
   Ty = useFirstFieldIfTransparentUnion(Ty);

+  if (IsVariadicFn && getTarget().getTriple().isWindowsArm64EC()) {
+    // Arm64EC varargs functions use the x86_64 classification rules,
+    // not the AArch64 ABI rules.
+    return WinX86_64CodegenInfo->getABIInfo().classifyArgForArm64ECVarArg(Ty);
+  }
+
   // Handle illegal vector types here.
   if (isIllegalVectorType(Ty))
     return coerceIllegalVector(Ty, NSRN, NPRN);
@@ -1153,9 +1166,16 @@ RValue AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                    QualType Ty, AggValueSlot Slot) const {
   bool IsIndirect = false;

-  // Composites larger than 16 bytes are passed by reference.
-  if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
-    IsIndirect = true;
+  if (getTarget().getTriple().isWindowsArm64EC()) {
+    // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
+    // not 1, 2, 4, or 8 bytes, must be passed by reference."
+    uint64_t Width = getContext().getTypeSize(Ty);
+    IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
+  } else {
+    // Composites larger than 16 bytes are passed by reference.
+    if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
+      IsIndirect = true;
+  }

   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
                           CGF.getContext().getTypeInfoInChars(Ty),
@@ -1347,11 +1367,11 @@ void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr,
 std::unique_ptr<TargetCodeGenInfo>
 CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
                                         AArch64ABIKind Kind) {
-  return std::make_unique<AArch64TargetCodeGenInfo>(CGM.getTypes(), Kind);
+  return std::make_unique<AArch64TargetCodeGenInfo>(CGM, Kind);
 }

 std::unique_ptr<TargetCodeGenInfo>
 CodeGen::createWindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM,
                                                AArch64ABIKind K) {
-  return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM.getTypes(), K);
+  return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM, K);
 }
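
The EmitMSVAArg check above works on the bit width returned by getTypeSize, so "1, 2, 4, or 8 bytes" becomes "a power of two no larger than 64 bits". A hedged walk-through (the struct and function names below are illustrative, not from the patch):

// IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width), with Width in bits:
//   1/2/4/8-byte types -> 8/16/32/64 bits -> power of two, <= 64 -> direct
//   6-byte struct      -> 48 bits  -> not a power of two -> by reference
//   16-byte struct     -> 128 bits -> greater than 64    -> by reference
typedef struct { char x[6]; } B6;
char first_byte(__builtin_va_list args) {
  // On arm64ec the va_arg slot holds a pointer; the 6 bytes are copied out
  // of the pointed-to temporary before use.
  return __builtin_va_arg(args, B6).x[0];
}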

clang/lib/CodeGen/Targets/X86.cpp

Lines changed: 6 additions & 0 deletions
@@ -1417,6 +1417,12 @@ class WinX86_64ABIInfo : public ABIInfo {
     return isX86VectorCallAggregateSmallEnough(NumMembers);
   }

+  ABIArgInfo classifyArgForArm64ECVarArg(QualType Ty) const override {
+    unsigned FreeSSERegs = 0;
+    return classify(Ty, FreeSSERegs, /*IsReturnType=*/false,
+                    /*IsVectorCall=*/false, /*IsRegCall=*/false);
+  }
+
 private:
   ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
                       bool IsVectorCall, bool IsRegCall) const;
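
Calling classify with FreeSSERegs = 0 assumes no vector registers are available, which matches how the x64 convention treats arguments passed through the variadic part of a call. For aggregates the practical effect is the familiar MS x64 size rule; a small sketch under that assumption (hypothetical names, not from the patch):

typedef struct { char c[4]; } S4; // 4 bytes: direct, coerced to an i32
typedef struct { char c[5]; } S5; // 5 bytes: not 1/2/4/8, passed indirectly
void h(int n, ...);
void use(S4 a, S5 b) { h(2, a, b); } // a in an integer slot, b by reference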
Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs --global-value-regex "f"
+// RUN: %clang_cc1 -opaque-pointers -triple arm64ec-windows-msvc -emit-llvm -o - %s | FileCheck %s
+
+typedef struct { float x[2]; } A;
+typedef struct { char x[6]; } B;
+typedef struct { float x[4]; } C;
+void f(A a, ...) {
+  __builtin_va_list args;
+  __builtin_va_start(args, a);
+  float x = __builtin_va_arg(args, A).x[0];
+  float y = __builtin_va_arg(args, B).x[0];
+  float z = __builtin_va_arg(args, C).x[0];
+}
+void g(A a, B b, C c) { f(a, b, c); }
+
+// CHECK-LABEL: @f(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 4
+// CHECK-NEXT: [[ARGS:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[X:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_A]], align 4
+// CHECK-NEXT: [[Y:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[REF_TMP2:%.*]] = alloca [[STRUCT_B:%.*]], align 1
+// CHECK-NEXT: [[Z:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[REF_TMP7:%.*]] = alloca [[STRUCT_C:%.*]], align 4
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: store i64 [[A_COERCE:%.*]], ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARGS]])
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[ARGS]], align 8
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[ARGS]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP]], ptr align 8 [[ARGP_CUR]], i64 8, i1 false)
+// CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x float], ptr [[X1]], i64 0, i64 0
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: store float [[TMP0]], ptr [[X]], align 4
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[ARGS]], align 8
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i64 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[ARGS]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR3]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[REF_TMP2]], ptr align 1 [[TMP1]], i64 6, i1 false)
+// CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[REF_TMP2]], i32 0, i32 0
+// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [6 x i8], ptr [[X5]], i64 0, i64 0
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX6]], align 1
+// CHECK-NEXT: [[CONV:%.*]] = sitofp i8 [[TMP2]] to float
+// CHECK-NEXT: store float [[CONV]], ptr [[Y]], align 4
+// CHECK-NEXT: [[ARGP_CUR8:%.*]] = load ptr, ptr [[ARGS]], align 8
+// CHECK-NEXT: [[ARGP_NEXT9:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR8]], i64 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT9]], ptr [[ARGS]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGP_CUR8]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP7]], ptr align 4 [[TMP3]], i64 16, i1 false)
+// CHECK-NEXT: [[X10:%.*]] = getelementptr inbounds nuw [[STRUCT_C]], ptr [[REF_TMP7]], i32 0, i32 0
+// CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [4 x float], ptr [[X10]], i64 0, i64 0
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
+// CHECK-NEXT: store float [[TMP4]], ptr [[Z]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: @g(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 4
+// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_B:%.*]], align 1
+// CHECK-NEXT: [[C:%.*]] = alloca [[STRUCT_C:%.*]], align 4
+// CHECK-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_B]], align 1
+// CHECK-NEXT: [[BYVAL_TEMP4:%.*]] = alloca [[STRUCT_C]], align 4
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: store [2 x float] [[A_COERCE:%.*]], ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT: [[COERCE_VAL_II:%.*]] = trunc i64 [[B_COERCE:%.*]] to i48
+// CHECK-NEXT: store i48 [[COERCE_VAL_II]], ptr [[COERCE_DIVE1]], align 1
+// CHECK-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_C]], ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: store [4 x float] [[C_COERCE:%.*]], ptr [[COERCE_DIVE2]], align 4
+// CHECK-NEXT: [[COERCE_DIVE3:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[COERCE_DIVE3]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[BYVAL_TEMP]], ptr align 1 [[B]], i64 6, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[BYVAL_TEMP4]], ptr align 4 [[C]], i64 16, i1 false)
+// CHECK-NEXT: call void (i64, ...) @f(i64 [[TMP0]], ptr dead_on_return noundef [[BYVAL_TEMP]], ptr dead_on_return noundef [[BYVAL_TEMP4]])
+// CHECK-NEXT: ret void
+//
