Skip to content

Commit 9b8fdaf

Browse files
committed
[Arm64EC][clang] Implement varargs support in clang.
The clang side of the calling convention code for arm64 vs. arm64ec is close enough that this isn't really noticeable in most cases, but the rule for choosing whether to pass a struct directly or indirectly is significantly different. (Adapted from my old patch https://reviews.llvm.org/D125419 .) Fixes #89615.
1 parent 936ee35 commit 9b8fdaf

File tree

5 files changed

+105
-12
lines changed

5 files changed

+105
-12
lines changed

clang/lib/CodeGen/ABIInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,10 @@ ABIInfo::getOptimalVectorMemoryType(llvm::FixedVectorType *T,
244244
return T;
245245
}
246246

247+
ABIArgInfo ABIInfo::classifyArgForArm64ECVarArg(QualType Ty) const {
248+
llvm_unreachable("Only implemented for x86");
249+
}
250+
247251
// Pin the vtable to this file.
248252
SwiftABIInfo::~SwiftABIInfo() = default;
249253

clang/lib/CodeGen/ABIInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,10 @@ class ABIInfo {
132132
virtual llvm::FixedVectorType *
133133
getOptimalVectorMemoryType(llvm::FixedVectorType *T,
134134
const LangOptions &Opt) const;
135+
136+
/// Used by Arm64EC calling convention code to call into x86 calling
137+
/// convention code for varargs functions.
138+
virtual ABIArgInfo classifyArgForArm64ECVarArg(QualType Ty) const;
135139
};
136140

137141
/// Target specific hooks for defining how a type should be passed or returned

clang/lib/CodeGen/Targets/AArch64.cpp

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,16 @@ namespace {
2424
class AArch64ABIInfo : public ABIInfo {
2525
AArch64ABIKind Kind;
2626

27+
std::unique_ptr<TargetCodeGenInfo> WinX86_64CodegenInfo;
28+
2729
public:
28-
AArch64ABIInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
29-
: ABIInfo(CGT), Kind(Kind) {}
30+
AArch64ABIInfo(CodeGenModule &CGM, AArch64ABIKind Kind)
31+
: ABIInfo(CGM.getTypes()), Kind(Kind) {
32+
if (getTarget().getTriple().isWindowsArm64EC()) {
33+
WinX86_64CodegenInfo =
34+
createWinX86_64TargetCodeGenInfo(CGM, X86AVXABILevel::None);
35+
}
36+
}
3037

3138
bool isSoftFloat() const { return Kind == AArch64ABIKind::AAPCSSoft; }
3239

@@ -119,9 +126,9 @@ class AArch64SwiftABIInfo : public SwiftABIInfo {
119126

120127
class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
121128
public:
122-
AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
123-
: TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {
124-
SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT);
129+
AArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind Kind)
130+
: TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGM, Kind)) {
131+
SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGM.getTypes());
125132
}
126133

127134
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
@@ -200,8 +207,8 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
200207

201208
class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
202209
public:
203-
WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind K)
204-
: AArch64TargetCodeGenInfo(CGT, K) {}
210+
WindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind K)
211+
: AArch64TargetCodeGenInfo(CGM, K) {}
205212

206213
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
207214
CodeGen::CodeGenModule &CGM) const override;
@@ -368,6 +375,12 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
368375
unsigned &NPRN) const {
369376
Ty = useFirstFieldIfTransparentUnion(Ty);
370377

378+
if (IsVariadicFn && getTarget().getTriple().isWindowsArm64EC()) {
379+
// Arm64EC varargs functions use the x86_64 classification rules,
380+
// not the AArch64 ABI rules.
381+
return WinX86_64CodegenInfo->getABIInfo().classifyArgForArm64ECVarArg(Ty);
382+
}
383+
371384
// Handle illegal vector types here.
372385
if (isIllegalVectorType(Ty))
373386
return coerceIllegalVector(Ty, NSRN, NPRN);
@@ -1151,9 +1164,16 @@ RValue AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
11511164
QualType Ty, AggValueSlot Slot) const {
11521165
bool IsIndirect = false;
11531166

1154-
// Composites larger than 16 bytes are passed by reference.
1155-
if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
1156-
IsIndirect = true;
1167+
if (getTarget().getTriple().isWindowsArm64EC()) {
1168+
// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
1169+
// not 1, 2, 4, or 8 bytes, must be passed by reference."
1170+
uint64_t Width = getContext().getTypeSize(Ty);
1171+
IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
1172+
} else {
1173+
// Composites larger than 16 bytes are passed by reference.
1174+
if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
1175+
IsIndirect = true;
1176+
}
11571177

11581178
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
11591179
CGF.getContext().getTypeInfoInChars(Ty),
@@ -1345,11 +1365,11 @@ void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr,
13451365
std::unique_ptr<TargetCodeGenInfo>
13461366
CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
13471367
AArch64ABIKind Kind) {
1348-
return std::make_unique<AArch64TargetCodeGenInfo>(CGM.getTypes(), Kind);
1368+
return std::make_unique<AArch64TargetCodeGenInfo>(CGM, Kind);
13491369
}
13501370

13511371
std::unique_ptr<TargetCodeGenInfo>
13521372
CodeGen::createWindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM,
13531373
AArch64ABIKind K) {
1354-
return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM.getTypes(), K);
1374+
return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM, K);
13551375
}

clang/lib/CodeGen/Targets/X86.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,6 +1409,12 @@ class WinX86_64ABIInfo : public ABIInfo {
14091409
return isX86VectorCallAggregateSmallEnough(NumMembers);
14101410
}
14111411

1412+
ABIArgInfo classifyArgForArm64ECVarArg(QualType Ty) const override {
1413+
unsigned FreeSSERegs = 0;
1414+
return classify(Ty, FreeSSERegs, /*IsReturnType=*/false,
1415+
/*IsVectorCall=*/false, /*IsRegCall=*/false);
1416+
}
1417+
14121418
private:
14131419
ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
14141420
bool IsVectorCall, bool IsRegCall) const;
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs --global-value-regex "f"
2+
// RUN: %clang_cc1 -opaque-pointers -triple arm64ec-windows-msvc -emit-llvm -o - %s | FileCheck %s
3+
4+
typedef struct { float x[2]; } A; // 8 bytes: coerced to i64 and passed directly (see CHECKs below)
5+
typedef struct { float x[4]; } B; // 16 bytes: passed indirectly via pointer under the MS x64 varargs rules
6+
// Variadic callee: reads an A (copied directly off the va_list) and a B
// (read through a pointer slot), exercising the Arm64EC x86_64-style
// va_arg lowering.
void f(A a, ...) {
7+
__builtin_va_list b;
8+
__builtin_va_start(b, a);
9+
float x = __builtin_va_arg(b, A).x[0];
10+
float y = __builtin_va_arg(b, B).x[0];
11+
}
12+
// Caller side: B is copied into a byval temporary and passed as a pointer
// (ptr dead_on_return in the CHECK lines), while A is passed as an i64.
void g(A a, B b) { f(a, b); }
13+
14+
// CHECK-LABEL: @f(
15+
// CHECK-NEXT: entry:
16+
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 4
17+
// CHECK-NEXT: [[B:%.*]] = alloca ptr, align 8
18+
// CHECK-NEXT: [[X:%.*]] = alloca float, align 4
19+
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_A]], align 4
20+
// CHECK-NEXT: [[Y:%.*]] = alloca float, align 4
21+
// CHECK-NEXT: [[REF_TMP2:%.*]] = alloca [[STRUCT_B:%.*]], align 4
22+
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
23+
// CHECK-NEXT: store i64 [[A_COERCE:%.*]], ptr [[COERCE_DIVE]], align 4
24+
// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[B]])
25+
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[B]], align 8
26+
// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8
27+
// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[B]], align 8
28+
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP]], ptr align 8 [[ARGP_CUR]], i64 8, i1 false)
29+
// CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[REF_TMP]], i32 0, i32 0
30+
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x float], ptr [[X1]], i64 0, i64 0
31+
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
32+
// CHECK-NEXT: store float [[TMP0]], ptr [[X]], align 4
33+
// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[B]], align 8
34+
// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i64 8
35+
// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[B]], align 8
36+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR3]], align 8
37+
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP2]], ptr align 4 [[TMP1]], i64 16, i1 false)
38+
// CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[REF_TMP2]], i32 0, i32 0
39+
// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x float], ptr [[X5]], i64 0, i64 0
40+
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
41+
// CHECK-NEXT: store float [[TMP2]], ptr [[Y]], align 4
42+
// CHECK-NEXT: ret void
43+
//
44+
//
45+
// CHECK-LABEL: @g(
46+
// CHECK-NEXT: entry:
47+
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 4
48+
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_B:%.*]], align 4
49+
// CHECK-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_B]], align 4
50+
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
51+
// CHECK-NEXT: store [2 x float] [[A_COERCE:%.*]], ptr [[COERCE_DIVE]], align 4
52+
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[B]], i32 0, i32 0
53+
// CHECK-NEXT: store [4 x float] [[B_COERCE:%.*]], ptr [[COERCE_DIVE1]], align 4
54+
// CHECK-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
55+
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[COERCE_DIVE2]], align 4
56+
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[BYVAL_TEMP]], ptr align 4 [[B]], i64 16, i1 false)
57+
// CHECK-NEXT: call void (i64, ...) @f(i64 [[TMP0]], ptr dead_on_return noundef [[BYVAL_TEMP]])
58+
// CHECK-NEXT: ret void
59+
//

0 commit comments

Comments
 (0)