Skip to content

Commit 919021b

Browse files
authored
[Arm64EC] Add support for half (#152843)
`f16` is passed and returned in vector registers on both x86 and AArch64, the same calling convention as `f32`, so it is a straightforward type to support. The calling convention support already exists, added as part of a6065f0 ("Arm64EC entry/exit thunks, consolidated. (#79067)"). Thus, add mangling and remove the error in order to make `half` work. MSVC does not yet support `_Float16`, so for now this will remain an LLVM-only extension. Fixes the `f16` portion of #94434.
1 parent 8cdab07 commit 919021b

File tree

7 files changed

+100
-53
lines changed

7 files changed

+100
-53
lines changed

llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,12 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
316316
ThunkArgTranslation::PointerIndirection};
317317
};
318318

319+
if (T->isHalfTy()) {
320+
// Prefix with `llvm` since MSVC doesn't specify `_Float16`
321+
Out << "__llvm_h__";
322+
return direct(T);
323+
}
324+
319325
if (T->isFloatTy()) {
320326
Out << "f";
321327
return direct(T);
@@ -327,8 +333,8 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
327333
}
328334

329335
if (T->isFloatingPointTy()) {
330-
report_fatal_error(
331-
"Only 32 and 64 bit floating points are supported for ARM64EC thunks");
336+
report_fatal_error("Only 16, 32, and 64 bit floating points are supported "
337+
"for ARM64EC thunks");
332338
}
333339

334340
auto &DL = M->getDataLayout();
@@ -342,8 +348,16 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
342348
uint64_t ElementCnt = T->getArrayNumElements();
343349
uint64_t ElementSizePerBytes = DL.getTypeSizeInBits(ElementTy) / 8;
344350
uint64_t TotalSizeBytes = ElementCnt * ElementSizePerBytes;
345-
if (ElementTy->isFloatTy() || ElementTy->isDoubleTy()) {
346-
Out << (ElementTy->isFloatTy() ? "F" : "D") << TotalSizeBytes;
351+
if (ElementTy->isHalfTy() || ElementTy->isFloatTy() ||
352+
ElementTy->isDoubleTy()) {
353+
if (ElementTy->isHalfTy())
354+
// Prefix with `llvm` since MSVC doesn't specify `_Float16`
355+
Out << "__llvm_H__";
356+
else if (ElementTy->isFloatTy())
357+
Out << "F";
358+
else if (ElementTy->isDoubleTy())
359+
Out << "D";
360+
Out << TotalSizeBytes;
347361
if (Alignment.value() >= 16 && !Ret)
348362
Out << "a" << Alignment.value();
349363
if (TotalSizeBytes <= 8) {
@@ -355,8 +369,9 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
355369
return pointerIndirection(T);
356370
}
357371
} else if (T->isFloatingPointTy()) {
358-
report_fatal_error("Only 32 and 64 bit floating points are supported for "
359-
"ARM64EC thunks");
372+
report_fatal_error(
373+
"Only 16, 32, and 64 bit floating points are supported "
374+
"for ARM64EC thunks");
360375
}
361376
}
362377

llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,10 @@ define i64 @simple_integers(i8, i16, i32, i64) nounwind {
8585
ret i64 0
8686
}
8787

88-
; NOTE: Only float and double are supported.
89-
define double @simple_floats(float, double) nounwind {
90-
; CHECK-LABEL: .def $ientry_thunk$cdecl$d$fd;
91-
; CHECK: .section .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$d$fd
88+
; NOTE: Only half, float, and double are supported.
89+
define double @simple_floats(half, float, double) nounwind {
90+
; CHECK-LABEL: .def $ientry_thunk$cdecl$d$__llvm_h__fd;
91+
; CHECK: .section .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$d$__llvm_h__fd
9292
; CHECK: // %bb.0:
9393
; CHECK-NEXT: stp q6, q7, [sp, #-176]! // 32-byte Folded Spill
9494
; CHECK-NEXT: .seh_save_any_reg_px q6, 176
@@ -600,7 +600,7 @@ start:
600600
; CHECK-NEXT: .symidx $ientry_thunk$cdecl$i8$i8i8i8i8
601601
; CHECK-NEXT: .word 1
602602
; CHECK-NEXT: .symidx "#simple_floats"
603-
; CHECK-NEXT: .symidx $ientry_thunk$cdecl$d$fd
603+
; CHECK-NEXT: .symidx $ientry_thunk$cdecl$d$__llvm_h__fd
604604
; CHECK-NEXT: .word 1
605605
; CHECK-NEXT: .symidx "#has_varargs"
606606
; CHECK-NEXT: .symidx $ientry_thunk$cdecl$v$varargs

llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll

Lines changed: 39 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -93,10 +93,10 @@ declare i64 @simple_integers(i8, i16, i32, i64) nounwind;
9393
; CHECK-NEXT: .seh_endfunclet
9494
; CHECK-NEXT: .seh_endproc
9595

96-
; NOTE: Only float and double are supported.
97-
declare double @simple_floats(float, double) nounwind;
98-
; CHECK-LABEL: .def $iexit_thunk$cdecl$d$fd;
99-
; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$d$fd
96+
; NOTE: Only half, float, and double are supported.
97+
declare double @simple_floats(half, float, double) nounwind;
98+
; CHECK-LABEL: .def $iexit_thunk$cdecl$d$__llvm_h__fd;
99+
; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$d$__llvm_h__fd
100100
; CHECK: // %bb.0:
101101
; CHECK-NEXT: sub sp, sp, #48
102102
; CHECK-NEXT: .seh_stackalloc 48
@@ -129,8 +129,8 @@ declare double @simple_floats(float, double) nounwind;
129129
; CHECK-NEXT: adrp x11, simple_floats
130130
; CHECK-NEXT: add x11, x11, :lo12:simple_floats
131131
; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall]
132-
; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$d$fd
133-
; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$d$fd
132+
; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$d$__llvm_h__fd
133+
; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$d$__llvm_h__fd
134134
; CHECK-NEXT: blr x8
135135
; CHECK-NEXT: .seh_startepilogue
136136
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -282,33 +282,36 @@ declare void @has_aligned_sret(ptr align 32 sret(%TSRet)) nounwind;
282282
; CHECK: .seh_endfunclet
283283
; CHECK: .seh_endproc
284284

285-
declare [2 x i8] @small_array([2 x i8], [2 x float]) nounwind;
286-
; CHECK-LABEL: .def $iexit_thunk$cdecl$m2$m2F8;
287-
; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m2$m2F8
285+
declare [2 x i8] @small_array([2 x i8], [2 x half], [2 x float]) nounwind;
286+
; CHECK-LABEL: .def $iexit_thunk$cdecl$m2$m2__llvm_H__4F8;
287+
; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m2$m2__llvm_H__4F8
288288
; CHECK: // %bb.0:
289-
; CHECK-NEXT: sub sp, sp, #64
290-
; CHECK-NEXT: .seh_stackalloc 64
291-
; CHECK-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill
292-
; CHECK-NEXT: .seh_save_fplr 48
293-
; CHECK-NEXT: add x29, sp, #48
294-
; CHECK-NEXT: .seh_add_fp 48
289+
; CHECK-NEXT: sub sp, sp, #80
290+
; CHECK-NEXT: .seh_stackalloc 80
291+
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
292+
; CHECK-NEXT: .seh_save_fplr 64
293+
; CHECK-NEXT: add x29, sp, #64
294+
; CHECK-NEXT: .seh_add_fp 64
295295
; CHECK-NEXT: .seh_endprologue
296-
; CHECK-NEXT: sturb w1, [x29, #-1]
297-
; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect
298-
; CHECK-NEXT: sturb w0, [x29, #-2]
299-
; CHECK-NEXT: ldr x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
300-
; CHECK-NEXT: stp s0, s1, [x29, #-12]
301-
; CHECK-NEXT: ldurh w0, [x29, #-2]
302-
; CHECK-NEXT: ldur x1, [x29, #-12]
303-
; CHECK-NEXT: blr x16
304-
; CHECK-NEXT: mov w0, w8
305-
; CHECK-NEXT: sturh w8, [x29, #-14]
306-
; CHECK-NEXT: ubfx w1, w8, #8, #8
296+
; CHECK-NEXT: sturb w0, [x29, #-2]
297+
; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect
298+
; CHECK-NEXT: sturb w1, [x29, #-1]
299+
; CHECK-NEXT: ldr x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
300+
; CHECK-NEXT: stur h0, [x29, #-6]
301+
; CHECK-NEXT: ldurh w0, [x29, #-2]
302+
; CHECK-NEXT: stur h1, [x29, #-4]
303+
; CHECK-NEXT: stp s2, s3, [x29, #-16]
304+
; CHECK-NEXT: ldur w1, [x29, #-6]
305+
; CHECK-NEXT: ldur x2, [x29, #-16]
306+
; CHECK-NEXT: blr x16
307+
; CHECK-NEXT: mov w0, w8
308+
; CHECK-NEXT: sturh w8, [x29, #-18]
309+
; CHECK-NEXT: ubfx w1, w8, #8, #8
307310
; CHECK-NEXT: .seh_startepilogue
308-
; CHECK-NEXT: ldp x29, x30, [sp, #48] // 16-byte Folded Reload
309-
; CHECK-NEXT: .seh_save_fplr 48
310-
; CHECK-NEXT: add sp, sp, #64
311-
; CHECK-NEXT: .seh_stackalloc 64
311+
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
312+
; CHECK-NEXT: .seh_save_fplr 64
313+
; CHECK-NEXT: add sp, sp, #80
314+
; CHECK-NEXT: .seh_stackalloc 80
312315
; CHECK-NEXT: .seh_endepilogue
313316
; CHECK-NEXT: ret
314317
; CHECK-NEXT: .seh_endfunclet
@@ -325,8 +328,8 @@ declare [2 x i8] @small_array([2 x i8], [2 x float]) nounwind;
325328
; CHECK-NEXT: adrp x11, small_array
326329
; CHECK-NEXT: add x11, x11, :lo12:small_array
327330
; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall]
328-
; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m2$m2F8
329-
; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m2$m2F8
331+
; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m2$m2__llvm_H__4F8
332+
; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m2$m2__llvm_H__4F8
330333
; CHECK-NEXT: blr x8
331334
; CHECK-NEXT: .seh_startepilogue
332335
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -577,7 +580,7 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
577580
; CHECK-NEXT: .symidx simple_integers
578581
; CHECK-NEXT: .word 0
579582
; CHECK-NEXT: .symidx simple_floats
580-
; CHECK-NEXT: .symidx $iexit_thunk$cdecl$d$fd
583+
; CHECK-NEXT: .symidx $iexit_thunk$cdecl$d$__llvm_h__fd
581584
; CHECK-NEXT: .word 4
582585
; CHECK-NEXT: .symidx "#simple_floats$exit_thunk"
583586
; CHECK-NEXT: .symidx simple_floats
@@ -601,7 +604,7 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
601604
; CHECK-NEXT: .symidx has_aligned_sret
602605
; CHECK-NEXT: .word 0
603606
; CHECK-NEXT: .symidx small_array
604-
; CHECK-NEXT: .symidx $iexit_thunk$cdecl$m2$m2F8
607+
; CHECK-NEXT: .symidx $iexit_thunk$cdecl$m2$m2__llvm_H__4F8
605608
; CHECK-NEXT: .word 4
606609
; CHECK-NEXT: .symidx "#small_array$exit_thunk"
607610
; CHECK-NEXT: .symidx small_array
@@ -634,14 +637,14 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
634637
define void @func_caller() nounwind {
635638
call void @no_op()
636639
call i64 @simple_integers(i8 0, i16 0, i32 0, i64 0)
637-
call double @simple_floats(float 0.0, double 0.0)
640+
call double @simple_floats(half 0.0, float 0.0, double 0.0)
638641
call void (...) @has_varargs()
639642
%c = alloca i8
640643
call void @has_sret(ptr sret([100 x i8]) %c)
641644
%aligned = alloca %TSRet, align 32
642645
store %TSRet { i64 0, i64 0 }, ptr %aligned, align 32
643646
call void @has_aligned_sret(ptr align 32 sret(%TSRet) %aligned)
644-
call [2 x i8] @small_array([2 x i8] [i8 0, i8 0], [2 x float] [float 0.0, float 0.0])
647+
call [2 x i8] @small_array([2 x i8] [i8 0, i8 0], [2 x half] [half 0.0, half 0.0], [2 x float] [float 0.0, float 0.0])
645648
call [3 x i64] @large_array([3 x i64] [i64 0, i64 0, i64 0], [2 x double] [double 0.0, double 0.0], [2 x [2 x i64]] [[2 x i64] [i64 0, i64 0], [2 x i64] [i64 0, i64 0]])
646649
call %T2 @simple_struct(%T1 { i16 0 }, %T2 { i32 0, float 0.0 }, %T3 { i64 0, double 0.0 }, %T4 { i64 0, double 0.0, i8 0 })
647650
call <4 x i8> @small_vector(<4 x i8> <i8 0, i8 0, i8 0, i8 0>)

llvm/test/CodeGen/AArch64/frexp-arm64ec.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@
22

33
; Separate from llvm-frexp.ll test because this errors on half cases
44

5+
; ARM64EC-LABEL: test_frexp_f16_i32
6+
; ARM64EC: fcvt d0, h0
7+
; ARM64EC: bl "#frexp"
8+
; ARM64EC: fcvt h0, d0
9+
define { half, i32 } @test_frexp_f16_i32(half %a) {
10+
%result = call { half, i32 } @llvm.frexp.f16.i32(half %a)
11+
ret { half, i32 } %result
12+
}
13+
514
; ARM64EC-LABEL: test_frexp_f32_i32
615
; ARM64EC: fcvt d0, s0
716
; ARM64EC: bl "#frexp"

llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,15 @@
33

44
; Separate from ldexp.ll test because this errors on half cases
55

6+
; ARM64EC-LABEL: ldexp_f16 =
7+
; ARM64EC: fcvt d0, h0
8+
; ARM64EC: bl "#ldexp"
9+
; ARM64EC: fcvt h0, d0
10+
define half @ldexp_f16(half %val, i32 %a) {
11+
%call = call half @llvm.ldexp.f16(half %val, i32 %a)
12+
ret half %call
13+
}
14+
615
; ARM64EC-LABEL: ldexp_f32 =
716
; ARM64EC: fcvt d0, s0
817
; ARM64EC: bl "#ldexp"

llvm/test/CodeGen/AArch64/powi-arm64ec.ll

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,18 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc -mtriple=arm64ec-windows-msvc < %s | FileCheck -check-prefix=ARM64EC %s
33

4-
declare double @llvm.powi.f64.i32(double, i32)
4+
declare half @llvm.powi.f16.i32(half, i32)
55
declare float @llvm.powi.f32.i32(float, i32)
6+
declare double @llvm.powi.f64.i32(double, i32)
7+
8+
; ARM64EC-LABEL: powi_f16
9+
; ARM64EC: fcvt s0, h0
10+
; ARM64EC: scvtf s1, w0
11+
; ARM64EC: bl "#powf"
12+
define half @powi_f16(half %x, i32 %n) nounwind {
13+
%ret = tail call half @llvm.powi.f16.i32(half %x, i32 %n)
14+
ret half %ret
15+
}
616

717
; ARM64EC-LABEL: powi_f32
818
; ARM64EC: scvtf s1, w0

llvm/test/CodeGen/Generic/half.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@
77
; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-apple-darwin | FileCheck %s --check-prefixes=ALL,CHECK %}
88
; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK %}
99
; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=aarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK %}
10-
; FIXME(#94434) unsupported on arm64ec
11-
; RUN: %if aarch64-registered-target %{ ! llc %s -o - -mtriple=arm64ec-pc-windows-msvc -filetype=null %}
10+
; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=arm64ec-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK %}
1211
; RUN: %if amdgpu-registered-target %{ llc %s -o - -mtriple=amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=ALL,CHECK %}
1312
; RUN: %if arc-registered-target %{ llc %s -o - -mtriple=arc-elf | FileCheck %s --check-prefixes=ALL,CHECK %}
1413
; RUN: %if arm-registered-target %{ llc %s -o - -mtriple=arm-unknown-linux-gnueabi | FileCheck %s --check-prefixes=ALL,CHECK %}
@@ -47,6 +46,8 @@
4746
; RUN: %if xcore-registered-target %{ llc %s -o - -mtriple=xcore-unknown-unknown | FileCheck %s --check-prefixes=ALL,CHECK %}
4847
; RUN: %if xtensa-registered-target %{ llc %s -o - -mtriple=xtensa-none-elf | FileCheck %s --check-prefixes=ALL,CHECK %}
4948

49+
; Note that arm64ec labels are quoted, hence the `{{"?}}:`.
50+
5051
; Codegen tests don't work the same for graphics targets. Add a dummy directive
5152
; for filecheck, just make sure we don't crash.
5253
; NOCRASH: {{.*}}
@@ -58,7 +59,7 @@
5859
; Regression test for https://github.com/llvm/llvm-project/issues/97981.
5960

6061
define half @from_bits(i16 %bits) nounwind {
61-
; ALL-LABEL: from_bits:
62+
; ALL-LABEL: from_bits{{"?}}:
6263
; CHECK-NOT: __extend
6364
; CHECK-NOT: __trunc
6465
; CHECK-NOT: __gnu
@@ -68,7 +69,7 @@ define half @from_bits(i16 %bits) nounwind {
6869
}
6970

7071
define i16 @to_bits(half %f) nounwind {
71-
; ALL-LABEL: to_bits:
72+
; ALL-LABEL: to_bits{{"?}}:
7273
; CHECK-NOT: __extend
7374
; CHECK-NOT: __trunc
7475
; CHECK-NOT: __gnu
@@ -81,7 +82,7 @@ define i16 @to_bits(half %f) nounwind {
8182
; https://github.com/llvm/llvm-project/issues/117337 and similar issues.
8283

8384
define half @check_freeze(half %f) nounwind {
84-
; ALL-LABEL: check_freeze:
85+
; ALL-LABEL: check_freeze{{"?}}:
8586
%t0 = freeze half %f
8687
ret half %t0
8788
}

0 commit comments

Comments
 (0)