Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions openmp/runtime/cmake/LibompGetArchitecture.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ function(libomp_get_architecture return_arch)
set(detect_arch_src_txt "
#if defined(__KNC__)
#error ARCHITECTURE=mic
#elif defined(__aarch64__) || defined(_M_ARM64) || defined(__arm64ec__) || defined(_M_ARM64EC)
#error ARCHITECTURE=aarch64
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we should leave a comment here to say that arm64ec also has x86_64 defined, and therefore has to be ordered here before that (so nobody tries to reorder it back later)?

Then again, if we add a comment here we should probably do the same in kmp_platform.h below too. So perhaps it's not necessary; once things are working well enough we'll probably build this regularly enough anyway, so we'd catch any breakage that way.

#elif defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
#error ARCHITECTURE=x86_64
#elif defined(__i386) || defined(__i386__) || defined(__IA32__) || defined(_M_I86) || defined(_M_IX86) || defined(__X86__) || defined(_X86_)
Expand All @@ -37,8 +39,6 @@ function(libomp_get_architecture return_arch)
#error ARCHITECTURE=arm
#elif defined(__ARM64_ARCH_8_32__)
#error ARCHITECTURE=aarch64_32
#elif defined(__aarch64__) || defined(_M_ARM64)
#error ARCHITECTURE=aarch64
#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)
#error ARCHITECTURE=ppc64le
#elif defined(__powerpc64__)
Expand Down
9 changes: 5 additions & 4 deletions openmp/runtime/src/kmp_platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,15 +132,16 @@
#define KMP_ARCH_SPARC 0

#if KMP_OS_WINDOWS
#if defined(_M_AMD64) || defined(__x86_64)
#undef KMP_ARCH_X86_64
#define KMP_ARCH_X86_64 1
#elif defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm64ec__) || \
defined(_M_ARM64EC)
#undef KMP_ARCH_AARCH64
#define KMP_ARCH_AARCH64 1
#elif defined(__arm__) || defined(_M_ARM)
#undef KMP_ARCH_ARMV7
#define KMP_ARCH_ARMV7 1
#elif defined(_M_AMD64) || defined(__x86_64)
#undef KMP_ARCH_X86_64
#define KMP_ARCH_X86_64 1
#else
#undef KMP_ARCH_X86
#define KMP_ARCH_X86 1
Expand Down
103 changes: 103 additions & 0 deletions openmp/runtime/src/z_Linux_asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ KMP_PREFIX_UNDERSCORE($0):
.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
.endm

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Stray change

# else // KMP_OS_DARWIN || KMP_OS_WINDOWS
# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
Expand Down Expand Up @@ -1301,6 +1302,106 @@ KMP_LABEL(kmp_no_args):
// '
#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32)

#ifdef __arm64ec__

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
// int gtid, int tid,
// int argc, void *p_argv[]) {
// (*pkfn)( & gtid, & tid, argv[0], ... );
// return 1;
// }
//
// Arm64EC variant (selected by the surrounding #ifdef __arm64ec__).
// pkfn is invoked with only x0-x3 carrying arguments; every further
// argument is written to the stack, with x4 pointing at that area and w5
// holding its size in bytes.
// NOTE(review): this matches the Arm64EC variadic calling convention as
// described in Microsoft's ABI documentation - confirm that pkfn is really
// meant to be called as a variadic function.
//
// parameters:
// x0: pkfn
// w1: gtid
// w2: tid
// w3: argc
// x4: p_argv
//
// locals:
// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
// __tid: tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
// x8: cursor into the outgoing stack argument area; later reused to hold
// the pointer loaded from __os_arm64x_check_icall
// w9: used as temporary for number of pkfn parms
// x10: used to traverse p_argv array; later holds the exit thunk address
// x11: used to hold pkfn address
// x12: used as temporary for stack parameters
//
// outgoing to pkfn:
// x0: &gtid
// x1: &tid
// x2: argv[0] (only loaded when argc >= 1)
// x3: argv[1] (only loaded when argc >= 2)
// x4: address of the stack-passed arguments (sp)
// w5: size in bytes of the stack-passed arguments
//
// return: w0 (always 1/TRUE)
//

// Offsets of the two locals, applied below the frame pointer (x29 - offset).
__gtid = 4
__tid = 8

// -- Begin __kmp_invoke_microtask
// mark_begin;
.section .text,"xr",discard,"#__kmp_invoke_microtask"
.globl "#__kmp_invoke_microtask"
ALIGN 2
"#__kmp_invoke_microtask":
// Standard frame record: save fp/lr and establish the frame pointer.
stp x29, x30, [sp, #-16]!
mov x29, sp

// Reserve (1 + argc/2) * 16 bytes below the frame record. The subtraction is
// always a multiple of 16. The 16 bytes directly below x29 hold the
// __gtid/__tid locals; the rest, starting at sp, is the outgoing argument area.
mov w9, #1
add w9, w9, w3, lsr #1
sub sp, sp, w9, uxtw #4
mov x8, sp

// Move the incoming parameters out of the registers that are about to be
// overwritten with pkfn's arguments, and spill gtid/tid so their addresses
// can be passed.
mov x11, x0
str w1, [x29, #-__gtid]
str w2, [x29, #-__tid]
mov w9, w3
mov x10, x4

// x0 = &gtid, x1 = &tid, x4 = start of stack arguments, w5 = 0 bytes so far.
sub x0, x29, #__gtid
sub x1, x29, #__tid
mov x4, sp
mov w5, #0

// argc == 0: nothing more to load.
cbz w9, KMP_LABEL(kmp_1)
ldr x2, [x10]

// argc == 1: only argv[0] exists. Otherwise load argv[1]; the pre-indexed
// load advances x10 to point at argv[1].
sub w9, w9, #1
cbz w9, KMP_LABEL(kmp_1)
ldr x3, [x10, #8]!
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The calling convention details are a bit unclear to me here; we're accepting arguments up to x4 in registers for this function, but for the called function we only pass up to x3 in registers and the rest on the stack? Is this something that the thunks and __os_arm64x_check_icall sort out if the called function is arm64ec, reloading the later arguments into registers? Or is it that the called function has a different calling convention as a variadic function?

I'm a little unsure about this aspect, since I'm not sure if the called function really is a variadic function; I have a faint memory that this function is used as adapter for calling non-variadic functions with a varying number of arguments. Did you manage to test this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

__os_arm64x_check_icall can’t resolve the target correctly if it’s ARM64EC. In that case, it leaves the target address unchanged and we call the function directly. As you noted, this means that if the target isn’t actually a varargs function, it will misinterpret the arguments.

If we want to support such cases, ARM64EC alone likely won’t suffice. We may need to build x86_64 assembly instead. If we do that and the target is ARM64EC, control will return from the emulator to the entry thunk, which should handle it correctly. The downside is that it's a bit tricky for the build system to handle. I’ll investigate this further.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another overall aspect here is that OpenMP involves a lot of compiler-generated calls to the helper functions; I'm curious whether all of that really works out correctly without any further touches anywhere, at least for the simple case of everything compiled as arm64ec (which would be fine as a first step, if that works). Mixing in object files built as x86_64 would be the advanced case...


// At this point w9 == argc - 1 and still counts argv[1], which has already
// been loaded into x3. Drop it so that w9 is the number of arguments that
// remain to be copied to the stack (argc - 2). Without this decrement the
// loop below runs one extra iteration and reads argv[argc], one element past
// the end of p_argv, storing it into an extra stack slot.
sub w9, w9, #1
// w5 = size in bytes of the stack-passed arguments (8 bytes per argument).
lsl w5, w9, #3

// Copy argv[2] .. argv[argc-1] into the outgoing argument area.
// x10 points at the last argv element consumed; x8 is the next free slot.
KMP_LABEL(kmp_0):
cbz w9, KMP_LABEL(kmp_1)
ldr x12, [x10, #8]!
str x12, [x8], #8
sub w9, w9, #1
b KMP_LABEL(kmp_0)
// All arguments are in place. Form the address of the variadic exit thunk in
// x10 (page address via adrp, then the low 12 bits).
// NOTE(review): $iexit_thunk$cdecl$v$varargs is not defined in this file; it
// is expected to come from compiler-generated code in other objects - confirm
// that the link always provides it.
KMP_LABEL(kmp_1):
adrp x10, $iexit_thunk$cdecl$v$varargs
add x10, x10, :lo12:$iexit_thunk$cdecl$v$varargs
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which object file provides this symbol?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A number of .cpp files from openmp generate this exit thunk, for example for __kmp_fatal guest exit thunk.

// Load the function pointer stored at __os_arm64x_check_icall and call it,
// with the target in x11 and the exit thunk address in x10.
// NOTE(review): presumably the checker leaves x11 unchanged for Arm64EC
// targets and redirects it through the exit thunk otherwise - confirm against
// the Arm64EC ABI documentation.
adrp x8, __os_arm64x_check_icall
ldr x8, [x8, :lo12:__os_arm64x_check_icall]
blr x8
// Invoke the microtask through x11.
blr x11
// Always return 1 (TRUE).
mov w0, #1
// Discard the locals and outgoing argument area, restore fp/lr and return.
mov sp, x29
ldp x29, x30, [sp], #16
ret

// Make the unprefixed name resolve to the "#"-prefixed definition above.
.weak_anti_dep __kmp_invoke_microtask
.set __kmp_invoke_microtask, "#__kmp_invoke_microtask"

// Emit a .hybmp$x record that pairs the function with an entry thunk symbol.
// NOTE(review): the trailing word looks like the record type - confirm.
.section .hybmp$x,"yi"
.symidx "#__kmp_invoke_microtask"
.symidx $ientry_thunk$cdecl$i8$i8i8i8i8i8
.word 1
// -- End __kmp_invoke_microtask

#else

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
Expand Down Expand Up @@ -1425,6 +1526,8 @@ KMP_LABEL(kmp_1):
DEBUG_INFO __kmp_invoke_microtask
// -- End __kmp_invoke_microtask

#endif

#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) */

#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM
Expand Down
Loading