Skip to content

Commit 7c2a3a9

Browse files
authored
JIT/AArch64: [macos][ZTS] Support fast path for tlv_get_addr (#7042)
* JIT/AArch64: [macos][ZTS] Support fast path for tlv_get_addr. Access to a TLV (thread-local variable) on macOS is in "dynamic" form, and the function tlv_get_addr() is invoked to resolve the address. See the example in [1]. Note that there is one struct TLVDescriptor [2] for each TLV. The first member holds the address of the function tlv_get_addr(), and the other two members, "key" and "offset", are used inside tlv_get_addr(). The disassembly of tlv_get_addr() is shown in [3]. With the value from the system register tpidrro_el0, together with "key" and "offset", the TLV address can be obtained. Note that the value of tpidrro_el0 differs between threads, so a unique TLV address is resolved for each thread. It's worth noting that the slow path, i.e. the function tlv_allocate_and_initialize_for_key(), is executed on the first access to a TLV. In this patch: 1. "_tsrm_ls_cache" is guaranteed to be accessed before any VM/JIT code during request startup, e.g. in init_executor(); therefore, the slow path can be avoided. 2. As the TLVDescriptor is immutable and zend_jit_setup() executes once, we obtain this structure in tsrm_get_ls_cache_tcb_offset(). Note that the 'ldr' instruction would be patched to 'add' by the linker. 3. Only the fast path of tlv_get_addr() is implemented in the macro LOAD_TSRM_CACHE. With this patch, all ~4k test cases pass for ZTS+CALL on macOS on Apple silicon. [1] https://gist.github.com/shqking/4aab67e0105f7c1f2c549d57d5799f94 [2] https://opensource.apple.com/source/dyld/dyld-195.6/src/threadLocalVariables.c.auto.html [3] https://gist.github.com/shqking/329d7712c26bad49786ab0a544a4af43 Change-Id: I613e9c37e3ff2ecc3fab0f53f1e48a0246e12ee3
1 parent 805b391 commit 7c2a3a9

File tree

3 files changed

+36
-0
lines changed

3 files changed

+36
-0
lines changed

TSRM/TSRM.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -744,10 +744,17 @@ TSRM_API size_t tsrm_get_ls_cache_tcb_offset(void)
744744
#elif defined(__aarch64__)
745745
size_t ret;
746746

747+
# ifdef __APPLE__
748+
// Points to struct TLVDescriptor for _tsrm_ls_cache in macOS.
749+
asm("adrp %0, #__tsrm_ls_cache@TLVPPAGE\n\t"
750+
"ldr %0, [%0, #__tsrm_ls_cache@TLVPPAGEOFF]"
751+
: "=r" (ret));
752+
# else
747753
asm("mov %0, xzr\n\t"
748754
"add %0, %0, #:tprel_hi12:_tsrm_ls_cache, lsl #12\n\t"
749755
"add %0, %0, #:tprel_lo12_nc:_tsrm_ls_cache"
750756
: "=r" (ret));
757+
# endif
751758
return ret;
752759
#else
753760
return 0;

ext/opcache/jit/zend_jit.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4055,8 +4055,12 @@ ZEND_EXT_API void zend_jit_unprotect(void)
40554055
if (!(JIT_G(debug) & (ZEND_JIT_DEBUG_GDB|ZEND_JIT_DEBUG_PERF_DUMP))) {
40564056
int opts = PROT_READ | PROT_WRITE;
40574057
#ifdef ZTS
4058+
/* TODO: EXEC+WRITE is not supported in macOS. Removing EXEC is still buggy as
4059+
* other threads, which are executing the JITed code, would crash anyway. */
4060+
# ifndef __APPLE__
40584061
/* Another thread may be executing JITed code. */
40594062
opts |= PROT_EXEC;
4063+
# endif
40604064
#endif
40614065
if (mprotect(dasm_buf, dasm_size, opts) != 0) {
40624066
fprintf(stderr, "mprotect() failed [%d] %s\n", errno, strerror(errno));

ext/opcache/jit/zend_jit_arm64.dasc

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,14 @@ const char* zend_reg_name[] = {
184184

185185
#if ZTS
186186
static size_t tsrm_ls_cache_tcb_offset = 0;
187+
# ifdef __APPLE__
188+
/* Local declaration of the macOS thread-local-variable (TLV) descriptor.
 * On macOS, tsrm_ls_cache_tcb_offset is cast to a pointer to this struct
 * (see LOAD_TSRM_CACHE) so that "key" and "offset" can be read when
 * JIT code is generated. NOTE(review): the layout is presumably meant to
 * match dyld's own TLVDescriptor - confirm against the dyld sources. */
struct TLVDescriptor {
189+
/* Resolver entry; per the dyld description this holds tlv_get_addr(). */
void* (*thunk)(struct TLVDescriptor*);
190+
/* Scaled by 8 (key << 3) and used as an offset from the masked thread pointer. */
uint64_t key;
191+
/* Byte offset applied to the pointer loaded through "key". */
uint64_t offset;
192+
};
193+
typedef struct TLVDescriptor TLVDescriptor;
194+
# endif
187195
#endif
188196

189197
/* By default avoid JITing inline handlers if it does not seem profitable due to lack of
@@ -483,10 +491,27 @@ static int logical_immediate_p (uint64_t value, uint32_t reg_size)
483491
|| }
484492
|.endmacro
485493

494+
// Safe memory load/store with an unsigned 64-bit offset.
//   ldr_str_ins - load/store instruction to emit
//   op          - register operand of that instruction
//   base_reg    - base address register
//   offset      - C expression, compared as uintptr_t against LDR_STR_PIMM64
//   tmp_reg     - scratch register; only written when offset > LDR_STR_PIMM64
// If the offset exceeds LDR_STR_PIMM64 it is first materialized in tmp_reg via
// LOAD_64BIT_VAL and register-offset addressing is used; otherwise the offset
// is encoded directly as an immediate.
// NOTE(review): LDR_STR_PIMM64 is presumably the largest immediate offset
// encodable for a 64-bit ldr/str - confirm at its definition.
495+
|.macro SAFE_MEM_ACC_WITH_64_UOFFSET, ldr_str_ins, op, base_reg, offset, tmp_reg
496+
|| if (((uintptr_t)(offset)) > LDR_STR_PIMM64) {
497+
| LOAD_64BIT_VAL tmp_reg, offset
498+
| ldr_str_ins op, [base_reg, tmp_reg]
499+
|| } else {
500+
| ldr_str_ins op, [base_reg, #(offset)]
501+
|| }
502+
|.endmacro
503+
486504
// Loads the value of _tsrm_ls_cache for the current thread into "reg".
// TMP3 is clobbered in both variants; TMP1 may additionally be clobbered on
// macOS (it is the scratch register passed to SAFE_MEM_ACC_WITH_64_UOFFSET).
//
// macOS: tsrm_ls_cache_tcb_offset is treated as a TLVDescriptor pointer. The
// sequence reads tpidrro_el0, clears its low three bits, loads the pointer
// stored at (key << 3) from that base, then loads "reg" from that pointer plus
// "offset". Only this fast path is emitted; there is no fallback for a
// variable that has not been allocated yet, so _tsrm_ls_cache must already
// have been accessed on the thread before this code runs.
//
// Other platforms: reads tpidr_el0 and loads "reg" from
// [tpidr_el0 + tsrm_ls_cache_tcb_offset]; the offset is asserted to fit
// within LDR_STR_PIMM64.
|.macro LOAD_TSRM_CACHE, reg
505+
||#ifdef __APPLE__
506+
| .long 0xd53bd071 // TODO: hard-coded: mrs TMP3, tpidrro_el0
507+
| and TMP3, TMP3, #0xfffffffffffffff8
508+
| SAFE_MEM_ACC_WITH_64_UOFFSET ldr, TMP3, TMP3, (((TLVDescriptor*)tsrm_ls_cache_tcb_offset)->key << 3), TMP1
509+
| SAFE_MEM_ACC_WITH_64_UOFFSET ldr, reg, TMP3, (((TLVDescriptor*)tsrm_ls_cache_tcb_offset)->offset), TMP1
510+
||#else
487511
| .long 0xd53bd051 // TODO: hard-coded: mrs TMP3, tpidr_el0
488512
|| ZEND_ASSERT(tsrm_ls_cache_tcb_offset <= LDR_STR_PIMM64);
489513
| ldr reg, [TMP3, #tsrm_ls_cache_tcb_offset]
514+
||#endif
490515
|.endmacro
491516

492517
|.macro LOAD_ADDR_ZTS, reg, struct, field

0 commit comments

Comments
 (0)