@@ -698,90 +698,104 @@ static inline size_t _mi_os_numa_node_count(void) {
698698// -------------------------------------------------------------------
699699// Getting the thread id should be performant as it is called in the
700700// fast path of `_mi_free` and we specialize for various platforms.
701+ // We only require _mi_thread_id() to return a unique id for each thread.
701702// -------------------------------------------------------------------
702703#if defined(_WIN32 )
704+
703705#define WIN32_LEAN_AND_MEAN
704706#include <windows.h>
// Windows variant of `_mi_thread_id`: returns the pointer produced by
// `NtCurrentTeb()` converted to an integer. Takes no arguments.
705707static inline mi_threadid_t _mi_thread_id (void ) mi_attr_noexcept {
706708 // Windows: works on Intel and ARM in both 32- and 64-bit
707709 return (uintptr_t )NtCurrentTeb (); // the per-thread pointer itself serves as the id
708710}
709711
710- #elif defined( __GNUC__ ) && \
711- (defined( __x86_64__ ) || defined( __i386__ ) || defined( __aarch64__ ))
712-
712+ // We use assembly for a fast thread id on the main platforms. The TLS layout depends on
713+ // both the OS and libc implementation so we use specific tests for each main platform.
714+ // If you test on another platform and it works please send a PR :-)
713715// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
716+ #elif defined(__GNUC__ ) && ( \
717+ (defined(__GLIBC__ ) && (defined(__x86_64__ ) || defined(__i386__ ) || defined(__arm__ ) || defined(__aarch64__ ))) \
718+ || (defined(__APPLE__ ) && (defined(__x86_64__ ) || defined(__aarch64__ ))) \
719+ || (defined(__BIONIC__ ) && (defined(__x86_64__ ) || defined(__i386__ ) || defined(__arm__ ) || defined(__aarch64__ ))) \
720+ || (defined(__FreeBSD__ ) && (defined(__x86_64__ ) || defined(__i386__ ) || defined(__aarch64__ ))) \
721+ )
722+
// Read the pointer-sized entry number `slot` from the current thread's TLS area.
//   slot: index of the entry, counted in units of `sizeof(void*)`.
//   returns: the `void*` value stored in that entry.
// x86 targets load directly through the GS/FS segment at byte offset `ofs`;
// ARM targets first fetch the thread pointer into `tcb` and then index `tcb[slot]`.
// NOTE: this listing is a diff hunk: lines marked `-` are the previous version
// of the body and lines marked `+` are its replacement.
714723static inline void * mi_tls_slot (size_t slot ) mi_attr_noexcept {
715724 void * res ;
716725 const size_t ofs = (slot * sizeof (void * )); // byte offset of the slot
717- #if defined(__i386__ )
718- __asm__("movl %%gs:%1, %0" : "=r" (res ) : "m" (* ((void * * )ofs )) : ); // x86 32-bit always uses GS
719- #elif defined(__APPLE__ ) && defined(__x86_64__ )
720- __asm__("movq %%gs:%1, %0" : "=r" (res ) : "m" (* ((void * * )ofs )) : ); // x86_64 macOSX uses GS
721- #elif defined(__x86_64__ ) && (MI_INTPTR_SIZE == 4 )
722- __asm__("movl %%fs:%1, %0" : "=r" (res ) : "m" (* ((void * * )ofs )) : ); // x32 ABI
723- #elif defined(__x86_64__ )
724- __asm__("movq %%fs:%1, %0" : "=r" (res ) : "m" (* ((void * * )ofs )) : ); // x86_64 Linux, BSD uses FS
725- #elif defined(__arm__ ) // arm32: defined but currently not used (see issue #495)
726- void * * tcb ; MI_UNUSED (ofs );
727- __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb ));
728- res = tcb [slot ];
729- #elif defined(__aarch64__ )
730- void * * tcb ; MI_UNUSED (ofs );
731- #if defined(__APPLE__ ) // M1, issue #343
732- __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb ));
733- tcb = (void * * )((uintptr_t )tcb & ~0x07UL ); // clear lower 3 bits
734- #else
735- __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb ));
726+ #if defined(__i386__ )
727+ __asm__("movl %%gs:%1, %0" : "=r" (res ) : "m" (* ((void * * )ofs )) : ); // x86 32-bit always uses GS
728+ #elif defined(__APPLE__ ) && defined(__x86_64__ )
729+ __asm__("movq %%gs:%1, %0" : "=r" (res ) : "m" (* ((void * * )ofs )) : ); // x86_64 macOS uses GS
730+ #elif defined(__x86_64__ ) && (MI_INTPTR_SIZE == 4 )
731+ __asm__("movl %%fs:%1, %0" : "=r" (res ) : "m" (* ((void * * )ofs )) : ); // x32 ABI
732+ #elif defined(__x86_64__ )
733+ __asm__("movq %%fs:%1, %0" : "=r" (res ) : "m" (* ((void * * )ofs )) : ); // x86_64 Linux, BSD uses FS
734+ #elif defined(__arm__ )
735+ void * * tcb ; MI_UNUSED (ofs );
736+ __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb )); // read the thread register via coprocessor 15, then clear the low 2 bits
737+ res = tcb [slot ];
738+ #elif defined(__aarch64__ )
739+ void * * tcb ; MI_UNUSED (ofs );
740+ #if defined(__APPLE__ ) // M1, issue #343
741+ __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb )); // read tpidrro_el0 and clear the low 3 bits in the same asm statement
742+ #else
743+ __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb ));
744+ #endif
745+ res = tcb [slot ];
736746 #endif
737- res = tcb [slot ];
738- #endif
739747 return res ;
740748}
741749
742- // setting a tls slot is only used on macOSX for now
750+ // setting a tls slot is only used on macOS for now
// Store `value` into entry number `slot` of the current thread's TLS area.
//   slot:  index of the entry, counted in units of `sizeof(void*)`.
//   value: pointer to write into that entry.
// Mirrors `mi_tls_slot`: x86 targets store through the GS/FS segment at byte
// offset `ofs`; ARM targets fetch the thread pointer into `tcb` and assign `tcb[slot]`.
// NOTE: this listing is a diff hunk: lines marked `-` are the previous version
// of the body and lines marked `+` are its replacement.
743751static inline void mi_tls_slot_set (size_t slot , void * value ) mi_attr_noexcept {
744752 const size_t ofs = (slot * sizeof (void * )); // byte offset of the slot
745- #if defined(__i386__ )
746- __asm__("movl %1,%%gs:%0" : "=m" (* ((void * * )ofs )) : "rn" (value ) : ); // 32-bit always uses GS
747- #elif defined(__APPLE__ ) && defined(__x86_64__ )
748- __asm__("movq %1,%%gs:%0" : "=m" (* ((void * * )ofs )) : "rn" (value ) : ); // x86_64 macOSX uses GS
749- #elif defined(__x86_64__ ) && (MI_INTPTR_SIZE == 4 )
750- __asm__("movl %1,%%fs:%0" : "=m" (* ((void * * )ofs )) : "rn" (value ) : ); // x32 ABI
751- #elif defined(__x86_64__ )
752- __asm__("movq %1,%%fs:%0" : "=m" (* ((void * * )ofs )) : "rn" (value ) : ); // x86_64 Linux, BSD uses FS
753- #elif defined(__arm__ )
754- void * * tcb ; MI_UNUSED (ofs );
755- __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb ));
756- tcb [slot ] = value ;
757- #elif defined(__aarch64__ )
758- void * * tcb ; MI_UNUSED (ofs );
759- #if defined(__APPLE__ ) // M1, issue #343
760- __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb ));
761- tcb = (void * * )((uintptr_t )tcb & ~0x07UL ); // clear lower 3 bits
762- #else
763- __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb ));
753+ #if defined(__i386__ )
754+ __asm__("movl %1,%%gs:%0" : "=m" (* ((void * * )ofs )) : "rn" (value ) : ); // 32-bit always uses GS
755+ #elif defined(__APPLE__ ) && defined(__x86_64__ )
756+ __asm__("movq %1,%%gs:%0" : "=m" (* ((void * * )ofs )) : "rn" (value ) : ); // x86_64 macOS uses GS
757+ #elif defined(__x86_64__ ) && (MI_INTPTR_SIZE == 4 )
758+ __asm__("movl %1,%%fs:%0" : "=m" (* ((void * * )ofs )) : "rn" (value ) : ); // x32 ABI
759+ #elif defined(__x86_64__ )
760+ __asm__("movq %1,%%fs:%0" : "=m" (* ((void * * )ofs )) : "rn" (value ) : ); // x86_64 Linux, BSD uses FS
761+ #elif defined(__arm__ )
762+ void * * tcb ; MI_UNUSED (ofs );
763+ __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb )); // read the thread register via coprocessor 15, then clear the low 2 bits
764+ tcb [slot ] = value ;
765+ #elif defined(__aarch64__ )
766+ void * * tcb ; MI_UNUSED (ofs );
767+ #if defined(__APPLE__ ) // M1, issue #343
768+ __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb )); // read tpidrro_el0 and clear the low 3 bits in the same asm statement
769+ #else
770+ __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb ));
771+ #endif
772+ tcb [slot ] = value ;
764773 #endif
765- tcb [slot ] = value ;
766- #endif
767774}
768775
// Assembly-backed variant of `_mi_thread_id`: returns the content of a fixed
// TLS slot, read with `mi_tls_slot`, converted to an integer.
// The slot index is 1 on Bionic and 0 on every other target of this branch.
// NOTE: this listing is a diff hunk: lines marked `-` are the previous version
// of the body (keyed on `__ANDROID__` + ARM) and lines marked `+` are its
// replacement (keyed on `__BIONIC__`).
769776static inline mi_threadid_t _mi_thread_id (void ) mi_attr_noexcept {
770- #if defined(__ANDROID__ ) && (defined(__arm__ ) || defined(__aarch64__ ))
771- // issue #384, #495: on arm Android, slot 1 is the thread ID (pointer to pthread internal struct)
772- return (uintptr_t )mi_tls_slot (1 );
773- #else
774- // in all our other targets, slot 0 is the pointer to the thread control block
775- return (uintptr_t )mi_tls_slot (0 );
776- #endif
777+ #if defined(__BIONIC__ )
778+ // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
779+ // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86
780+ return (uintptr_t )mi_tls_slot (1 );
781+ #else
782+ // in all our other targets, slot 0 is the thread id
783+ // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h
784+ // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36
785+ return (uintptr_t )mi_tls_slot (0 );
786+ #endif
777787}
788+
778789#else
779- // otherwise use portable C
790+
791+ // otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms).
// Fallback variant of `_mi_thread_id`, selected by the final `#else` when neither
// the `_WIN32` branch nor the assembly branch applies. Takes no arguments and
// returns the address of `_mi_heap_default` converted to an integer.
780792static inline mi_threadid_t _mi_thread_id (void ) mi_attr_noexcept {
781793 return (uintptr_t )& _mi_heap_default ; // only the address is taken; the variable is not read
782794}
795+
783796#endif
784797
798+
785799// -----------------------------------------------------------------------
786800// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero)
787801// -----------------------------------------------------------------------
0 commit comments