From f4e38914257558935a3e5225868ea3d02d595d74 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Sun, 23 Mar 2025 16:45:24 +0100 Subject: [PATCH] [libc++] Document our ABI guarantees and what ABI flags exist to modify these guarantees --- libcxx/docs/ABIGuarantees.rst | 210 +++++++++++++++++++++++++++ libcxx/docs/UserDocumentation.rst | 2 + libcxx/docs/VendorDocumentation.rst | 5 + libcxx/docs/index.rst | 1 + libcxx/include/__configuration/abi.h | 126 ++++------------ 5 files changed, 248 insertions(+), 96 deletions(-) create mode 100644 libcxx/docs/ABIGuarantees.rst diff --git a/libcxx/docs/ABIGuarantees.rst b/libcxx/docs/ABIGuarantees.rst new file mode 100644 index 0000000000000..c25aaa8e42330 --- /dev/null +++ b/libcxx/docs/ABIGuarantees.rst @@ -0,0 +1,210 @@ +.. _ABIGuarantees: + +======================= +libc++'s ABI Guarantees +======================= + +libc++ provides multiple types of ABI guarantees. These include stability of the layout of structs, the linking of TUs +built against different versions and configurations of the library, and more. This document describes what guarantees +libc++ provides in these different areas as well as what options exist for vendors to affect these guarantees. + +Note that all of the guarantees listed below come with an asterisk that there may be circumstances where we deem it +worth it to break that guarantee. These breaks are communicated to vendors by CCing #libcxx-vendors on GitHub. If you +are a vendor, please ask to be added to that group to be notified about changes that potentially affect you. + +ABI flags +========= +All the ABI flags listed below can be added to the ``__config_site`` header by the vendor to opt in to an ABI breaking +change. These flags should never be set by the user. When porting libc++ to a new platform, vendors should consider +which flags to enable, assuming that ABI stability is relevant to them. 
Please contact the libc++ team on Discord or +through other means to be able to make an informed decision on which flags make sense to enable, and to avoid enabling +flags which may not be stable. Flags can be enabled via the ``LIBCXX_ABI_DEFINES`` CMake option. + + +Stability of the Layout of Structs +================================== + +The layout of any user-observable struct is kept stable across versions of the library and any user-facing options +documented :ref:`here <libcxx-configuration-macros>`. There are a lot of structs that have internal names, but are +nonetheless observable by users; for example through public aliases to these types or because they affect the layout of +other types. + +There are multiple ABI flags which affect the layout of certain structs: + +``_LIBCPP_ABI_ALTERNATE_STRING_LAYOUT`` +--------------------------------------- +This changes the internal layout of ``basic_string`` to move the section that is used for the internal buffer to the +front, making it eight byte aligned instead of being unaligned, improving the performance of some operations +significantly. + +``_LIBCPP_ABI_NO_ITERATOR_BASES`` +--------------------------------- +This removes the ``iterator`` base class from ``back_insert_iterator``, ``front_insert_iterator``, ``insert_iterator``, +``istream_iterator``, ``ostream_iterator``, ``ostreambuf_iterator``, ``reverse_iterator``, and ``raw_storage_iterator``. +This doesn't directly affect the layout of these types in most cases, but may result in more padding being used when +they are used in combination, for example ``reverse_iterator<reverse_iterator<T>>``. + +``_LIBCPP_ABI_VARIANT_INDEX_TYPE_OPTIMIZATION`` +------------------------------------------------- +This changes the index type used inside ``variant`` to the smallest required type to reduce the datasize of variants in +most cases. 
+ +``_LIBCPP_ABI_OPTIMIZED_FUNCTION`` +---------------------------------- +This significantly restructures how ``function`` is written to provide better performance, but is currently not ABI +stable. + +``_LIBCPP_ABI_NO_RANDOM_DEVICE_COMPATIBILITY_LAYOUT`` +----------------------------------------------------- +This changes the layout of ``random_device`` to only hold state with an implementation that gets entropy from a file +(see ``_LIBCPP_USING_DEV_RANDOM``). When switching from this implementation to another one on a platform that has +already shipped ``random_device``, one needs to retain the same object layout to remain ABI compatible. This flag +removes these workarounds for platforms that don't care about ABI compatibility. + +``_LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING`` +------------------------------------------ +This removes artificial padding from ``_LIBCPP_COMPRESSED_PAIR`` and ``_LIBCPP_COMPRESSED_TRIPLE``. + +These macros are used inside the associative and unordered containers, ``deque``, ``forward_list``, ``future``, +``list``, ``basic_string``, ``function``, ``shared_ptr``, ``unique_ptr``, and ``vector`` to stay ABI compatible with the +legacy ``__compressed_pair`` type. ``__compressed_pair`` had historically been used to reduce storage requirements in +the case of empty types, but has been replaced by ``[[no_unique_address]]``. ``[[no_unique_address]]`` is significantly +lighter in terms of compile time and debug information, and also improves the layout of structs further. However, to +keep ABI stability, the additional improvements in layout had to be reverted by introducing artificial padding. This +flag removes that artificial padding. + +``_LIBCPP_ABI_IOS_ALLOW_ARBITRARY_FILL_VALUE`` +---------------------------------------------- +``basic_ios`` uses ``WEOF`` to indicate that the fill value is uninitialized. 
However, on platforms where the size of +``char_type`` is equal to or greater than the size of ``int_type`` and ``char_type`` is unsigned, +``char_traits<char_type>::eq_int_type()`` cannot distinguish between ``WEOF`` and ``WCHAR_MAX``. This flag changes +``basic_ios`` to instead track whether the fill value has been initialized using a separate boolean. + + +Linking TUs which have been compiled against different releases of libc++ +========================================================================= +libc++ supports linking TUs which have been compiled against different releases of libc++ by marking symbols with +hidden visibility and changing the mangling of header-only functions in every release. + + +Linking TUs which have been compiled with different flags affecting code gen +============================================================================ +There are a lot of compiler (and library) flags which change the code generated for functions. This includes flags like +``-O1``, which are guaranteed by the compiler to not change the observable behaviour of a correct program, as well as +flags like ``-fexceptions``, which **do** change the observable behaviour. libc++ allows linking of TUs which have been +compiled with specific flags only and makes no guarantees for any of the flags not listed below. + +The flags allowed (in any combination) are: +- ``-f[no-]exceptions`` +- ``-D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE{_FAST,_EXTENSIVE,_DEBUG,_NONE}`` + +Note that this does not provide any guarantees about user-defined functions, but only that the libc++ functions linked +behave as the flags say. + + +Availability of symbols in the built library (both static and shared) +===================================================================== +In general, libc++ does not make any guarantees about forwards-compatibility. That is, a TU compiled against new headers +may not work with an older library. Vendors who require such support can leverage availability markup. 
On the other +hand, backwards compatibility is generally guaranteed. + +There are multiple ABI flags that change the symbols exported from the built library: + +``_LIBCPP_ABI_DO_NOT_EXPORT_BASIC_STRING_COMMON`` +------------------------------------------------- +This removes ``__basic_string_common<true>::__throw_length_error()`` and +``__basic_string_common<true>::__throw_out_of_range()``. These symbols have been used by ``basic_string`` in the past, +but are not referenced from the headers anymore. + +``_LIBCPP_ABI_DO_NOT_EXPORT_VECTOR_BASE_COMMON`` +------------------------------------------------ +This removes ``__vector_base_common<true>::__throw_length_error()`` and +``__vector_base_common<true>::__throw_out_of_range()``. These symbols have been used by ``vector`` in the past, but are +not referenced from the headers anymore. + +``_LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10`` +---------------------------------------------- +This removes ``__itoa::__u32toa()`` and ``__itoa::__u64toa()``. These symbols have been used by ``to_chars`` in the past, +but are not referenced from the headers anymore. + +``_LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION`` +------------------------------------------------------- +This replaces the symbols that are exported for ``basic_string`` to avoid exporting functions which are likely to be +inlined as well as explicitly moving paths to the built library which are slow, improving fast-path inlining of multiple +functions. This flag is currently unstable. + + +Stability of the traits of a type +================================= +Whether a particular trait of a type is kept stable depends heavily on the type in question and the trait. The most +important trait of a type to keep stable is the triviality for the purpose of calls, since that directly affects the +function call ABI. Which types are considered non-trivial for the purpose of calls is defined in the +`Itanium ABI <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#non-trivial>`_. 
+``is_trivially_copyable`` should also be kept stable usually, since many programs depend on this trait for their own +layout decisions. This isn't as rigid as the previous requirement though. + +There are multiple ABI flags that change traits of a struct: + +``_LIBCPP_ABI_ENABLE_UNIQUE_PTR_TRIVIAL_ABI`` +--------------------------------------------- +This flag adds ``[[clang::trivial_abi]]`` to ``unique_ptr``, which makes it trivial for the purpose of calls. + +``_LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI`` +--------------------------------------------- +This flag adds ``[[clang::trivial_abi]]`` to ``shared_ptr``, which makes it trivial for the purpose of calls. + + +Types that public aliases reference +=================================== +There are a lot of aliases that reference types with library internal names. For example, containers contain an +``iterator`` alias to a type with a library internal name. These have to always reference the same type, since the +mangling of user-defined function overloads would change otherwise. A notable exception to this are the alias templates +to type traits. There doesn't seem to be anybody who relies on these names staying the same, so it is OK to change what +these aliases actually reference. + +There are multiple ABI flags which change which type an alias references: + +``_LIBCPP_ABI_INCOMPLETE_TYPES_IN_DEQUE`` +----------------------------------------- +This changes ``deque::iterator`` to avoid requiring complete types for ``deque``. + +``_LIBCPP_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE`` +------------------------------------------------- +This changes the unordered containers' ``size_type`` aliases. 
+ +``_LIBCPP_ABI_USE_WRAP_ITER_IN_STD_ARRAY`` and ``_LIBCPP_ABI_USE_WRAP_ITER_IN_STD_STRING_VIEW`` +----------------------------------------------------------------------------------------------- +This changes the ``iterator`` and ``const_iterator`` of ``array`` and ``string_view`` respectively to reference +``__wrap_iter`` instead, which makes it less likely for users to depend on non-portable implementation details. This is +especially useful because enabling bounded iterators hardening requires code not to make these assumptions. + +``_LIBCPP_ABI_BOUNDED_ITERATORS``, ``_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STRING``, ``_LIBCPP_ABI_BOUNDED_ITERATORS_IN_VECTOR``, and ``_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY`` +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +These flags change the ``iterator`` member of various classes to reference hardened iterators instead. See the +:ref:`hardening documentation <hardening>` for more details. + + +Meaning of values +================= +The meaning of specific values can usually not be changed, since programs compiled against older versions of the headers +may check for these values. These specific values don't have to be hard-coded, but can also depend on user input. + +There are multiple ABI flags that change the meaning of particular values: + +``_LIBCPP_ABI_REGEX_CONSTANTS_NONZERO`` +--------------------------------------- +This changes the value of ``regex_constants::syntax_option_type::ECMAScript`` to be standards-conforming. + +``_LIBCPP_ABI_FIX_CITYHASH_IMPLEMENTATION`` +------------------------------------------- +This flag fixes the implementation of CityHash used for ``hash``. The incorrect implementation of +CityHash has the problem that it drops some bits on the floor. Fixing the implementation changes the hash of values, +resulting in an ABI break. 
+ +inline namespaces +================= +Inline namespaces which contain types that are observable by the user need to be kept the same, since they affect +mangling. Almost all of libc++'s symbols are inside an inline namespace. By default that namespace is ``__1``, but can +be changed by the vendor by setting ``LIBCXX_ABI_NAMESPACE`` during CMake configuration. There is also +``_LIBCPP_ABI_NO_FILESYSTEM_INLINE_NAMESPACE`` to remove the ``__fs`` namespace from surrounding the ``filesystem`` +namespace. This shortens the mangling of the filesystem symbols a bit. diff --git a/libcxx/docs/UserDocumentation.rst b/libcxx/docs/UserDocumentation.rst index dbacb44735d08..f82554a4869fa 100644 --- a/libcxx/docs/UserDocumentation.rst +++ b/libcxx/docs/UserDocumentation.rst @@ -82,6 +82,8 @@ when ``-fexperimental-library`` is passed: version has shipped. The full policy is explained :ref:`here `. +.. _libcxx-configuration-macros: + Libc++ Configuration Macros =========================== diff --git a/libcxx/docs/VendorDocumentation.rst b/libcxx/docs/VendorDocumentation.rst index 959a28607d75d..aede8f9a81dd2 100644 --- a/libcxx/docs/VendorDocumentation.rst +++ b/libcxx/docs/VendorDocumentation.rst @@ -582,3 +582,8 @@ situations will give the same result: $ clang++ -stdlib=libc++ helloworld.cpp -lcxxrt .. _`libcxxrt`: https://github.com/libcxxrt/libcxxrt + +libc++'s ABI guarantees +======================= + +Libc++ provides several ABI guarantees, which are documented :ref:`here <ABIGuarantees>`. 
diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst index a8b0d5ce1ee97..9c957e9d20cb7 100644 --- a/libcxx/docs/index.rst +++ b/libcxx/docs/index.rst @@ -37,6 +37,7 @@ Getting Started with libc++ ReleaseNotes UserDocumentation VendorDocumentation + ABIGuarantees Contributing CodingGuidelines TestingLibcxx diff --git a/libcxx/include/__configuration/abi.h b/libcxx/include/__configuration/abi.h index cc4b930b3cf4a..a75cd0a675339 100644 --- a/libcxx/include/__configuration/abi.h +++ b/libcxx/include/__configuration/abi.h @@ -38,84 +38,47 @@ #endif #if _LIBCPP_ABI_VERSION >= 2 -// Change short string representation so that string data starts at offset 0, -// improving its alignment in some cases. -# define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT -// Fix deque iterator type in order to support incomplete types. -# define _LIBCPP_ABI_INCOMPLETE_TYPES_IN_DEQUE -// Fix undefined behavior in how std::list stores its linked nodes. +// TODO: Move the description of the remaining ABI flags to ABIGuarantees.rst or remove them. + +// Override the default return value of exception::what() for bad_function_call::what() +// with a string that is specific to bad_function_call (see http://wg21.link/LWG2233). +// This is an ABI break on platforms that sign and authenticate vtable function pointers +// because it changes the mangling of the virtual function located in the vtable, which +// changes how it gets signed. +# define _LIBCPP_ABI_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE +// According to the Standard, `bitset::operator[] const` returns bool +# define _LIBCPP_ABI_BITSET_VECTOR_BOOL_CONST_SUBSCRIPT_RETURN_BOOL + +// In LLVM 20, we've changed to take these ABI breaks unconditionally. These flags only exist in case someone is running +// into the static_asserts we added to catch the ABI break and don't care that it is one. +// TODO(LLVM 22): Remove these flags # define _LIBCPP_ABI_LIST_REMOVE_NODE_POINTER_UB -// Fix undefined behavior in how __tree stores its end and parent nodes. 
# define _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB -// Fix undefined behavior in how __hash_table stores its pointer types. # define _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB # define _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB + +// These flags are documented in ABIGuarantees.rst +# define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT +# define _LIBCPP_ABI_DO_NOT_EXPORT_BASIC_STRING_COMMON +# define _LIBCPP_ABI_DO_NOT_EXPORT_VECTOR_BASE_COMMON +# define _LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10 +# define _LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI +# define _LIBCPP_ABI_ENABLE_UNIQUE_PTR_TRIVIAL_ABI +# define _LIBCPP_ABI_FIX_CITYHASH_IMPLEMENTATION # define _LIBCPP_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE -// Give reverse_iterator one data member of type T, not two. -// Also, in C++17 and later, don't derive iterator types from std::iterator. +# define _LIBCPP_ABI_INCOMPLETE_TYPES_IN_DEQUE +# define _LIBCPP_ABI_IOS_ALLOW_ARBITRARY_FILL_VALUE +# define _LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING +# define _LIBCPP_ABI_NO_FILESYSTEM_INLINE_NAMESPACE # define _LIBCPP_ABI_NO_ITERATOR_BASES -// Use the smallest possible integer type to represent the index of the variant. -// Previously libc++ used "unsigned int" exclusively. -# define _LIBCPP_ABI_VARIANT_INDEX_TYPE_OPTIMIZATION -// Unstable attempt to provide a more optimized std::function +# define _LIBCPP_ABI_NO_RANDOM_DEVICE_COMPATIBILITY_LAYOUT # define _LIBCPP_ABI_OPTIMIZED_FUNCTION -// All the regex constants must be distinct and nonzero. # define _LIBCPP_ABI_REGEX_CONSTANTS_NONZERO -// Re-worked external template instantiations for std::string with a focus on -// performance and fast-path inlining. # define _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION -// Enable clang::trivial_abi on std::unique_ptr. 
-# define _LIBCPP_ABI_ENABLE_UNIQUE_PTR_TRIVIAL_ABI -// Enable clang::trivial_abi on std::shared_ptr and std::weak_ptr -# define _LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI -// std::random_device holds some state when it uses an implementation that gets -// entropy from a file (see _LIBCPP_USING_DEV_RANDOM). When switching from this -// implementation to another one on a platform that has already shipped -// std::random_device, one needs to retain the same object layout to remain ABI -// compatible. This switch removes these workarounds for platforms that don't care -// about ABI compatibility. -# define _LIBCPP_ABI_NO_RANDOM_DEVICE_COMPATIBILITY_LAYOUT -// Don't export the legacy __basic_string_common class and its methods from the built library. -# define _LIBCPP_ABI_DO_NOT_EXPORT_BASIC_STRING_COMMON -// Don't export the legacy __vector_base_common class and its methods from the built library. -# define _LIBCPP_ABI_DO_NOT_EXPORT_VECTOR_BASE_COMMON -// According to the Standard, `bitset::operator[] const` returns bool -# define _LIBCPP_ABI_BITSET_VECTOR_BOOL_CONST_SUBSCRIPT_RETURN_BOOL -// Fix the implementation of CityHash used for std::hash. -// This is an ABI break because `std::hash` will return a different result, -// which means that hashing the same object in translation units built against -// different versions of libc++ can return inconsistent results. This is especially -// tricky since std::hash is used in the implementation of unordered containers. -// -// The incorrect implementation of CityHash has the problem that it drops some -// bits on the floor. -# define _LIBCPP_ABI_FIX_CITYHASH_IMPLEMENTATION -// Remove the base 10 implementation of std::to_chars from the dylib. -// The implementation moved to the header, but we still export the symbols from -// the dylib for backwards compatibility. 
-# define _LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10 -// Define std::array/std::string_view iterators to be __wrap_iters instead of raw -// pointers, which prevents people from relying on a non-portable implementation -// detail. This is especially useful because enabling bounded iterators hardening -// requires code not to make these assumptions. # define _LIBCPP_ABI_USE_WRAP_ITER_IN_STD_ARRAY # define _LIBCPP_ABI_USE_WRAP_ITER_IN_STD_STRING_VIEW -// Dont' add an inline namespace for `std::filesystem` -# define _LIBCPP_ABI_NO_FILESYSTEM_INLINE_NAMESPACE -// std::basic_ios uses WEOF to indicate that the fill value is -// uninitialized. However, on platforms where the size of char_type is -// equal to or greater than the size of int_type and char_type is unsigned, -// std::char_traits::eq_int_type() cannot distinguish between WEOF -// and WCHAR_MAX. This ABI setting determines whether we should instead track whether the fill -// value has been initialized using a separate boolean, which changes the ABI. -# define _LIBCPP_ABI_IOS_ALLOW_ARBITRARY_FILL_VALUE -// Historically, libc++ used a type called `__compressed_pair` to reduce storage needs in cases of empty types (e.g. an -// empty allocator in std::vector). We switched to using `[[no_unique_address]]`. However, for ABI compatibility reasons -// we had to add artificial padding in a few places. -// -// This setting disables the addition of such artificial padding, leading to a more optimal -// representation for several types. -# define _LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING +# define _LIBCPP_ABI_VARIANT_INDEX_TYPE_OPTIMIZATION + #elif _LIBCPP_ABI_VERSION == 1 # if !(defined(_LIBCPP_OBJECT_FORMAT_COFF) || defined(_LIBCPP_OBJECT_FORMAT_XCOFF)) // Enable compiling copies of now inline methods into the dylib to support @@ -145,35 +108,6 @@ // The macro below is used for all classes whose ABI have changed as part of fixing these bugs. 
#define _LIBCPP_ABI_LLVM18_NO_UNIQUE_ADDRESS __attribute__((__abi_tag__("llvm18_nua"))) -// Changes the iterator type of select containers (see below) to a bounded iterator that keeps track of whether it's -// within the bounds of the original container and asserts it on every dereference. -// -// ABI impact: changes the iterator type of the relevant containers. -// -// Supported containers: -// - `span`; -// - `string_view`. -// #define _LIBCPP_ABI_BOUNDED_ITERATORS - -// Changes the iterator type of `basic_string` to a bounded iterator that keeps track of whether it's within the bounds -// of the original container and asserts it on every dereference and when performing iterator arithmetics. -// -// ABI impact: changes the iterator type of `basic_string` and its specializations, such as `string` and `wstring`. -// #define _LIBCPP_ABI_BOUNDED_ITERATORS_IN_STRING - -// Changes the iterator type of `vector` to a bounded iterator that keeps track of whether it's within the bounds of the -// original container and asserts it on every dereference and when performing iterator arithmetics. Note: this doesn't -// yet affect `vector`. -// -// ABI impact: changes the iterator type of `vector` (except `vector`). -// #define _LIBCPP_ABI_BOUNDED_ITERATORS_IN_VECTOR - -// Changes the iterator type of `array` to a bounded iterator that keeps track of whether it's within the bounds of the -// container and asserts it on every dereference and when performing iterator arithmetic. -// -// ABI impact: changes the iterator type of `array`, its size and its layout. -// #define _LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY - // [[msvc::no_unique_address]] seems to mostly affect empty classes, so the padding scheme for Itanium doesn't work. #if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING) # define _LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING