From d51148dcb2412750920b5e3fa18c343cb7b4c173 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Wed, 19 Nov 2025 17:21:55 +0000 Subject: [PATCH 01/11] [libcxx] Recalculate union-member sizes in basic_string Fixes #51158: instantiating a `basic_string` with a character type of 3, 5, or greater than 8 wouldn't work properly, because the two members of its internal union disagreed on the location of the bit flag indicating which member was live. This commit completely rewrites the calculations of how much to pad the two sub-structures so that their lengths match. Two new static_asserts check that, after all the calculation is done, the compiler agrees that the structures really did end up the same size. A new test instantiates `basic_string` with every size of character type up to 16, and four different alignment requirements. The new code now potentially has to pad _both_ sub-structures, depending on what is and is not divisible by the size of the character type. This also means that the `__padding` type must have a method to zero it out (otherwise the `__grow_by` method stops being constexpr, which an existing test depends on). I've also added some extra checks to the existing test `push_back.pass.cpp`, which _could_ have found this issue by checking that pushing huge characters on to a string updated the length in the appropriate way. --- libcxx/include/string | 57 ++++++++- .../basic.string/awkward-char-types.pass.cpp | 114 ++++++++++++++++++ .../string_append/push_back.pass.cpp | 4 + 3 files changed, 169 insertions(+), 6 deletions(-) create mode 100644 libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp diff --git a/libcxx/include/string b/libcxx/include/string index 09fc6228c4fdb..50fcf5dc6fa44 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -719,10 +719,16 @@ struct __init_with_sentinel_tag {}; template struct __padding { char __padding_[_PaddingSize]; + constexpr void clear() { + __padding __initialized = {0}; + *this = __initialized; + } }; template <> -struct __padding<0> {}; +struct __padding<0> { + constexpr void clear() {} +}; template class basic_string { @@ -819,6 +825,33 @@ private: # ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT + // The __long structure contains at least a `pointer` to an allocated buffer, + // and two `size_type` (the second one being a bitfield container). This + // struct, only used by `sizeof`, is the minimum that will fit those fields. + struct __long_min { + pointer __data_; + size_type __size_; + size_type __bitfield_container; + }; + + // The __short structure has a byte-sized bitfield container at the end, and + // the rest of it can be taken up by value_type. We want at least two + // value_type, or more if they will fit. + static constexpr size_t __fit_cap = (sizeof(__long_min) - 1) / sizeof(value_type); + static constexpr size_t __min_cap = __fit_cap > 2 ? __fit_cap : 2; + + // Now we know how many value_type will fit, calculate how much space they + // take up in total; add one byte for the final bitfield container; and round + // up to the nearest multiple of the alignment. That's the total size of the + // structure. + static constexpr size_t __short_packed_size = sizeof(value_type) * __min_cap + 1; + union __union_alignment_check { __long_min __long_; value_type v; }; + static constexpr size_t __union_alignment = _LIBCPP_ALIGNOF(__union_alignment_check); + static constexpr size_t __full_size = (__short_packed_size + __union_alignment - 1) & -__union_alignment; + + // Now define both structures for real, with padding to ensure they are both + // exactly the calculated size. + struct __long { __long() = default; @@ -829,19 +862,23 @@ private: pointer __data_; size_type __size_; + _LIBCPP_NO_UNIQUE_ADDRESS __padding<__full_size - sizeof(__long_min)> __padding_; size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1; size_type __is_long_ : 1; }; - enum { __min_cap = (sizeof(__long) - 1) / sizeof(value_type) > 2 ? (sizeof(__long) - 1) / sizeof(value_type) : 2 }; - struct __short { value_type __data_[__min_cap]; - _LIBCPP_NO_UNIQUE_ADDRESS __padding __padding_; + _LIBCPP_NO_UNIQUE_ADDRESS __padding<__full_size - __short_packed_size> __padding_; unsigned char __size_ : 7; unsigned char __is_long_ : 1; }; + // Finally, check that we got it all right, and that both structures have + // exactly the expected size. + static_assert(sizeof(__long) == __full_size, "Miscalculated size for __long"); + static_assert(sizeof(__short) == __full_size, "Miscalculated size for __short"); + // The __endian_factor is required because the field we use to store the size // has one fewer bit than it would if it were not a bitfield. // @@ -887,6 +924,13 @@ private: }; size_type __size_; pointer __data_; + + // No padding is needed in this version of the structure, but we add a + // zero-sized __padding_ member anyway to match the + // `_LIBCPP_ABI_ALTERNATE_STRING_LAYOUT` version, so that + // `__padding_.clear()` can be performed unconditionally in code common to + // both layouts. + _LIBCPP_NO_UNIQUE_ADDRESS __padding<0> __padding_; }; enum { __min_cap = (sizeof(__long) - 1) / sizeof(value_type) > 2 ? (sizeof(__long) - 1) / sizeof(value_type) : 2 }; @@ -900,10 +944,10 @@ private: value_type __data_[__min_cap]; }; -# endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT - static_assert(sizeof(__short) == (sizeof(value_type) * (__min_cap + 1)), "__short has an unexpected size."); +# endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT + union __rep { __short __s; __long __l; @@ -2764,6 +2808,7 @@ _LIBCPP_DEPRECATED_("use __grow_by_without_replace") basic_string<_CharT, _Trait // This is -1 to make sure the caller sets the size properly, since old versions of this function didn't set the size // at all. __buffer.__size_ = -1; + __buffer.__padding_.clear(); __reset_internal_buffer(__buffer); } diff --git a/libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp b/libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp new file mode 100644 index 0000000000000..a4e258680cd2e --- /dev/null +++ b/libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp @@ -0,0 +1,114 @@ +#include +#include +#include +#include + +#include "test_macros.h" + +template +void test_string() { + // Make a test string. + std::basic_string s; + LIBCPP_ASSERT(s.size() == 0); + + // Append enough chars to it that we must have switched over from a short + // string stored internally to a long one pointing to a dynamic buffer, + // causing a reallocation. + unsigned n = sizeof(s) / sizeof(Char) + 1; + for (unsigned i = 0; i < n; i++) { + s.push_back(Char::from_integer(i)); + LIBCPP_ASSERT(s.size() == i + 1); + } + + // Check that all the chars were correctly copied during the realloc. + for (unsigned i = 0; i < n; i++) { + LIBCPP_ASSERT(s[i] == Char::from_integer(i)); + } +} + +template +struct TestChar { + Integer values[N]; + + static TestChar from_integer(unsigned index) { + TestChar ch; + for (size_t i = 0; i < N; i++) + ch.values[i] = index + i; + return ch; + } + + bool operator==(const TestChar &other) const { + return 0 == memcmp(values, other.values, sizeof(values)); + } + bool operator<(const TestChar &other) const { + return 0 < memcmp(values, other.values, sizeof(values)); + } +}; + +template +struct std::char_traits> { + using char_type = TestChar; + using int_type = int; + using off_type = streamoff; + using pos_type = streampos; + using state_type = mbstate_t; + + static TEST_CONSTEXPR_CXX20 void assign(char_type& c1, const char_type& c2) { c1 = c2; } + static bool eq(char_type c1, char_type c2); + static bool lt(char_type c1, char_type c2); + + static int compare(const char_type* s1, const char_type* s2, std::size_t n); + static std::size_t length(const char_type* s); + static const char_type* find(const char_type* s, std::size_t n, const char_type& a); + static char_type* move(char_type* s1, const char_type* s2, std::size_t n); + static TEST_CONSTEXPR_CXX20 char_type* copy(char_type* s1, const char_type* s2, std::size_t n) { + std::copy_n(s2, n, s1); + return s1; + } + static TEST_CONSTEXPR_CXX20 char_type* assign(char_type* s, std::size_t n, char_type a) { + std::fill_n(s, n, a); + return s; + } + + static int_type not_eof(int_type c); + static char_type to_char_type(int_type c); + static int_type to_int_type(char_type c); + static bool eq_int_type(int_type c1, int_type c2); + static int_type eof(); +}; + +int main(int, char**) { + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + test_string>(); + + test_string>(); + test_string>(); + test_string>(); + test_string>(); + + test_string>(); + test_string>(); +} diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_append/push_back.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_append/push_back.pass.cpp index 8c2324c9d1759..34669c28d624c 100644 --- a/libcxx/test/std/strings/basic.string/string.modifiers/string_append/push_back.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_append/push_back.pass.cpp @@ -84,9 +84,13 @@ TEST_CONSTEXPR_CXX20 bool test() { // https://llvm.org/PR31454 std::basic_string s; VeryLarge vl = {}; + LIBCPP_ASSERT(s.size() == 0); s.push_back(vl); + LIBCPP_ASSERT(s.size() == 1); s.push_back(vl); + LIBCPP_ASSERT(s.size() == 2); s.push_back(vl); + LIBCPP_ASSERT(s.size() == 3); } return true; From 5834b98e5ff2245d0cd7212c006800930e3f0820 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 20 Nov 2025 13:49:22 +0000 Subject: [PATCH 02/11] git-clang-format --- .../basic.string/awkward-char-types.pass.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp b/libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp index a4e258680cd2e..a649cd8f582f8 100644 --- a/libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp +++ b/libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp @@ -5,7 +5,7 @@ #include "test_macros.h" -template +template void test_string() { // Make a test string. std::basic_string s; @@ -26,7 +26,7 @@ void test_string() { } } -template +template struct TestChar { Integer values[N]; @@ -37,15 +37,11 @@ struct TestChar { return ch; } - bool operator==(const TestChar &other) const { - return 0 == memcmp(values, other.values, sizeof(values)); - } - bool operator<(const TestChar &other) const { - return 0 < memcmp(values, other.values, sizeof(values)); - } + bool operator==(const TestChar& other) const { return 0 == memcmp(values, other.values, sizeof(values)); } + bool operator<(const TestChar& other) const { return 0 < memcmp(values, other.values, sizeof(values)); } }; -template +template struct std::char_traits> { using char_type = TestChar; using int_type = int; From ce04fcb17c361465e91317a94e79ce6432e1eaaa Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 20 Nov 2025 13:56:08 +0000 Subject: [PATCH 03/11] Use ordinary assert instead of LIBCPP_ASSERT --- .../std/strings/basic.string/awkward-char-types.pass.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp b/libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp index a649cd8f582f8..c94c2a2cb13ac 100644 --- a/libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp +++ b/libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp @@ -9,7 +9,7 @@ template void test_string() { // Make a test string. std::basic_string s; - LIBCPP_ASSERT(s.size() == 0); + assert(s.size() == 0); // Append enough chars to it that we must have switched over from a short // string stored internally to a long one pointing to a dynamic buffer, @@ -17,12 +17,12 @@ void test_string() { unsigned n = sizeof(s) / sizeof(Char) + 1; for (unsigned i = 0; i < n; i++) { s.push_back(Char::from_integer(i)); - LIBCPP_ASSERT(s.size() == i + 1); + assert(s.size() == i + 1); } // Check that all the chars were correctly copied during the realloc. for (unsigned i = 0; i < n; i++) { - LIBCPP_ASSERT(s[i] == Char::from_integer(i)); + assert(s[i] == Char::from_integer(i)); } } From 911d9ad88f8919d2150d9a49ba8b55d69bcf3744 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Mon, 24 Nov 2025 13:21:43 +0000 Subject: [PATCH 04/11] Replace __padding::clear() with __padding::empty() One of the buildbot failures was objecting to a constexpr function returning void, I guess because that's from a later C++ version. --- libcxx/include/string | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/libcxx/include/string b/libcxx/include/string index 8b2e599e17b2c..7814a6e15001e 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -719,15 +719,18 @@ struct __init_with_sentinel_tag {}; template struct __padding { char __padding_[_PaddingSize]; - constexpr void clear() { + static constexpr __padding empty() { __padding __initialized = {0}; - *this = __initialized; + return __initialized; } }; template <> struct __padding<0> { - constexpr void clear() {} + static constexpr __padding empty() { + __padding __initialized; + return __initialized; + } }; template @@ -928,7 +931,7 @@ private: // No padding is needed in this version of the structure, but we add a // zero-sized __padding_ member anyway to match the // `_LIBCPP_ABI_ALTERNATE_STRING_LAYOUT` version, so that - // `__padding_.clear()` can be performed unconditionally in code common to + // `__padding::empty()` can be used unconditionally in code common to // both layouts. _LIBCPP_NO_UNIQUE_ADDRESS __padding<0> __padding_; }; @@ -2817,7 +2820,7 @@ _LIBCPP_DEPRECATED_("use __grow_by_without_replace") basic_string<_CharT, _Trait // This is -1 to make sure the caller sets the size properly, since old versions of this function didn't set the size // at all. __buffer.__size_ = -1; - __buffer.__padding_.clear(); + __buffer.__padding_ = decltype(__buffer.__padding_)::empty(); __reset_internal_buffer(__buffer); } From 5bcb59a6e03b022cf49ff070970689a911702502 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Mon, 24 Nov 2025 13:27:41 +0000 Subject: [PATCH 05/11] Avoid using constexpr where C++03 will hate it Another CI failure occurred because of compiling in C++03 mode, where 'constexpr' isn't even a keyword. Replaced with _LIBCPP_CONSTEXPR_SINCE_20 where feasible, and enum otherwise. --- libcxx/include/string | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/libcxx/include/string b/libcxx/include/string index 7814a6e15001e..3c413bc8d47b8 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -719,7 +719,7 @@ struct __init_with_sentinel_tag {}; template struct __padding { char __padding_[_PaddingSize]; - static constexpr __padding empty() { + static _LIBCPP_CONSTEXPR_SINCE_CXX20 __padding empty() { __padding __initialized = {0}; return __initialized; } @@ -727,7 +727,7 @@ struct __padding { template <> struct __padding<0> { - static constexpr __padding empty() { + static _LIBCPP_CONSTEXPR_SINCE_CXX20 __padding empty() { __padding __initialized; return __initialized; } @@ -840,17 +840,17 @@ private: // The __short structure has a byte-sized bitfield container at the end, and // the rest of it can be taken up by value_type. We want at least two // value_type, or more if they will fit. - static constexpr size_t __fit_cap = (sizeof(__long_min) - 1) / sizeof(value_type); - static constexpr size_t __min_cap = __fit_cap > 2 ? __fit_cap : 2; + enum { __fit_cap = (sizeof(__long_min) - 1) / sizeof(value_type) }; + enum { __min_cap = __fit_cap > 2 ? __fit_cap : 2 }; // Now we know how many value_type will fit, calculate how much space they // take up in total; add one byte for the final bitfield container; and round // up to the nearest multiple of the alignment. That's the total size of the // structure. - static constexpr size_t __short_packed_size = sizeof(value_type) * __min_cap + 1; + enum { __short_packed_size = sizeof(value_type) * __min_cap + 1 }; union __union_alignment_check { __long_min __long_; value_type v; }; - static constexpr size_t __union_alignment = _LIBCPP_ALIGNOF(__union_alignment_check); - static constexpr size_t __full_size = (__short_packed_size + __union_alignment - 1) & -__union_alignment; + enum { __union_alignment = _LIBCPP_ALIGNOF(__union_alignment_check) }; + enum { __full_size = (__short_packed_size + __union_alignment - 1) & -__union_alignment }; // Now define both structures for real, with padding to ensure they are both // exactly the calculated size. From fbc90a649679cceac6b61b14aada26ec33e0c855 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Mon, 24 Nov 2025 13:35:42 +0000 Subject: [PATCH 06/11] clang-format *again* This one is particularly annoying because my local run of git-clang-format didn't pick it up and I had to round-trip through official CI to even find out there was a problem! --- libcxx/include/string | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libcxx/include/string b/libcxx/include/string index 3c413bc8d47b8..8ac66636c6007 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -848,7 +848,10 @@ private: // up to the nearest multiple of the alignment. That's the total size of the // structure. enum { __short_packed_size = sizeof(value_type) * __min_cap + 1 }; - union __union_alignment_check { __long_min __long_; value_type v; }; + union __union_alignment_check { + __long_min __long_; + value_type v; + }; enum { __union_alignment = _LIBCPP_ALIGNOF(__union_alignment_check) }; enum { __full_size = (__short_packed_size + __union_alignment - 1) & -__union_alignment }; From b76d2985a33ec36c9e96c42c07abeea79f024351 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Mon, 24 Nov 2025 13:46:45 +0000 Subject: [PATCH 07/11] Make the new test work in C++03 mode too Spaced out all the >> in template suffixes so that C++03 won't parse them as a right shift operator. --- .../basic.string/awkward-char-types.pass.cpp | 62 +++++++++---------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp b/libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp index c94c2a2cb13ac..da13bf8744abf 100644 --- a/libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp +++ b/libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp @@ -42,7 +42,7 @@ struct TestChar { }; template -struct std::char_traits> { +struct std::char_traits > { using char_type = TestChar; using int_type = int; using off_type = streamoff; @@ -74,37 +74,37 @@ struct std::char_traits> { }; int main(int, char**) { - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); - test_string>(); - test_string>(); - test_string>(); - test_string>(); + test_string >(); + test_string >(); + test_string >(); + test_string >(); - test_string>(); - test_string>(); + test_string >(); + test_string >(); } From 74e11ac22dcf8e43c52185045d1071bb8396cb93 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Mon, 24 Nov 2025 14:51:49 +0000 Subject: [PATCH 08/11] Cast enum values back to size_t This wasn't spotted until stage 3 of buildbots: having used `enum` to define all those constants, you now can't subtract one of them from another without provoking a warning about distinct integer types being used together. static_casting them all back to size_t makes the generic-abi-unstable build happy again. --- libcxx/include/string | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libcxx/include/string b/libcxx/include/string index 8ac66636c6007..42e2ef88c0cee 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -853,7 +853,7 @@ private: value_type v; }; enum { __union_alignment = _LIBCPP_ALIGNOF(__union_alignment_check) }; - enum { __full_size = (__short_packed_size + __union_alignment - 1) & -__union_alignment }; + enum { __full_size = (static_cast(__short_packed_size) + static_cast(__union_alignment) - 1) & -static_cast(__union_alignment) }; // Now define both structures for real, with padding to ensure they are both // exactly the calculated size. @@ -868,14 +868,14 @@ private: pointer __data_; size_type __size_; - _LIBCPP_NO_UNIQUE_ADDRESS __padding<__full_size - sizeof(__long_min)> __padding_; + _LIBCPP_NO_UNIQUE_ADDRESS __padding(__full_size) - sizeof(__long_min)> __padding_; size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1; size_type __is_long_ : 1; }; struct __short { value_type __data_[__min_cap]; - _LIBCPP_NO_UNIQUE_ADDRESS __padding<__full_size - __short_packed_size> __padding_; + _LIBCPP_NO_UNIQUE_ADDRESS __padding(__full_size) - static_cast(__short_packed_size)> __padding_; unsigned char __size_ : 7; unsigned char __is_long_ : 1; }; From 91cadd5b1e576f690f77f765c59e496feadaa274 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Mon, 24 Nov 2025 15:09:45 +0000 Subject: [PATCH 09/11] git-clang-format Ah! Now I understand why my local run keeps missing these formatting changes. It's because by default it only does C++ formatting on files that end in `.cpp`, so it doesn't search `string` by default, which is where I keep making changes. It's not that CI is running a more up-to-date clang-format which disagrees on style. It's that it's using a longer command line which reminds it to check extensionless C++ standard headers. --- libcxx/include/string | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/libcxx/include/string b/libcxx/include/string index 42e2ef88c0cee..99eaef703f36f 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -853,7 +853,10 @@ private: value_type v; }; enum { __union_alignment = _LIBCPP_ALIGNOF(__union_alignment_check) }; - enum { __full_size = (static_cast(__short_packed_size) + static_cast(__union_alignment) - 1) & -static_cast(__union_alignment) }; + enum { + __full_size = (static_cast(__short_packed_size) + static_cast(__union_alignment) - 1) & + -static_cast(__union_alignment) + }; // Now define both structures for real, with padding to ensure they are both // exactly the calculated size. @@ -875,7 +878,8 @@ private: struct __short { value_type __data_[__min_cap]; - _LIBCPP_NO_UNIQUE_ADDRESS __padding(__full_size) - static_cast(__short_packed_size)> __padding_; + _LIBCPP_NO_UNIQUE_ADDRESS __padding(__full_size) - static_cast(__short_packed_size)> + __padding_; unsigned char __size_ : 7; unsigned char __is_long_ : 1; }; From adfb6a73420012ec2f83e1690cd823bb6c497378 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 27 Nov 2025 16:53:51 +0000 Subject: [PATCH 10/11] Fix __align_allocation_size for awkward char sizes The ASan failure in CI complained that a buffer had been allocated as 7 chars (in the case where sizeof(char_type)==5) and freed as only 6. That's because the test was running in a configuration with __endian_factor==2, and __align_allocation_size is supposed to always return an even number in that situation, and it wasn't. Fixed by introducing a new more careful code path for non-power-of-2 char sizes, and also adjusting the caller so that it actually remembers the value it got back from __align_allocation_size, to pass the same value to free later. (I'm also suspicious of the __allocate_at_least call, which isn't using the returned count field. That too might be odd, which would stop us from storing it in the __long size field. At the moment this isn't failing because __allocate_at_least never allocates more than it's asked to.) --- libcxx/include/string | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/libcxx/include/string b/libcxx/include/string index 99eaef703f36f..74edd78dd64ce 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -2329,14 +2329,15 @@ private: __allocate_long_buffer(_Allocator& __alloc, size_type __capacity) { _LIBCPP_ASSERT_INTERNAL(!__fits_in_sso(__capacity), "Trying to allocate long buffer for a capacity what would fit into the small buffer"); - auto __buffer = std::__allocate_at_least(__alloc, __align_allocation_size(__capacity)); + size_type __aligned_capacity = __align_allocation_size(__capacity); + auto __buffer = std::__allocate_at_least(__alloc, __aligned_capacity); if (__libcpp_is_constant_evaluated()) { for (size_type __i = 0; __i != __buffer.count; ++__i) std::__construct_at(std::addressof(__buffer.ptr[__i])); } - return __long(__buffer, __capacity); + return __long(__buffer, __aligned_capacity); } // Replace the current buffer with __new_rep. Deallocate the old long buffer if it exists. @@ -2422,6 +2423,10 @@ private: } enum { __alignment = 8 }; + static _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_power_of_2(size_type __s) _NOEXCEPT { + return __s != 0 && (__s & (__s - 1)) == 0; + } + // This makes sure that we're using a capacity with some extra alignment, since allocators almost always over-align // the allocations anyways, improving memory usage. More importantly, this ensures that the lowest bit is never set // if __endian_factor == 2, allowing us to store whether we're in the long string inside the lowest bit. @@ -2429,11 +2434,28 @@ private: __align_allocation_size(size_type __size) _NOEXCEPT { _LIBCPP_ASSERT_INTERNAL( !__fits_in_sso(__size), "Trying to align allocation of a size which would fit into the SSO"); - const size_type __boundary = sizeof(value_type) < __alignment ? __alignment / sizeof(value_type) : __endian_factor; - size_type __guess = __align_it<__boundary>(__size + 1); - if (__guess == __min_cap + 1) - __guess += __endian_factor; + size_type __guess; + + if (__is_power_of_2(sizeof(value_type))) { + // If the size of a _Char is itself a power of 2, then we can align the + // total allocation size in bytes by aligning the count of characters to + // an appropriate power of 2. + const size_type __boundary = + sizeof(value_type) < __alignment ? __alignment / sizeof(value_type) : __endian_factor; + __guess = __align_it<__boundary>(__size + 1); + if (__guess == __min_cap + 1) + __guess += __endian_factor; + } else { + // Otherwise, we must align the size in bytes and then calculate the + // count of characters by division. + const size_type __even_size = __align_it<__endian_factor>(__size + 1); + const size_type __unaligned_bytes = __even_size * sizeof(value_type); + const size_type __aligned_bytes = __align_it<__alignment>(__unaligned_bytes); + __guess = (__aligned_bytes / (sizeof(value_type) * __endian_factor)) * __endian_factor; + } + + _LIBCPP_ASSERT_INTERNAL(__guess % __endian_factor == 0, "aligned allocation size is odd but __endian_factor == 2"); _LIBCPP_ASSERT_INTERNAL(__guess >= __size, "aligned allocation size is below the requested size"); return __guess; } From 8a07a8ea9a2d6967dac0e94b222791bc285038c9 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Fri, 28 Nov 2025 14:46:40 +0000 Subject: [PATCH 11/11] Undo my changes in __allocate_long_buffer I had misunderstood the call `__long(__buffer, __capacity)` at the end of the function. The second parameter to that constructor is actually the string _size_, not its capacity! So it's correct that it should be passing the original `__capacity` parameter from its caller, and not the version increased for alignment. --- libcxx/include/string | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libcxx/include/string b/libcxx/include/string index 74edd78dd64ce..3342e12523e3e 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -2329,15 +2329,14 @@ private: __allocate_long_buffer(_Allocator& __alloc, size_type __capacity) { _LIBCPP_ASSERT_INTERNAL(!__fits_in_sso(__capacity), "Trying to allocate long buffer for a capacity what would fit into the small buffer"); - size_type __aligned_capacity = __align_allocation_size(__capacity); - auto __buffer = std::__allocate_at_least(__alloc, __aligned_capacity); + auto __buffer = std::__allocate_at_least(__alloc, __align_allocation_size(__capacity)); if (__libcpp_is_constant_evaluated()) { for (size_type __i = 0; __i != __buffer.count; ++__i) std::__construct_at(std::addressof(__buffer.ptr[__i])); } - return __long(__buffer, __aligned_capacity); + return __long(__buffer, __capacity); } // Replace the current buffer with __new_rep. Deallocate the old long buffer if it exists.