Skip to content

Commit d51148d

Browse files
committed
[libcxx] Recalculate union-member sizes in basic_string
Fixes #51158: instantiating a `basic_string` with a character type of 3, 5, or greater than 8 wouldn't work properly, because the two members of its internal union disagreed on the location of the bit flag indicating which member was live. This commit completely rewrites the calculations of how much to pad the two sub-structures so that their lengths match. Two new static_asserts check that, after all the calculation is done, the compiler agrees that the structures really did end up the same size. A new test instantiates `basic_string` with every size of character type up to 16, and four different alignment requirements. The new code now potentially has to pad _both_ sub-structures, depending on what is and is not divisible by the size of the character type. This also means that the `__padding` type must have a method to zero it out (otherwise the `__grow_by` method stops being constexpr, which an existing test depends on). I've also added some extra checks to the existing test `push_back.pass.cpp`, which _could_ have found this issue by checking that pushing huge characters on to a string updated the length in the appropriate way.
1 parent bf07226 commit d51148d

File tree

3 files changed

+169
-6
lines changed

3 files changed

+169
-6
lines changed

libcxx/include/string

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -719,10 +719,16 @@ struct __init_with_sentinel_tag {};
719719
template <size_t _PaddingSize>
720720
struct __padding {
721721
char __padding_[_PaddingSize];
722+
constexpr void clear() {
723+
__padding __initialized = {0};
724+
*this = __initialized;
725+
}
722726
};
723727

724728
template <>
725-
struct __padding<0> {};
729+
struct __padding<0> {
730+
constexpr void clear() {}
731+
};
726732

727733
template <class _CharT, class _Traits, class _Allocator>
728734
class basic_string {
@@ -819,6 +825,33 @@ private:
819825

820826
# ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
821827

828+
// The __long structure contains at least a `pointer` to an allocated buffer,
829+
// and two `size_type` (the second one being a bitfield container). This
830+
// struct, only used by `sizeof`, is the minimum that will fit those fields.
831+
struct __long_min {
832+
pointer __data_;
833+
size_type __size_;
834+
size_type __bitfield_container;
835+
};
836+
837+
// The __short structure has a byte-sized bitfield container at the end, and
838+
// the rest of it can be taken up by value_type. We want at least two
839+
// value_type, or more if they will fit.
840+
static constexpr size_t __fit_cap = (sizeof(__long_min) - 1) / sizeof(value_type);
841+
static constexpr size_t __min_cap = __fit_cap > 2 ? __fit_cap : 2;
842+
843+
// Now we know how many value_type will fit, calculate how much space they
844+
// take up in total; add one byte for the final bitfield container; and round
845+
// up to the nearest multiple of the alignment. That's the total size of the
846+
// structure.
847+
static constexpr size_t __short_packed_size = sizeof(value_type) * __min_cap + 1;
848+
union __union_alignment_check { __long_min __long_; value_type v; };
849+
static constexpr size_t __union_alignment = _LIBCPP_ALIGNOF(__union_alignment_check);
850+
static constexpr size_t __full_size = (__short_packed_size + __union_alignment - 1) & -__union_alignment;
851+
852+
// Now define both structures for real, with padding to ensure they are both
853+
// exactly the calculated size.
854+
822855
struct __long {
823856
__long() = default;
824857

@@ -829,19 +862,23 @@ private:
829862

830863
pointer __data_;
831864
size_type __size_;
865+
_LIBCPP_NO_UNIQUE_ADDRESS __padding<__full_size - sizeof(__long_min)> __padding_;
832866
size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1;
833867
size_type __is_long_ : 1;
834868
};
835869

836-
enum { __min_cap = (sizeof(__long) - 1) / sizeof(value_type) > 2 ? (sizeof(__long) - 1) / sizeof(value_type) : 2 };
837-
838870
struct __short {
839871
value_type __data_[__min_cap];
840-
_LIBCPP_NO_UNIQUE_ADDRESS __padding<sizeof(value_type) - 1> __padding_;
872+
_LIBCPP_NO_UNIQUE_ADDRESS __padding<__full_size - __short_packed_size> __padding_;
841873
unsigned char __size_ : 7;
842874
unsigned char __is_long_ : 1;
843875
};
844876

877+
// Finally, check that we got it all right, and that both structures have
878+
// exactly the expected size.
879+
static_assert(sizeof(__long) == __full_size, "Miscalculated size for __long");
880+
static_assert(sizeof(__short) == __full_size, "Miscalculated size for __short");
881+
845882
// The __endian_factor is required because the field we use to store the size
846883
// has one fewer bit than it would if it were not a bitfield.
847884
//
@@ -887,6 +924,13 @@ private:
887924
};
888925
size_type __size_;
889926
pointer __data_;
927+
928+
// No padding is needed in this version of the structure, but we add a
929+
// zero-sized __padding_ member anyway to match the
930+
// `_LIBCPP_ABI_ALTERNATE_STRING_LAYOUT` version, so that
931+
// `__padding_.clear()` can be performed unconditionally in code common to
932+
// both layouts.
933+
_LIBCPP_NO_UNIQUE_ADDRESS __padding<0> __padding_;
890934
};
891935

892936
enum { __min_cap = (sizeof(__long) - 1) / sizeof(value_type) > 2 ? (sizeof(__long) - 1) / sizeof(value_type) : 2 };
@@ -900,10 +944,10 @@ private:
900944
value_type __data_[__min_cap];
901945
};
902946

903-
# endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
904-
905947
static_assert(sizeof(__short) == (sizeof(value_type) * (__min_cap + 1)), "__short has an unexpected size.");
906948

949+
# endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
950+
907951
union __rep {
908952
__short __s;
909953
__long __l;
@@ -2764,6 +2808,7 @@ _LIBCPP_DEPRECATED_("use __grow_by_without_replace") basic_string<_CharT, _Trait
27642808
// This is -1 to make sure the caller sets the size properly, since old versions of this function didn't set the size
27652809
// at all.
27662810
__buffer.__size_ = -1;
2811+
__buffer.__padding_.clear();
27672812
__reset_internal_buffer(__buffer);
27682813
}
27692814

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
#include <stdint.h>
2+
#include <string>
3+
#include <algorithm>
4+
#include <cassert>
5+
6+
#include "test_macros.h"
7+
8+
template<typename Char>
9+
void test_string() {
10+
// Make a test string.
11+
std::basic_string<Char> s;
12+
LIBCPP_ASSERT(s.size() == 0);
13+
14+
// Append enough chars to it that we must have switched over from a short
15+
// string stored internally to a long one pointing to a dynamic buffer,
16+
// causing a reallocation.
17+
unsigned n = sizeof(s) / sizeof(Char) + 1;
18+
for (unsigned i = 0; i < n; i++) {
19+
s.push_back(Char::from_integer(i));
20+
LIBCPP_ASSERT(s.size() == i + 1);
21+
}
22+
23+
// Check that all the chars were correctly copied during the realloc.
24+
for (unsigned i = 0; i < n; i++) {
25+
LIBCPP_ASSERT(s[i] == Char::from_integer(i));
26+
}
27+
}
28+
29+
template<typename Integer, size_t N>
30+
struct TestChar {
31+
Integer values[N];
32+
33+
static TestChar from_integer(unsigned index) {
34+
TestChar ch;
35+
for (size_t i = 0; i < N; i++)
36+
ch.values[i] = index + i;
37+
return ch;
38+
}
39+
40+
bool operator==(const TestChar &other) const {
41+
return 0 == memcmp(values, other.values, sizeof(values));
42+
}
43+
bool operator<(const TestChar &other) const {
44+
return 0 < memcmp(values, other.values, sizeof(values));
45+
}
46+
};
47+
48+
template<typename Integer, size_t N>
49+
struct std::char_traits<TestChar<Integer, N>> {
50+
using char_type = TestChar<Integer, N>;
51+
using int_type = int;
52+
using off_type = streamoff;
53+
using pos_type = streampos;
54+
using state_type = mbstate_t;
55+
56+
static TEST_CONSTEXPR_CXX20 void assign(char_type& c1, const char_type& c2) { c1 = c2; }
57+
static bool eq(char_type c1, char_type c2);
58+
static bool lt(char_type c1, char_type c2);
59+
60+
static int compare(const char_type* s1, const char_type* s2, std::size_t n);
61+
static std::size_t length(const char_type* s);
62+
static const char_type* find(const char_type* s, std::size_t n, const char_type& a);
63+
static char_type* move(char_type* s1, const char_type* s2, std::size_t n);
64+
static TEST_CONSTEXPR_CXX20 char_type* copy(char_type* s1, const char_type* s2, std::size_t n) {
65+
std::copy_n(s2, n, s1);
66+
return s1;
67+
}
68+
static TEST_CONSTEXPR_CXX20 char_type* assign(char_type* s, std::size_t n, char_type a) {
69+
std::fill_n(s, n, a);
70+
return s;
71+
}
72+
73+
static int_type not_eof(int_type c);
74+
static char_type to_char_type(int_type c);
75+
static int_type to_int_type(char_type c);
76+
static bool eq_int_type(int_type c1, int_type c2);
77+
static int_type eof();
78+
};
79+
80+
int main(int, char**) {
81+
test_string<TestChar<uint8_t, 1>>();
82+
test_string<TestChar<uint8_t, 2>>();
83+
test_string<TestChar<uint8_t, 3>>();
84+
test_string<TestChar<uint8_t, 4>>();
85+
test_string<TestChar<uint8_t, 5>>();
86+
test_string<TestChar<uint8_t, 6>>();
87+
test_string<TestChar<uint8_t, 7>>();
88+
test_string<TestChar<uint8_t, 8>>();
89+
test_string<TestChar<uint8_t, 9>>();
90+
test_string<TestChar<uint8_t, 10>>();
91+
test_string<TestChar<uint8_t, 11>>();
92+
test_string<TestChar<uint8_t, 12>>();
93+
test_string<TestChar<uint8_t, 13>>();
94+
test_string<TestChar<uint8_t, 14>>();
95+
test_string<TestChar<uint8_t, 15>>();
96+
test_string<TestChar<uint8_t, 16>>();
97+
98+
test_string<TestChar<uint16_t, 1>>();
99+
test_string<TestChar<uint16_t, 2>>();
100+
test_string<TestChar<uint16_t, 3>>();
101+
test_string<TestChar<uint16_t, 4>>();
102+
test_string<TestChar<uint16_t, 5>>();
103+
test_string<TestChar<uint16_t, 6>>();
104+
test_string<TestChar<uint16_t, 7>>();
105+
test_string<TestChar<uint16_t, 8>>();
106+
107+
test_string<TestChar<uint32_t, 1>>();
108+
test_string<TestChar<uint32_t, 2>>();
109+
test_string<TestChar<uint32_t, 3>>();
110+
test_string<TestChar<uint32_t, 4>>();
111+
112+
test_string<TestChar<uint64_t, 1>>();
113+
test_string<TestChar<uint64_t, 2>>();
114+
}

libcxx/test/std/strings/basic.string/string.modifiers/string_append/push_back.pass.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,13 @@ TEST_CONSTEXPR_CXX20 bool test() {
8484
// https://llvm.org/PR31454
8585
std::basic_string<VeryLarge> s;
8686
VeryLarge vl = {};
87+
LIBCPP_ASSERT(s.size() == 0);
8788
s.push_back(vl);
89+
LIBCPP_ASSERT(s.size() == 1);
8890
s.push_back(vl);
91+
LIBCPP_ASSERT(s.size() == 2);
8992
s.push_back(vl);
93+
LIBCPP_ASSERT(s.size() == 3);
9094
}
9195

9296
return true;

0 commit comments

Comments
 (0)