Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 86 additions & 10 deletions libcxx/include/string
Original file line number Diff line number Diff line change
Expand Up @@ -719,10 +719,19 @@ struct __init_with_sentinel_tag {};
// Explicit padding of exactly `_PaddingSize` bytes, usable as a data member.
template <size_t _PaddingSize>
struct __padding {
  char __padding_[_PaddingSize];

  // Produces a padding object in which every byte is zero.
  static _LIBCPP_CONSTEXPR_SINCE_CXX20 __padding empty() { return __padding(); }
};

// Specialization for the case where no padding bytes are needed. It has no
// data members but still provides `empty()`, mirroring the primary template.
template <>
// NOTE(review): the empty definition on the next line and the full definition
// after it both define `__padding<0>`; presumably they are the removed and
// added lines of this diff hunk -- confirm only one remains in the real header.
struct __padding<0> {};
struct __padding<0> {
// Returns a default-initialized object; there are no bytes to zero here.
static _LIBCPP_CONSTEXPR_SINCE_CXX20 __padding empty() {
__padding __initialized;
return __initialized;
}
};

template <class _CharT, class _Traits, class _Allocator>
class basic_string {
Expand Down Expand Up @@ -819,6 +828,39 @@ private:

# ifdef _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT

// The __long structure contains at least a `pointer` to an allocated buffer,
// and two `size_type` (the second one being a bitfield container). This
// struct, only used by `sizeof`, is the minimum that will fit those fields.
struct __long_min {
pointer __data_; // stands for the pointer to the allocated buffer
size_type __size_; // first of the two size_type fields
size_type __bitfield_container; // second size_type: the word that holds the bitfields
};

// The __short structure has a byte-sized bitfield container at the end, and
// the rest of it can be taken up by value_type. We want at least two
// value_type, or more if they will fit.
// How many whole value_type elements fit in __long_min once one byte has been
// set aside (integer division, so any remainder is dropped).
enum { __fit_cap = (sizeof(__long_min) - 1) / sizeof(value_type) };
// The element count actually used: __fit_cap, but never fewer than 2.
enum { __min_cap = __fit_cap > 2 ? __fit_cap : 2 };

// Now we know how many value_type will fit, calculate how much space they
// take up in total; add one byte for the final bitfield container; and round
// up to the nearest multiple of the alignment. That's the total size of the
// structure.
// Bytes taken by __min_cap elements of value_type plus one extra byte, before
// any rounding.
enum { __short_packed_size = sizeof(value_type) * __min_cap + 1 };

// Used below only to query an alignment: a union is aligned at least as
// strictly as each of its members, so this yields the stricter of __long_min
// and value_type. Both member names are reserved identifiers (leading double
// underscore) so that a macro defined by user code cannot collide with them.
union __union_alignment_check {
  __long_min __long_;
  value_type __value_;
};
enum { __union_alignment = _LIBCPP_ALIGNOF(__union_alignment_check) };

// Round __short_packed_size up to the next multiple of __union_alignment.
// Adding (alignment - 1) and then masking with the negated alignment clears the
// low bits; the arithmetic is done in size_t so the negation is well defined.
enum {
  __full_size = (static_cast<size_t>(__short_packed_size) + static_cast<size_t>(__union_alignment) - 1) &
                -static_cast<size_t>(__union_alignment)
};

// Now define both structures for real, with padding to ensure they are both
// exactly the calculated size.

struct __long {
__long() = default;

Expand All @@ -829,19 +871,24 @@ private:

pointer __data_;
size_type __size_;
_LIBCPP_NO_UNIQUE_ADDRESS __padding<static_cast<size_t>(__full_size) - sizeof(__long_min)> __padding_;
size_type __cap_ : sizeof(size_type) * CHAR_BIT - 1;
size_type __is_long_ : 1;
};

// NOTE(review): `__min_cap` is also defined earlier in this section from
// `__fit_cap`; this `sizeof(__long)`-based definition is presumably the line
// the diff removes -- confirm it is absent from the resulting header.
enum { __min_cap = (sizeof(__long) - 1) / sizeof(value_type) > 2 ? (sizeof(__long) - 1) / sizeof(value_type) : 2 };

// In-object representation: __min_cap elements of storage, padding, and a
// final byte split into a 7-bit size and a 1-bit flag.
struct __short {
value_type __data_[__min_cap];
// NOTE(review): two `__padding_` members are declared below; presumably the
// first is the line this diff removes and the second is its replacement --
// confirm only one remains in the resulting header.
_LIBCPP_NO_UNIQUE_ADDRESS __padding<sizeof(value_type) - 1> __padding_;
// Pads by the difference between __full_size and __short_packed_size.
_LIBCPP_NO_UNIQUE_ADDRESS __padding<static_cast<size_t>(__full_size) - static_cast<size_t>(__short_packed_size)>
__padding_;
unsigned char __size_ : 7; // 7-bit size field
unsigned char __is_long_ : 1; // 1-bit flag declared right after __size_
};

// Finally, check that we got it all right, and that both structures have
// exactly the expected size.
static_assert(sizeof(__long) == __full_size, "Miscalculated size for __long");
static_assert(sizeof(__short) == __full_size, "Miscalculated size for __short");

// The __endian_factor is required because the field we use to store the size
// has one fewer bit than it would if it were not a bitfield.
//
Expand Down Expand Up @@ -887,6 +934,13 @@ private:
};
size_type __size_;
pointer __data_;

// No padding is needed in this version of the structure, but we add a
// zero-sized __padding_ member anyway to match the
// `_LIBCPP_ABI_ALTERNATE_STRING_LAYOUT` version, so that
// `__padding::empty()` can be used unconditionally in code common to
// both layouts.
_LIBCPP_NO_UNIQUE_ADDRESS __padding<0> __padding_;
};

enum { __min_cap = (sizeof(__long) - 1) / sizeof(value_type) > 2 ? (sizeof(__long) - 1) / sizeof(value_type) : 2 };
Expand All @@ -900,10 +954,10 @@ private:
value_type __data_[__min_cap];
};

# endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT

static_assert(sizeof(__short) == (sizeof(value_type) * (__min_cap + 1)), "__short has an unexpected size.");

# endif // _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT

union __rep {
__short __s;
__long __l;
Expand Down Expand Up @@ -2368,18 +2422,39 @@ private:
}
enum { __alignment = 8 };

static _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_power_of_2(size_type __s) _NOEXCEPT {
return __s != 0 && (__s & (__s - 1)) == 0;
}

// This makes sure that we're using a capacity with some extra alignment, since allocators almost always over-align
// the allocations anyways, improving memory usage. More importantly, this ensures that the lowest bit is never set
// if __endian_factor == 2, allowing us to store whether we're in the long string inside the lowest bit.
// Takes a requested element count __size (asserted not to fit in the SSO
// buffer) and returns the element count computed below as __guess, which is
// asserted to be at least __size and a multiple of __endian_factor.
_LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type
__align_allocation_size(size_type __size) _NOEXCEPT {
_LIBCPP_ASSERT_INTERNAL(
!__fits_in_sso(__size), "Trying to align allocation of a size which would fit into the SSO");
// NOTE(review): the next four lines repeat the power-of-2 branch further down
// and declare `__guess` a second time in the same scope; presumably they are
// the lines this diff removes -- confirm they are absent from the real header.
const size_type __boundary = sizeof(value_type) < __alignment ? __alignment / sizeof(value_type) : __endian_factor;
size_type __guess = __align_it<__boundary>(__size + 1);
if (__guess == __min_cap + 1)
__guess += __endian_factor;

size_type __guess;

if (__is_power_of_2(sizeof(value_type))) {
// If the size of a value_type is itself a power of 2, then we can align the
// total allocation size in bytes by aligning the count of characters to
// an appropriate power of 2.
const size_type __boundary =
sizeof(value_type) < __alignment ? __alignment / sizeof(value_type) : __endian_factor;
__guess = __align_it<__boundary>(__size + 1);
// A result of exactly __min_cap + 1 is bumped by __endian_factor.
// NOTE(review): presumably this keeps the result distinct from the short
// capacity -- confirm against the callers.
if (__guess == __min_cap + 1)
__guess += __endian_factor;
} else {
// Otherwise, we must align the size in bytes and then calculate the
// count of characters by division.
// (Assumes __align_it<N> rounds its argument up to a multiple of N -- its
// definition is not shown here; TODO confirm.)
const size_type __even_size = __align_it<__endian_factor>(__size + 1);
const size_type __unaligned_bytes = __even_size * sizeof(value_type);
const size_type __aligned_bytes = __align_it<__alignment>(__unaligned_bytes);
// Integer division by (sizeof(value_type) * __endian_factor) followed by the
// multiplication yields an element count that is a multiple of
// __endian_factor and whose byte size does not exceed __aligned_bytes.
__guess = (__aligned_bytes / (sizeof(value_type) * __endian_factor)) * __endian_factor;
}

_LIBCPP_ASSERT_INTERNAL(__guess % __endian_factor == 0, "aligned allocation size is odd but __endian_factor == 2");
_LIBCPP_ASSERT_INTERNAL(__guess >= __size, "aligned allocation size is below the requested size");
return __guess;
}
Expand Down Expand Up @@ -2773,6 +2848,7 @@ _LIBCPP_DEPRECATED_("use __grow_by_without_replace") basic_string<_CharT, _Trait
// This is -1 to make sure the caller sets the size properly, since old versions of this function didn't set the size
// at all.
__buffer.__size_ = -1;
__buffer.__padding_ = decltype(__buffer.__padding_)::empty();
__reset_internal_buffer(__buffer);
}

Expand Down
110 changes: 110 additions & 0 deletions libcxx/test/std/strings/basic.string/awkward-char-types.pass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#include <stdint.h>
#include <string>
#include <algorithm>
#include <cassert>

#include "test_macros.h"

// Exercises std::basic_string<Char> by growing it one element at a time and
// then reading every element back. `Char` must provide a static
// `from_integer(unsigned)` factory and `operator==`.
template <typename Char>
void test_string() {
  // Begin with a default-constructed string, which must be empty.
  std::basic_string<Char> str;
  assert(str.size() == 0);

  // One element more than could possibly fit inside the string object itself,
  // so by the time we finish the contents must live in a dynamic buffer and at
  // least one reallocation has taken place.
  const unsigned count = sizeof(str) / sizeof(Char) + 1;

  unsigned appended = 0;
  while (appended != count) {
    str.push_back(Char::from_integer(appended));
    ++appended;
    assert(str.size() == appended);
  }

  // Every element must have survived the move to the dynamic buffer intact.
  for (unsigned pos = 0; pos != count; ++pos) {
    assert(str[pos] == Char::from_integer(pos));
  }
}

// A character-like type holding N values of type `Integer`, used to give
// std::basic_string element types of many different sizes.
template <typename Integer, size_t N>
struct TestChar {
  Integer values[N];

  // Builds the character whose elements are index, index + 1, ..., index + N - 1,
  // each converted to `Integer` (so large values wrap for narrow integer types).
  static TestChar from_integer(unsigned index) {
    TestChar ch;
    for (size_t i = 0; i < N; i++)
      ch.values[i] = static_cast<Integer>(index + i);
    return ch;
  }

  // Element-wise equality.
  bool operator==(const TestChar& other) const { return std::equal(values, values + N, other.values); }

  // Lexicographical ordering on the element values: true when *this sorts
  // strictly before `other`. Comparing elements (rather than raw bytes) keeps
  // the order independent of the byte order of multi-byte `Integer` types.
  bool operator<(const TestChar& other) const {
    return std::lexicographical_compare(values, values + N, other.values, other.values + N);
  }
};

// char_traits for TestChar. Only the members with inline bodies below
// (single-element assign, copy, and fill-assign) are defined; the remaining
// members are declared without a definition in this block.
template <typename Integer, size_t N>
struct std::char_traits<TestChar<Integer, N> > {
  typedef TestChar<Integer, N> char_type;
  typedef int int_type;
  typedef streamoff off_type;
  typedef streampos pos_type;
  typedef mbstate_t state_type;

  // Single-character operations.
  static TEST_CONSTEXPR_CXX20 void assign(char_type& dst, const char_type& src) { dst = src; }
  static bool eq(char_type lhs, char_type rhs);
  static bool lt(char_type lhs, char_type rhs);

  // Sequence operations.
  static int compare(const char_type* lhs, const char_type* rhs, std::size_t count);
  static std::size_t length(const char_type* str);
  static const char_type* find(const char_type* str, std::size_t count, const char_type& needle);
  static char_type* move(char_type* dst, const char_type* src, std::size_t count);
  static TEST_CONSTEXPR_CXX20 char_type* copy(char_type* dst, const char_type* src, std::size_t count) {
    std::copy(src, src + count, dst);
    return dst;
  }
  static TEST_CONSTEXPR_CXX20 char_type* assign(char_type* dst, std::size_t count, char_type fill) {
    std::fill(dst, dst + count, fill);
    return dst;
  }

  // int_type conversions.
  static int_type not_eof(int_type value);
  static char_type to_char_type(int_type value);
  static int_type to_int_type(char_type ch);
  static bool eq_int_type(int_type lhs, int_type rhs);
  static int_type eof();
};

// Runs test_string for TestChar<Integer, First>, TestChar<Integer, First + 1>,
// ..., TestChar<Integer, Last>, in that order.
template <typename Integer, size_t Last, size_t First = 1>
struct TestCharSizes {
  static void run() {
    test_string<TestChar<Integer, First> >();
    TestCharSizes<Integer, Last, First + 1>::run();
  }
};

// End of the recursion: the largest requested element count.
template <typename Integer, size_t Last>
struct TestCharSizes<Integer, Last, Last> {
  static void run() { test_string<TestChar<Integer, Last> >(); }
};

int main(int, char**) {
  // Character types built from 1 to 16 single bytes.
  TestCharSizes<uint8_t, 16>::run();

  // Character types built from 1 to 8 two-byte integers.
  TestCharSizes<uint16_t, 8>::run();

  // Character types built from 1 to 4 four-byte integers.
  TestCharSizes<uint32_t, 4>::run();

  // Character types built from 1 or 2 eight-byte integers.
  TestCharSizes<uint64_t, 2>::run();
}
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,13 @@ TEST_CONSTEXPR_CXX20 bool test() {
// https://llvm.org/PR31454
std::basic_string<VeryLarge> s;
VeryLarge vl = {};
LIBCPP_ASSERT(s.size() == 0);
s.push_back(vl);
LIBCPP_ASSERT(s.size() == 1);
s.push_back(vl);
LIBCPP_ASSERT(s.size() == 2);
s.push_back(vl);
LIBCPP_ASSERT(s.size() == 3);
}

return true;
Expand Down
Loading