Skip to content

Commit ab60910

Browse files
[libc++][format] Discard contents since null-terminator in character arrays in formatting (#116571)
Currently, built-in `char`/`wchar_t` arrays are assumed to be null-terminated sequence with the terminator being the last element in formatting. This doesn't conform to [format.arg]/6.9. > otherwise, if `decay_t<TD>` is `char_type*` or `const char_type*`, > initializes value with `static_cast<const char_type*>(v)`; The standard wording specifies that character arrays are decayed to pointers. When the null terminator is not the last element or there's no null terminator (the latter case is UB), libc++ currently produces different results. Also fixes and hardens `formatter<CharT[N], CharT>::format` in `<__format/formatter_string.h>`. These specializations are rarely used. Fixes #115935. Also checks the preconditions in this case, which fixes #116570.
1 parent fa985b5 commit ab60910

File tree

5 files changed

+86
-24
lines changed

5 files changed

+86
-24
lines changed

libcxx/include/__format/format_arg_store.h

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <__concepts/arithmetic.h>
1818
#include <__concepts/same_as.h>
1919
#include <__config>
20+
#include <__cstddef/size_t.h>
2021
#include <__format/concepts.h>
2122
#include <__format/format_arg.h>
2223
#include <__type_traits/conditional.h>
@@ -32,6 +33,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD
3233

3334
namespace __format {
3435

36+
template <class _Arr, class _Elem>
37+
inline constexpr bool __is_bounded_array_of = false;
38+
39+
template <class _Elem, size_t _Len>
40+
inline constexpr bool __is_bounded_array_of<_Elem[_Len], _Elem> = true;
41+
3542
/// \returns The @c __arg_t based on the type of the formatting argument.
3643
///
3744
/// \pre \c __formattable<_Tp, typename _Context::char_type>
@@ -110,7 +117,7 @@ consteval __arg_t __determine_arg_t() {
110117

111118
// Char array
112119
template <class _Context, class _Tp>
113-
requires(is_array_v<_Tp> && same_as<_Tp, typename _Context::char_type[extent_v<_Tp>]>)
120+
requires __is_bounded_array_of<_Tp, typename _Context::char_type>
114121
consteval __arg_t __determine_arg_t() {
115122
return __arg_t::__string_view;
116123
}
@@ -168,13 +175,14 @@ _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __valu
168175
static_assert(__arg != __arg_t::__none, "the supplied type is not formattable");
169176
static_assert(__formattable_with<_Tp, _Context>);
170177

178+
using __context_char_type = _Context::char_type;
171179
// Not all types can be used to directly initialize the
172180
// __basic_format_arg_value. First handle all types needing adjustment, the
173181
// final else requires no adjustment.
174182
if constexpr (__arg == __arg_t::__char_type)
175183

176184
# if _LIBCPP_HAS_WIDE_CHARACTERS
177-
if constexpr (same_as<typename _Context::char_type, wchar_t> && same_as<_Dp, char>)
185+
if constexpr (same_as<__context_char_type, wchar_t> && same_as<_Dp, char>)
178186
return basic_format_arg<_Context>{__arg, static_cast<wchar_t>(static_cast<unsigned char>(__value))};
179187
else
180188
# endif
@@ -189,14 +197,16 @@ _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __valu
189197
return basic_format_arg<_Context>{__arg, static_cast<unsigned long long>(__value)};
190198
else if constexpr (__arg == __arg_t::__string_view)
191199
// Using std::size on a character array will add the NUL-terminator to the size.
192-
if constexpr (is_array_v<_Dp>)
193-
return basic_format_arg<_Context>{
194-
__arg, basic_string_view<typename _Context::char_type>{__value, extent_v<_Dp> - 1}};
195-
else
196-
// When the _Traits or _Allocator are different an implicit conversion will
197-
// fail.
200+
if constexpr (__is_bounded_array_of<_Dp, __context_char_type>) {
201+
const __context_char_type* const __pbegin = std::begin(__value);
202+
const __context_char_type* const __pzero =
203+
char_traits<__context_char_type>::find(__pbegin, extent_v<_Dp>, __context_char_type{});
204+
_LIBCPP_ASSERT_VALID_INPUT_RANGE(__pzero != nullptr, "formatting a non-null-terminated array");
198205
return basic_format_arg<_Context>{
199-
__arg, basic_string_view<typename _Context::char_type>{__value.data(), __value.size()}};
206+
__arg, basic_string_view<__context_char_type>{__pbegin, static_cast<size_t>(__pzero - __pbegin)}};
207+
} else
208+
// When the _Traits or _Allocator are different an implicit conversion will fail.
209+
return basic_format_arg<_Context>{__arg, basic_string_view<__context_char_type>{__value.data(), __value.size()}};
200210
else if constexpr (__arg == __arg_t::__ptr)
201211
return basic_format_arg<_Context>{__arg, static_cast<const void*>(__value)};
202212
else if constexpr (__arg == __arg_t::__handle)

libcxx/include/__format/formatter_string.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,15 @@
1010
#ifndef _LIBCPP___FORMAT_FORMATTER_STRING_H
1111
#define _LIBCPP___FORMAT_FORMATTER_STRING_H
1212

13+
#include <__assert>
1314
#include <__config>
1415
#include <__format/concepts.h>
1516
#include <__format/format_parse_context.h>
1617
#include <__format/formatter.h>
1718
#include <__format/formatter_output.h>
1819
#include <__format/parser_std_format_spec.h>
1920
#include <__format/write_escaped.h>
21+
#include <cstddef>
2022
#include <string>
2123
#include <string_view>
2224

@@ -94,7 +96,9 @@ struct formatter<_CharT[_Size], _CharT> : public __formatter_string<_CharT> {
9496
template <class _FormatContext>
9597
_LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator
9698
format(const _CharT (&__str)[_Size], _FormatContext& __ctx) const {
97-
return _Base::format(basic_string_view<_CharT>(__str, _Size), __ctx);
99+
const _CharT* const __pzero = char_traits<_CharT>::find(__str, _Size, _CharT{});
100+
_LIBCPP_ASSERT_VALID_INPUT_RANGE(__pzero != nullptr, "formatting a non-null-terminated array");
101+
return _Base::format(basic_string_view<_CharT>(__str, static_cast<size_t>(__pzero - __str)), __ctx);
98102
}
99103
};
100104

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// <format>
10+
11+
// Formatting non-null-terminated character arrays.
12+
13+
// REQUIRES: std-at-least-c++20, has-unix-headers, libcpp-hardening-mode={{extensive|debug}}
14+
// XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing
15+
16+
#include <format>
17+
18+
#include "check_assertion.h"
19+
20+
int main(int, char**) {
21+
{
22+
const char non_null_terminated[3]{'1', '2', '3'};
23+
TEST_LIBCPP_ASSERT_FAILURE(std::format("{}", non_null_terminated), "formatting a non-null-terminated array");
24+
}
25+
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
26+
{
27+
const wchar_t non_null_terminated[3]{L'1', L'2', L'3'};
28+
TEST_LIBCPP_ASSERT_FAILURE(std::format(L"{}", non_null_terminated), "formatting a non-null-terminated array");
29+
}
30+
#endif
31+
32+
return 0;
33+
}

libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,10 @@ struct Tester {
4141
constexpr Tester(const char (&r)[N]) { __builtin_memcpy(text, r, N); }
4242
char text[N];
4343

44-
// The size of the array shouldn't include the NUL character.
45-
static const std::size_t size = N - 1;
46-
4744
template <class CharT>
4845
void
4946
test(const std::basic_string<CharT>& expected, const std::basic_string_view<CharT>& fmt, std::size_t offset) const {
50-
using Str = CharT[size];
47+
using Str = CharT[N];
5148
std::basic_format_parse_context<CharT> parse_ctx{fmt};
5249
std::formatter<Str, CharT> formatter;
5350
static_assert(std::semiregular<decltype(formatter)>);
@@ -57,16 +54,25 @@ struct Tester {
5754
assert(std::to_address(it) == std::to_address(fmt.end()) - offset);
5855

5956
std::basic_string<CharT> result;
60-
auto out = std::back_inserter(result);
57+
auto out = std::back_inserter(result);
6158
using FormatCtxT = std::basic_format_context<decltype(out), CharT>;
6259

63-
std::basic_string<CharT> buffer{text, text + N};
64-
// Note not too found of this hack
65-
Str* data = reinterpret_cast<Str*>(const_cast<CharT*>(buffer.c_str()));
66-
67-
FormatCtxT format_ctx =
68-
test_format_context_create<decltype(out), CharT>(out, std::make_format_args<FormatCtxT>(*data));
69-
formatter.format(*data, format_ctx);
60+
if constexpr (std::is_same_v<CharT, char>) {
61+
FormatCtxT format_ctx =
62+
test_format_context_create<decltype(out), CharT>(out, std::make_format_args<FormatCtxT>(text));
63+
formatter.format(text, format_ctx);
64+
}
65+
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
66+
else {
67+
Str buffer;
68+
for (std::size_t i = 0; i != N; ++i) {
69+
buffer[i] = static_cast<CharT>(text[i]);
70+
}
71+
FormatCtxT format_ctx =
72+
test_format_context_create<decltype(out), CharT>(out, std::make_format_args<FormatCtxT>(buffer));
73+
formatter.format(buffer, format_ctx);
74+
}
75+
#endif
7076
assert(result == expected);
7177
}
7278

@@ -118,8 +124,8 @@ template <class CharT>
118124
void test_array() {
119125
test_helper_wrapper<" azAZ09,./<>?">(STR(" azAZ09,./<>?"), STR("}"));
120126

121-
std::basic_string<CharT> s(CSTR("abc\0abc"), 7);
122-
test_helper_wrapper<"abc\0abc">(s, STR("}"));
127+
// Contents after embedded null terminator are not formatted.
128+
test_helper_wrapper<"abc\0abc">(STR("abc"), STR("}"));
123129

124130
test_helper_wrapper<"world">(STR("world"), STR("}"));
125131
test_helper_wrapper<"world">(STR("world"), STR("_>}"));

libcxx/test/std/utilities/format/format.functions/format_tests.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3189,6 +3189,15 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
31893189
const CharT* data = buffer;
31903190
check(SV("hello 09azAZ!"), SV("hello {}"), data);
31913191
}
3192+
{
3193+
// https://github.com/llvm/llvm-project/issues/115935
3194+
// Contents after the embedded null character are discarded.
3195+
CharT buffer[] = {CharT('a'), CharT('b'), CharT('c'), 0, CharT('d'), CharT('e'), CharT('f'), 0};
3196+
check(SV("hello abc"), SV("hello {}"), buffer);
3197+
// Even when the last element of the array is not null character.
3198+
CharT buffer2[] = {CharT('a'), CharT('b'), CharT('c'), 0, CharT('d'), CharT('e'), CharT('f')};
3199+
check(SV("hello abc"), SV("hello {}"), buffer2);
3200+
}
31923201
{
31933202
std::basic_string<CharT> data = STR("world");
31943203
check(SV("hello world"), SV("hello {}"), data);

0 commit comments

Comments
 (0)