Skip to content

Commit 2956e37

Browse files
committed
Fix interaction between debug presentation, precision, and width for strings
1 parent f434546 commit 2956e37

File tree

2 files changed

+113
-50
lines changed

2 files changed

+113
-50
lines changed

include/fmt/format.h

Lines changed: 87 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -657,21 +657,9 @@ FMT_CONSTEXPR void for_each_codepoint(string_view s, F f) {
657657
} while (buf_ptr < buf + num_chars_left);
658658
}
659659

660-
template <typename Char>
661-
inline auto compute_width(basic_string_view<Char> s) -> size_t {
662-
return s.size();
663-
}
664-
665-
// Computes approximate display width of a UTF-8 string.
666-
FMT_CONSTEXPR inline auto compute_width(string_view s) -> size_t {
667-
size_t num_code_points = 0;
668-
// It is not a lambda for compatibility with C++14.
669-
struct count_code_points {
670-
size_t* count;
671-
FMT_CONSTEXPR auto operator()(uint32_t cp, string_view) const -> bool {
672-
*count += to_unsigned(
673-
1 +
674-
(cp >= 0x1100 &&
660+
FMT_CONSTEXPR auto display_width_of(uint32_t cp) noexcept -> size_t {
661+
return to_unsigned(
662+
1 + (cp >= 0x1100 &&
675663
(cp <= 0x115f || // Hangul Jamo init. consonants
676664
cp == 0x2329 || // LEFT-POINTING ANGLE BRACKET
677665
cp == 0x232a || // RIGHT-POINTING ANGLE BRACKET
@@ -689,12 +677,6 @@ FMT_CONSTEXPR inline auto compute_width(string_view s) -> size_t {
689677
(cp >= 0x1f300 && cp <= 0x1f64f) ||
690678
// Supplemental Symbols and Pictographs:
691679
(cp >= 0x1f900 && cp <= 0x1f9ff))));
692-
return true;
693-
}
694-
};
695-
// We could avoid branches by using utf8_decode directly.
696-
for_each_codepoint(s, count_code_points{&num_code_points});
697-
return num_code_points;
698680
}
699681

700682
template <typename T> struct is_integral : std::is_integral<T> {};
@@ -2128,35 +2110,98 @@ FMT_CONSTEXPR FMT_INLINE auto write(OutputIt out, T value,
21282110
return write_int<Char>(out, make_write_int_arg(value, specs.sign()), specs);
21292111
}
21302112

2131-
inline auto convert_precision_to_size(string_view s, size_t precision)
2132-
-> size_t {
2133-
size_t display_width = 0;
2134-
size_t result = s.size();
2135-
for_each_codepoint(s, [&](uint32_t, string_view sv) {
2136-
display_width += compute_width(sv);
2137-
// Stop when display width exceeds precision.
2138-
if (display_width > precision) {
2139-
result = to_unsigned(sv.begin() - s.begin());
2113+
template <typename Char, typename OutputIt,
2114+
FMT_ENABLE_IF(std::is_same<Char, char>::value)>
2115+
FMT_CONSTEXPR auto write(OutputIt out, basic_string_view<Char> s,
2116+
const format_specs& specs) -> OutputIt {
2117+
bool is_debug = specs.type() == presentation_type::debug;
2118+
if (specs.precision < 0 && specs.width == 0) {
2119+
auto&& it = reserve(out, s.size());
2120+
return is_debug ? write_escaped_string(it, s) : copy<char>(s, it);
2121+
}
2122+
2123+
size_t display_width_limit =
2124+
specs.precision < 0 ? SIZE_MAX : to_unsigned(specs.precision);
2125+
size_t display_width =
2126+
!is_debug || specs.precision == 0 ? 0 : 1; // Account for opening "
2127+
size_t size = !is_debug || specs.precision == 0 ? 0 : 1;
2128+
for_each_codepoint(s, [&](uint32_t cp, string_view sv) {
2129+
if (is_debug && needs_escape(cp)) {
2130+
counting_buffer<char> buf;
2131+
write_escaped_cp(basic_appender<char>(buf),
2132+
find_escape_result<char>{sv.begin(), sv.end(), cp});
2133+
// We're reinterpreting bytes as display width. That's okay
2134+
// because write_escaped_cp() only writes ASCII characters.
2135+
size_t cp_width = buf.count();
2136+
if (display_width + cp_width <= display_width_limit) {
2137+
display_width += cp_width;
2138+
size += cp_width;
2139+
// If this is the end of the string, account for closing "
2140+
if (display_width < display_width_limit && sv.end() == s.end()) {
2141+
++display_width;
2142+
++size;
2143+
}
2144+
return true;
2145+
}
2146+
2147+
size += display_width_limit - display_width;
2148+
display_width = display_width_limit;
21402149
return false;
21412150
}
2142-
return true;
2151+
2152+
size_t cp_width = display_width_of(cp);
2153+
if (cp_width + display_width <= display_width_limit) {
2154+
display_width += cp_width;
2155+
size += sv.size();
2156+
// If this is the end of the string, account for closing "
2157+
if (is_debug && display_width < display_width_limit &&
2158+
sv.end() == s.end()) {
2159+
++display_width;
2160+
++size;
2161+
}
2162+
return true;
2163+
}
2164+
2165+
return false;
21432166
});
2144-
return result;
2145-
}
21462167

2147-
template <typename Char, FMT_ENABLE_IF(!std::is_same<Char, char>::value)>
2148-
auto convert_precision_to_size(basic_string_view<Char>, size_t precision)
2149-
-> size_t {
2150-
return precision;
2168+
struct bounded_output_iterator {
2169+
reserve_iterator<OutputIt> underlying_iterator;
2170+
size_t bound;
2171+
2172+
FMT_CONSTEXPR auto operator*() -> bounded_output_iterator& { return *this; }
2173+
FMT_CONSTEXPR auto operator++() -> bounded_output_iterator& {
2174+
return *this;
2175+
}
2176+
FMT_CONSTEXPR auto operator++(int) -> bounded_output_iterator& {
2177+
return *this;
2178+
}
2179+
FMT_CONSTEXPR auto operator=(char c) -> bounded_output_iterator& {
2180+
if (bound > 0) {
2181+
*underlying_iterator++ = c;
2182+
--bound;
2183+
}
2184+
return *this;
2185+
}
2186+
};
2187+
2188+
return write_padded<char>(
2189+
out, specs, size, display_width, [&](reserve_iterator<OutputIt> it) {
2190+
return is_debug
2191+
? write_escaped_string(bounded_output_iterator{it, size}, s)
2192+
.underlying_iterator
2193+
: copy<char>(s.data(), s.data() + size, it);
2194+
});
21512195
}
21522196

2153-
template <typename Char, typename OutputIt>
2197+
template <typename Char, typename OutputIt,
2198+
FMT_ENABLE_IF(!std::is_same<Char, char>::value)>
21542199
FMT_CONSTEXPR auto write(OutputIt out, basic_string_view<Char> s,
21552200
const format_specs& specs) -> OutputIt {
21562201
auto data = s.data();
21572202
auto size = s.size();
21582203
if (specs.precision >= 0 && to_unsigned(specs.precision) < size)
2159-
size = convert_precision_to_size(s, to_unsigned(specs.precision));
2204+
size = to_unsigned(specs.precision);
21602205

21612206
bool is_debug = specs.type() == presentation_type::debug;
21622207
if (is_debug) {
@@ -2165,22 +2210,19 @@ FMT_CONSTEXPR auto write(OutputIt out, basic_string_view<Char> s,
21652210
size = buf.count();
21662211
}
21672212

2168-
size_t width = 0;
2169-
if (specs.width != 0) {
2170-
width =
2171-
is_debug ? size : compute_width(basic_string_view<Char>(data, size));
2172-
}
21732213
return write_padded<Char>(
2174-
out, specs, size, width, [=](reserve_iterator<OutputIt> it) {
2214+
out, specs, size, [&](reserve_iterator<OutputIt> it) {
21752215
return is_debug ? write_escaped_string(it, s)
21762216
: copy<Char>(data, data + size, it);
21772217
});
21782218
}
2219+
21792220
template <typename Char, typename OutputIt>
21802221
FMT_CONSTEXPR auto write(OutputIt out, basic_string_view<Char> s,
21812222
const format_specs& specs, locale_ref) -> OutputIt {
21822223
return write<Char>(out, s, specs);
21832224
}
2225+
21842226
template <typename Char, typename OutputIt>
21852227
FMT_CONSTEXPR auto write(OutputIt out, const Char* s, const format_specs& specs,
21862228
locale_ref) -> OutputIt {

test/format-test.cc

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -206,10 +206,6 @@ TEST(util_test, parse_nonnegative_int) {
206206
EXPECT_EQ(fmt::detail::parse_nonnegative_int(begin, end, -1), -1);
207207
}
208208

209-
TEST(format_impl_test, compute_width) {
210-
EXPECT_EQ(fmt::detail::compute_width("вожык"), 5);
211-
}
212-
213209
TEST(util_test, utf8_to_utf16) {
214210
auto u = fmt::detail::utf8_to_utf16("лошадка");
215211
EXPECT_EQ(L"\x043B\x043E\x0448\x0430\x0434\x043A\x0430", u.str());
@@ -887,13 +883,39 @@ TEST(format_test, width) {
887883
" 0xcafe");
888884
EXPECT_EQ(fmt::format("{:11}", 'x'), "x ");
889885
EXPECT_EQ(fmt::format("{:12}", "str"), "str ");
886+
EXPECT_EQ(fmt::format("{:*^5}", "🤡"), "*🤡**");
890887
EXPECT_EQ(fmt::format("{:*^6}", "🤡"), "**🤡**");
891888
EXPECT_EQ(fmt::format("{:*^8}", "你好"), "**你好**");
892889
EXPECT_EQ(fmt::format("{:#6}", 42.0), " 42.");
893890
EXPECT_EQ(fmt::format("{:6c}", static_cast<int>('x')), "x ");
894891
EXPECT_EQ(fmt::format("{:>06.0f}", 0.00884311), " 0");
895892
}
896893

894+
TEST(format_test, debug_presentation) {
895+
EXPECT_EQ(fmt::format("{:?}", ""), R"("")");
896+
897+
EXPECT_EQ(fmt::format("{:*<5.0?}", "\n"), R"(*****)");
898+
EXPECT_EQ(fmt::format("{:*<5.1?}", "\n"), R"("****)");
899+
EXPECT_EQ(fmt::format("{:*<5.2?}", "\n"), R"("\***)");
900+
EXPECT_EQ(fmt::format("{:*<5.3?}", "\n"), R"("\n**)");
901+
EXPECT_EQ(fmt::format("{:*<5.4?}", "\n"), R"("\n"*)");
902+
903+
EXPECT_EQ(fmt::format("{:*<5.1?}", "Σ"), R"("****)");
904+
EXPECT_EQ(fmt::format("{:*<5.2?}", "Σ"), R"("Σ***)");
905+
EXPECT_EQ(fmt::format("{:*<5.3?}", "Σ"), R"("Σ"**)");
906+
907+
EXPECT_EQ(fmt::format("{:*<5.1?}", ""), R"("****)");
908+
EXPECT_EQ(fmt::format("{:*<5.2?}", ""), R"("****)");
909+
EXPECT_EQ(fmt::format("{:*<5.3?}", ""), R"("笑**)");
910+
EXPECT_EQ(fmt::format("{:*<5.4?}", ""), R"("笑"*)");
911+
912+
EXPECT_EQ(fmt::format("{:*<8?}", "туда"), R"("туда"**)");
913+
EXPECT_EQ(fmt::format("{:*>8?}", "сюда"), R"(**"сюда")");
914+
EXPECT_EQ(fmt::format("{:*^8?}", "中心"), R"(*"中心"*)");
915+
916+
EXPECT_EQ(fmt::format("{:*^14?}", "A\t👈🤯ы猫"), R"(*"A\t👈🤯ы猫"*)");
917+
}
918+
897919
auto bad_dynamic_spec_msg = FMT_BUILTIN_TYPES
898920
? "width/precision is out of range"
899921
: "width/precision is not integer";
@@ -1134,7 +1156,6 @@ TEST(format_test, large_precision) {
11341156

11351157
TEST(format_test, utf8_precision) {
11361158
auto result = fmt::format("{:.4}", "caf\u00e9s"); // cafés
1137-
EXPECT_EQ(fmt::detail::compute_width(result), 4);
11381159
EXPECT_EQ(result, "caf\u00e9");
11391160
}
11401161

0 commit comments

Comments
 (0)