Skip to content

Commit a49524d

Browse files
committed
Breaking API changes! Rename the per-character size() method to sizech(). Add string size methods (sizez() and size()) to the API.
1 parent 44f3560 commit a49524d

File tree

2 files changed

+138
-87
lines changed

2 files changed

+138
-87
lines changed

include/ww898/utf_converters.hpp

Lines changed: 80 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -45,17 +45,17 @@ static uint16_t const max_surrogate_high = 0xDBFF;
4545
static uint16_t const min_surrogate_low = 0xDC00;
4646
static uint16_t const max_surrogate_low = 0xDFFF;
4747

48-
inline bool is_surrogate_high(uint32_t const cp)
48+
inline bool is_surrogate_high(uint32_t const cp) throw()
4949
{
5050
return min_surrogate_high <= cp && cp <= max_surrogate_high;
5151
}
5252

53-
inline bool is_surrogate_low(uint32_t const cp)
53+
inline bool is_surrogate_low(uint32_t const cp) throw()
5454
{
5555
return min_surrogate_low <= cp && cp <= max_surrogate_low;
5656
}
5757

58-
inline bool is_surrogate(uint32_t const cp)
58+
inline bool is_surrogate(uint32_t const cp) throw()
5959
{
6060
return min_surrogate <= cp && cp <= max_surrogate;
6161
}
@@ -69,32 +69,33 @@ struct utf8 final
6969
static_assert(max_code_point == (1u << 31) - 1u, "Invalid maximum supported code point");
7070

7171
template<
72-
typename It>
73-
static size_t size(It & it)
72+
typename It,
73+
typename NextFn>
74+
static size_t sizech(It & it, NextFn && next_fn)
7475
{
75-
uint8_t const chf = *it;
76+
uint8_t const chf = *it++;
7677
if (chf < 0x80)
7778
return 1;
7879
else if (chf < 0xC0)
7980
throw std::runtime_error("Unexpected UTF8 slave symbol at master position");
80-
else if (chf < 0xE0)
81+
else if (next_fn(it), chf < 0xE0)
8182
return 2;
82-
else if (chf < 0xF0)
83+
else if (next_fn(it), chf < 0xF0)
8384
return 3;
84-
else if (chf < 0xF8)
85+
else if (next_fn(it), chf < 0xF8)
8586
return 4;
86-
else if (chf < 0xFC)
87+
else if (next_fn(it), chf < 0xFC)
8788
return 5;
88-
else if (chf < 0xFE)
89+
else if (next_fn(it), chf < 0xFE)
8990
return 6;
9091
else
9192
throw std::runtime_error("Invalid UTF8 master symbol");
9293
}
9394

9495
template<
9596
typename It,
96-
typename VerifyIt>
97-
static uint32_t read(It & it, VerifyIt && verify_it)
97+
typename VerifyFn>
98+
static uint32_t read(It & it, VerifyFn && verify_fn)
9899
{
99100
uint8_t const chf = *it++;
100101
if (chf < 0x80) // 0xxx_xxxx
@@ -132,7 +133,7 @@ struct utf8 final
132133
throw std::runtime_error("Invalid UTF8 master symbol");
133134
while (extra-- > 0)
134135
{
135-
verify_it();
136+
verify_fn(it);
136137
uint8_t const chn = *it++;
137138
if (chn < 0x80 || 0xC0 <= chn)
138139
throw std::runtime_error("Invalid UTF8 slave symbol");
@@ -196,29 +197,30 @@ struct utf16 final
196197
static_assert(max_code_point == 0x10000u + (1u << 20) - 1u, "Invalid maximum supported code point");
197198

198199
template<
199-
typename It>
200-
static size_t size(It & it)
200+
typename It,
201+
typename NextFn>
202+
static size_t sizech(It & it, NextFn && next_fn)
201203
{
202-
uint16_t const chf = *it;
204+
uint16_t const chf = *it++;
203205
if (chf < 0xD800 || 0xE000 <= chf)
204206
return 1;
205-
else if (chf < 0xDC00)
207+
else if (next_fn(it), chf < 0xDC00)
206208
return 2;
207209
else
208210
throw std::runtime_error("Unexpected UTF16 slave symbol at master position");
209211
}
210212

211213
template<
212214
typename It,
213-
typename VerifyIt>
214-
static uint32_t read(It & it, VerifyIt && verify_it)
215+
typename VerifyFn>
216+
static uint32_t read(It & it, VerifyFn && verify_fn)
215217
{
216218
uint16_t const chf = *it++;
217219
if (chf < 0xD800 || 0xE000 <= chf) // [0x0000‥0xD7FF] or [0xE000‥0xFFFF]
218220
return chf;
219-
else if (chf < 0xDC00) // [0xD800‥0xDBFF] [0xDC00‥0xDFFF]
221+
else if (chf < 0xDC00) // [0xD800‥0xDBFF] [0xDC00‥0xDFFF]
220222
{
221-
verify_it();
223+
verify_fn(it);
222224
uint16_t const chn = *it++;
223225
if (chn < 0xDC00 || 0xE000 <= chn)
224226
throw std::runtime_error("Invalid UTF16 slave symbol");
@@ -258,16 +260,18 @@ struct utf32 final
258260
static_assert(max_code_point == (1u << 31) - 1u, "Invalid maximum supported code point");
259261

260262
template<
261-
typename It>
262-
static size_t size(It &)
263+
typename It,
264+
typename NextFn>
265+
static size_t sizech(It & it, NextFn &&)
263266
{
267+
++it;
264268
return 1;
265269
}
266270

267271
template<
268272
typename It,
269-
typename VerifyIt>
270-
static uint32_t read(It & it, VerifyIt &&)
273+
typename VerifyFn>
274+
static uint32_t read(It & it, VerifyFn &&)
271275
{
272276
return *it++;
273277
}
@@ -286,9 +290,36 @@ struct utf32 final
286290
template<
287291
typename Utf,
288292
typename It>
289-
size_t size(It it)
293+
size_t sizech(It it)
294+
{
295+
return Utf::sizech(it, [] (It &) {});
296+
}
297+
298+
template<
299+
typename Utf,
300+
typename It>
301+
size_t sizez(It it)
290302
{
291-
return Utf::size(it);
303+
size_t size = 0;
304+
while (*it)
305+
size += Utf::sizech(it, [] (It & it) { ++it; });
306+
return size;
307+
}
308+
309+
template<
310+
typename Utf,
311+
typename It>
312+
size_t size(It it, It const eit)
313+
{
314+
auto const next_fn = [&eit] (It & it)
315+
{
316+
if (it++ == eit)
317+
throw std::runtime_error("Not enough input");
318+
};
319+
size_t size = 0;
320+
while (it != eit)
321+
size += Utf::sizech(it, next_fn);
322+
return size;
292323
}
293324

294325
template<
@@ -300,7 +331,7 @@ Oit convz(It it, Oit oit)
300331
{
301332
while (true)
302333
{
303-
auto const cp = Utf::read(it, [] {});
334+
auto const cp = Utf::read(it, [] (It &) {});
304335
if (!cp)
305336
return oit;
306337
Outf::write(cp, oit);
@@ -324,14 +355,16 @@ template<
324355
typename Oit>
325356
struct conv_strategy<Utf, Outf, It, Oit, false> final
326357
{
327-
static void func(It it, It const eit, Oit oit)
358+
Oit operator()(It it, It const eit, Oit oit)
328359
{
360+
auto const verify_fn = [&eit] (It & it)
361+
{
362+
if (it == eit)
363+
throw std::runtime_error("Not enough input");
364+
};
329365
while (it != eit)
330-
Outf::write(Utf::read(it, [&it, &eit]
331-
{
332-
if (it == eit)
333-
throw std::runtime_error("Not enough input");
334-
}), oit);
366+
Outf::write(Utf::read(it, verify_fn), oit);
367+
return oit;
335368
}
336369
};
337370

@@ -342,20 +375,22 @@ template<
342375
typename Oit>
343376
struct conv_strategy<Utf, Outf, It, Oit, true> final
344377
{
345-
static void func(It it, It const eit, Oit oit)
378+
Oit operator()(It it, It const eit, Oit oit)
346379
{
347380
if (static_cast<size_t>(eit - it) >= Utf::max_supported_symbol_size)
348381
{
349382
auto const fast_eit = eit - Utf::max_supported_symbol_size;
350383
while (it < fast_eit)
351-
Outf::write(Utf::read(it, [] {}), oit);
384+
Outf::write(Utf::read(it, [] (It &) {}), oit);
352385
}
386+
auto const verify_fn = [&eit] (It & it)
387+
{
388+
if (it == eit)
389+
throw std::runtime_error("Not enough input");
390+
};
353391
while (it != eit)
354-
Outf::write(Utf::read(it, [&it, &eit]
355-
{
356-
if (it == eit)
357-
throw std::runtime_error("Not enough input");
358-
}), oit);
392+
Outf::write(Utf::read(it, verify_fn), oit);
393+
return oit;
359394
}
360395
};
361396

@@ -369,12 +404,12 @@ template<
369404
bool is_random_access_iterator = std::is_base_of<
370405
std::random_access_iterator_tag,
371406
typename std::iterator_traits<typename std::decay<It>::type>::iterator_category>::value>
372-
void conv(It && it, It && eit, Oit && oit)
407+
Oit conv(It && it, It && eit, Oit && oit)
373408
{
374-
detail::conv_strategy<Utf, Outf,
409+
return detail::conv_strategy<Utf, Outf,
375410
typename std::decay<It>::type,
376411
typename std::decay<Oit>::type,
377-
is_random_access_iterator>::func(
412+
is_random_access_iterator>()(
378413
std::forward<It>(it),
379414
std::forward<It>(eit),
380415
std::forward<Oit>(oit));

test/utf_converters_test.cpp

Lines changed: 58 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -452,88 +452,104 @@ BOOST_DATA_TEST_CASE(conv_utf8_to_utf32_supported, boost::make_iterator_range(su
452452
BOOST_DATA_TEST_CASE(size_utf8, boost::make_iterator_range(unicode_test_data), tuple)
453453
{
454454
static auto const max_symbol_size = utf::utf8::max_unicode_symbol_size;
455-
std::string res;
456-
for (auto str = tuple.utf8.c_str(); *str;)
455+
size_t total_size = 0;
456+
for (auto str = tuple.utf8.data(); *str;)
457457
{
458-
auto const size = utf::size<utf::utf8>(str);
458+
auto const size = utf::sizech<utf::utf8>(str);
459459
BOOST_TEST_REQUIRE(!!size);
460460
BOOST_TEST_REQUIRE(size <= max_symbol_size);
461-
auto const end_str = str + size;
462-
res += std::string(str, end_str);
463-
str = end_str;
461+
total_size += size;
462+
str += size;
464463
}
465-
auto const success = res == tuple.utf8;
466-
BOOST_TEST_REQUIRE(success);
464+
465+
auto const total_size1 = utf::sizez<utf::utf8>(tuple.utf8.data());
466+
BOOST_TEST_REQUIRE(tuple.utf8.size() == total_size1);
467+
468+
auto const total_size2 = utf::size<utf::utf8>(tuple.utf8.begin(), tuple.utf8.end());
469+
BOOST_TEST_REQUIRE(tuple.utf8.size() == total_size2);
467470
}
468471

469472
BOOST_DATA_TEST_CASE(size_utf16, boost::make_iterator_range(unicode_test_data), tuple)
470473
{
471474
static auto const max_symbol_size = utf::utf16::max_unicode_symbol_size;
472-
std::u16string res;
473-
for (auto str = tuple.utf16.c_str(); *str;)
475+
size_t total_size = 0;
476+
for (auto str = tuple.utf16.data(); *str;)
474477
{
475-
auto const size = utf::size<utf::utf16>(str);
478+
auto const size = utf::sizech<utf::utf16>(str);
476479
BOOST_TEST_REQUIRE(!!size);
477480
BOOST_TEST_REQUIRE(size <= max_symbol_size);
478-
auto const end_str = str + size;
479-
res += std::u16string(str, end_str);
480-
str = end_str;
481+
total_size += size;
482+
str += size;
481483

482484
}
483-
auto const success = res == tuple.utf16;
484-
BOOST_TEST_REQUIRE(success);
485+
486+
auto const total_size1 = utf::sizez<utf::utf16>(tuple.utf16.data());
487+
BOOST_TEST_REQUIRE(tuple.utf16.size() == total_size1);
488+
489+
auto const total_size2 = utf::size<utf::utf16>(tuple.utf16.begin(), tuple.utf16.end());
490+
BOOST_TEST_REQUIRE(tuple.utf16.size() == total_size2);
485491
}
486492

487493
BOOST_DATA_TEST_CASE(size_utf32, boost::make_iterator_range(unicode_test_data), tuple)
488494
{
489495
static auto const max_symbol_size = utf::utf32::max_unicode_symbol_size;
490-
std::u32string res;
491-
for (auto str = tuple.utf32.c_str(); *str;)
496+
size_t total_size = 0;
497+
for (auto str = tuple.utf32.data(); *str;)
492498
{
493-
auto const size = utf::size<utf::utf32>(str);
499+
auto const size = utf::sizech<utf::utf32>(str);
494500
BOOST_TEST_REQUIRE(!!size);
495501
BOOST_TEST_REQUIRE(size <= max_symbol_size);
496-
auto const end_str = str + size;
497-
res += std::u32string(str, end_str);
498-
str = end_str;
499-
502+
total_size += size;
503+
str += size;
500504
}
501-
auto const success = res == tuple.utf32;
502-
BOOST_TEST_REQUIRE(success);
505+
506+
auto const total_size1 = utf::sizez<utf::utf32>(tuple.utf32.data());
507+
BOOST_TEST_REQUIRE(tuple.utf32.size() == total_size1);
508+
509+
auto const total_size2 = utf::size<utf::utf32>(tuple.utf32.begin(), tuple.utf32.end());
510+
BOOST_TEST_REQUIRE(tuple.utf32.size() == total_size2);
503511
}
504512

505513
BOOST_DATA_TEST_CASE(size_utf8_supported, boost::make_iterator_range(supported_test_data), tuple)
506514
{
507515
static auto const max_symbol_size = utf::utf8::max_supported_symbol_size;
508-
std::string res;
509-
for (auto str = tuple.utf8.c_str(); *str;)
516+
size_t total_size = 0;
517+
for (auto str = tuple.utf8.data(); *str;)
510518
{
511-
auto const size = utf::size<utf::utf8>(str);
519+
auto const size = utf::sizech<utf::utf8>(str);
512520
BOOST_TEST_REQUIRE(!!size);
513521
BOOST_TEST_REQUIRE(size <= max_symbol_size);
514-
auto const end_str = str + size;
515-
res += std::string(str, end_str);
516-
str = end_str;
522+
total_size += size;
523+
str += size;
517524
}
518-
auto const success = res == tuple.utf8;
519-
BOOST_TEST_REQUIRE(success);
525+
BOOST_TEST_REQUIRE(tuple.utf8.size() == total_size);
526+
527+
auto const total_size1 = utf::sizez<utf::utf8>(tuple.utf8.data());
528+
BOOST_TEST_REQUIRE(tuple.utf8.size() == total_size1);
529+
530+
auto const total_size2 = utf::size<utf::utf8>(tuple.utf8.begin(), tuple.utf8.end());
531+
BOOST_TEST_REQUIRE(tuple.utf8.size() == total_size2);
520532
}
521533

522534
BOOST_DATA_TEST_CASE(size_utf32_supported, boost::make_iterator_range(supported_test_data), tuple)
523535
{
524-
static auto const max_symbol_size = utf::utf32::max_supported_symbol_size;
525-
std::u32string res;
526-
for (auto str = tuple.utf32.c_str(); *str;)
536+
static size_t const max_symbol_size = utf::utf32::max_supported_symbol_size;
537+
size_t total_size = 0;
538+
for (auto str = tuple.utf32.data(); *str;)
527539
{
528-
auto const size = utf::size<utf::utf32>(str);
540+
auto const size = utf::sizech<utf::utf32>(str);
529541
BOOST_TEST_REQUIRE(!!size);
530542
BOOST_TEST_REQUIRE(size <= max_symbol_size);
531-
auto const end_str = str + size;
532-
res += std::u32string(str, end_str);
533-
str = end_str;
543+
total_size += size;
544+
str += size;
534545
}
535-
auto const success = res == tuple.utf32;
536-
BOOST_TEST_REQUIRE(success);
546+
BOOST_TEST_REQUIRE(tuple.utf32.size() == total_size);
547+
548+
auto const total_size1 = utf::sizez<utf::utf32>(tuple.utf32.data());
549+
BOOST_TEST_REQUIRE(tuple.utf32.size() == total_size1);
550+
551+
auto const total_size2 = utf::size<utf::utf32>(tuple.utf32.begin(), tuple.utf32.end());
552+
BOOST_TEST_REQUIRE(tuple.utf32.size() == total_size2);
537553
}
538554

539555
namespace {

0 commit comments

Comments
 (0)