@@ -19,100 +19,53 @@ namespace peg {
1919
2020std::string_view ast_node::unescaped_view () const
2121{
22- if (std::holds_alternative<std::uint16_t >(unescaped))
23- {
24- // The whole string_value was a single unicode character.
25- std::string utf8;
22+ auto result = std::visit (
23+ [](const auto & value) noexcept {
24+ return std::string_view { value };
25+ },
26+ unescaped);
2627
27- if (unescape::utf8_append_utf32 (utf8, std::get<std::uint16_t >(unescaped)))
28- {
29- const_cast <ast_node*>(this )->unescaped = std::move (utf8);
30- }
31- else
32- {
33- throw parse_error (" invalid escaped unicode code point" , this ->begin ());
34- }
35- }
36- else if (std::holds_alternative<std::list<string_or_utf16>>(unescaped))
28+ if (result.empty ())
3729 {
38- // First convert all of the consecutive unicode sequences to UTF-8 strings together.
39- auto & values = std::get<std::list<string_or_utf16>>(const_cast <ast_node*>(this )->unescaped );
40- const auto isUtf16 = [](const string_or_utf16& value) noexcept {
41- return std::holds_alternative<std::uint16_t >(value);
42- };
43- auto itrStart = std::find_if (values.begin (), values.end (), isUtf16);
44- auto itrEnd = std::find_if_not (itrStart, values.end (), isUtf16);
45- std::list<std::string> utf8;
46-
47- if (itrStart != itrEnd)
30+ if (children.size () > 1 )
4831 {
49- while (itrStart != itrEnd)
50- {
51- std::string unescaped;
52-
53- // Translate surrogate pairs (based on unescape::unescape_j from PEGTL)
54- for (auto itr = itrStart; itr != itrEnd; ++itr)
55- {
56- const auto c = std::get<std::uint16_t >(*itr);
57-
58- if ((0xd800 <= c) && (c <= 0xdbff ) && ++itr != itrEnd)
59- {
60- const auto d = std::get<std::uint16_t >(*itr);
61-
62- if ((0xdc00 <= d) && (d <= 0xdfff ))
63- {
64- (void )unescape::utf8_append_utf32 (unescaped,
65- (((c & 0x03ff ) << 10 ) | (d & 0x03ff )) + 0x10000 );
66- continue ;
67- }
68- }
69-
70- if (!unescape::utf8_append_utf32 (unescaped, c))
71- {
72- throw parse_error (" invalid escaped unicode code point" , this ->begin ());
73- }
74- }
75-
76- utf8.push_back (std::move (unescaped));
77-
78- values.erase (itrStart, itrEnd);
79- values.insert (itrEnd, std::string_view { utf8.back () });
80-
81- itrStart = std::find_if (itrEnd, values.end (), isUtf16);
82- itrEnd = std::find_if_not (itrStart, values.end (), isUtf16);
83- }
84- }
32+ std::string joined;
8533
86- // If the string_value had multiple unescaped sub-strings, concatenate them on
87- // demand and store the result as a std::string.
88- std::string joined;
34+ joined.reserve (std::accumulate (children.cbegin (),
35+ children.cend (),
36+ size_t (0 ),
37+ [](size_t total, const std::unique_ptr<ast_node>& child) {
38+ return total + child->unescaped_view ().size ();
39+ }));
8940
90- joined.reserve (std::accumulate (values.cbegin (),
91- values.cend (),
92- size_t (0 ),
93- [](size_t total, const auto & child) {
94- return total + std::get<std::string_view>(child).size ();
95- }));
41+ for (const auto & child : children)
42+ {
43+ joined.append (child->unescaped_view ());
44+ }
9645
97- for (const auto & child : values)
46+ const_cast <ast_node*>(this )->unescaped = std::move (joined);
47+ result = std::get<std::string>(unescaped);
48+ }
49+ else if (!children.empty ())
9850 {
99- joined.append (std::get<std::string_view>(child));
51+ const_cast <ast_node*>(this )->unescaped = children.front ()->unescaped_view ();
52+ result = std::get<std::string_view>(unescaped);
10053 }
54+ else if (has_content () && is_type<escaped_unicode>())
55+ {
56+ const auto content = string_view ();
57+ memory_input<> in (content.data (), content.size (), " escaped unicode" );
58+ std::string utf8;
10159
102- const_cast <ast_node*>( this )-> unescaped = std::move (joined );
103- }
60+ utf8. reserve ((content. size () + 1 ) / 2 );
61+ unescape::unescape_j::apply (in, utf8);
10462
105- // By this point it should always be a std::string_view or a std::string.
106- if (std::holds_alternative<std::string_view>(unescaped))
107- {
108- return std::get<std::string_view>(unescaped);
109- }
110- else if (std::holds_alternative<std::string>(unescaped))
111- {
112- return std::get<std::string>(unescaped);
63+ const_cast <ast_node*>(this )->unescaped = std::move (utf8);
64+ result = std::get<std::string>(unescaped);
65+ }
11366 }
11467
115- throw parse_error ( " unexpected sub-string " , this -> begin ()) ;
68+ return result ;
11669}
11770
11871using namespace tao ::graphqlpeg;
@@ -165,20 +118,6 @@ struct ast_selector<float_value> : std::true_type
165118template <>
166119struct ast_selector <escaped_unicode> : std::true_type
167120{
168- static void transform (std::unique_ptr<ast_node>& n)
169- {
170- if (n->has_content ())
171- {
172- auto content = n->string_view ();
173-
174- n->unescaped = unescape::unhex_string<uint16_t >(content.data () + 1 ,
175- content.data () + content.size ());
176-
177- return ;
178- }
179-
180- throw parse_error (" invalid escaped unicode code point" , n->begin ());
181- }
182121};
183122
184123template <>
@@ -263,34 +202,6 @@ struct ast_selector<block_quote_character> : std::true_type
263202template <>
264203struct ast_selector <string_value> : std::true_type
265204{
266- static void transform (std::unique_ptr<ast_node>& n)
267- {
268- if (!n->children .empty ())
269- {
270- if (n->children .size () > 1 )
271- {
272- std::list<ast_node::string_or_utf16> unescaped;
273-
274- std::transform (n->children .cbegin (),
275- n->children .cend (),
276- std::back_inserter (unescaped),
277- [](const auto & child) -> ast_node::string_or_utf16 {
278- if (std::holds_alternative<std::uint16_t >(child->unescaped ))
279- {
280- return { std::get<std::uint16_t >(child->unescaped ) };
281- }
282-
283- return { child->unescaped_view () };
284- });
285-
286- n->unescaped = std::move (unescaped);
287- }
288- else
289- {
290- n->unescaped = std::move (n->children .front ()->unescaped );
291- }
292- }
293- }
294205};
295206
296207template <>
0 commit comments