@@ -76,49 +76,49 @@ inline bool IsSurrogate(char32_t c, absl::string_view src,
7676//
7777// Unescapes C escape sequences and is the reverse of CEscape().
7878//
79- // If 'source' is valid, stores the unescaped string and its size in
80- // 'dest' and 'dest_len' respectively, and returns true. Otherwise
81- // returns false and optionally stores the error description in
82- // 'error'. Set 'error' to nullptr to disable error reporting.
79+ // If `src` is valid, stores the unescaped string `dst`, and returns
80+ // true. Otherwise returns false and optionally stores the error
81+ // description in `error`. Set `error` to nullptr to disable error
82+ // reporting.
8383//
84- // 'dest' should point to a buffer that is at least as big as 'source'.
85- // 'source' and 'dest' may be the same.
86- //
87- // NOTE: any changes to this function must also be reflected in the older
88- // UnescapeCEscapeSequences().
84+ // `src` and `dst` may use the same underlying buffer.
8985// ----------------------------------------------------------------------
90- bool CUnescapeInternal (absl::string_view source, bool leave_nulls_escaped,
91- absl::Nonnull< char *> dest ,
92- absl::Nonnull<ptrdiff_t *> dest_len ,
86+
87+ bool CUnescapeInternal ( absl::string_view src, bool leave_nulls_escaped ,
88+ absl::Nonnull<std::string *> dst ,
9389 absl::Nullable<std::string*> error) {
94- char * d = dest ;
95- const char * p = source. data ();
96- const char * end = p + source. size ();
97- const char * last_byte = end - 1 ;
90+ strings_internal::STLStringResizeUninitialized (dst, src. size ()) ;
91+
92+ absl::string_view::size_type p = 0 ; // Current src position.
93+ std::string::size_type d = 0 ; // Current dst position.
9894
99- // Small optimization for case where source = dest and there's no escaping
100- while (p == d && p < end && *p != ' \\ ' ) p++, d++;
95+ // When unescaping in-place, skip any prefix that does not have escaping.
96+ if (src.data () == dst->data ()) {
97+ while (p < src.size () && src[p] != ' \\ ' ) p++, d++;
98+ }
10199
102- while (p < end ) {
103- if (*p != ' \\ ' ) {
104- * d++ = * p++;
100+ while (p < src. size () ) {
101+ if (src[p] != ' \\ ' ) {
102+ (*dst)[ d++] = src[ p++] ;
105103 } else {
106- if (++p > last_byte) { // skip past the '\\'
107- if (error) *error = " String cannot end with \\ " ;
104+ if (++p >= src.size ()) { // skip past the '\\'
105+ if (error != nullptr ) {
106+ *error = " String cannot end with \\ " ;
107+ }
108108 return false ;
109109 }
110- switch (*p ) {
111- case ' a' : * d++ = ' \a ' ; break ;
112- case ' b' : * d++ = ' \b ' ; break ;
113- case ' f' : * d++ = ' \f ' ; break ;
114- case ' n' : * d++ = ' \n ' ; break ;
115- case ' r' : * d++ = ' \r ' ; break ;
116- case ' t' : * d++ = ' \t ' ; break ;
117- case ' v' : * d++ = ' \v ' ; break ;
118- case ' \\ ' : * d++ = ' \\ ' ; break ;
119- case ' ?' : * d++ = ' \? ' ; break ; // \? Who knew?
120- case ' \' ' : * d++ = ' \' ' ; break ;
121- case ' "' : * d++ = ' \" ' ; break ;
110+ switch (src[p] ) {
111+ case ' a' : (*dst)[ d++] = ' \a ' ; break ;
112+ case ' b' : (*dst)[ d++] = ' \b ' ; break ;
113+ case ' f' : (*dst)[ d++] = ' \f ' ; break ;
114+ case ' n' : (*dst)[ d++] = ' \n ' ; break ;
115+ case ' r' : (*dst)[ d++] = ' \r ' ; break ;
116+ case ' t' : (*dst)[ d++] = ' \t ' ; break ;
117+ case ' v' : (*dst)[ d++] = ' \v ' ; break ;
118+ case ' \\ ' : (*dst)[ d++] = ' \\ ' ; break ;
119+ case ' ?' : (*dst)[ d++] = ' \? ' ; break ;
120+ case ' \' ' : (*dst)[ d++] = ' \' ' ; break ;
121+ case ' "' : (*dst)[ d++] = ' \" ' ; break ;
122122 case ' 0' :
123123 case ' 1' :
124124 case ' 2' :
@@ -128,188 +128,170 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
128128 case ' 6' :
129129 case ' 7' : {
130130 // octal digit: 1 to 3 digits
131- const char * octal_start = p;
132- unsigned int ch = static_cast <unsigned int >(*p - ' 0' ); // digit 1
133- if (p < last_byte && is_octal_digit (p[ 1 ]))
134- ch = ch * 8 + static_cast <unsigned int >(* ++p - ' 0' ); // digit 2
135- if (p < last_byte && is_octal_digit (p[ 1 ]))
136- ch = ch * 8 + static_cast <unsigned int >(* ++p - ' 0' ); // digit 3
131+ auto octal_start = p;
132+ unsigned int ch = static_cast <unsigned int >(src[p] - ' 0' ); // digit 1
133+ if (p + 1 < src. size () && is_octal_digit (src[p + 1 ]))
134+ ch = ch * 8 + static_cast <unsigned int >(src[ ++p] - ' 0' ); // digit 2
135+ if (p + 1 < src. size () && is_octal_digit (src[p + 1 ]))
136+ ch = ch * 8 + static_cast <unsigned int >(src[ ++p] - ' 0' ); // digit 3
137137 if (ch > 0xff ) {
138- if (error) {
139- *error = " Value of \\ " +
140- std::string (octal_start,
141- static_cast < size_t >( p + 1 - octal_start)) +
142- " exceeds 0xff" ;
138+ if (error != nullptr ) {
139+ *error =
140+ " Value of \\ " +
141+ std::string (src. substr (octal_start, p + 1 - octal_start)) +
142+ " exceeds 0xff" ;
143143 }
144144 return false ;
145145 }
146146 if ((ch == 0 ) && leave_nulls_escaped) {
147147 // Copy the escape sequence for the null character
148- const size_t octal_size = static_cast < size_t >(p + 1 - octal_start) ;
149- *d++ = ' \\ ' ;
150- memmove (d, octal_start, octal_size) ;
151- d += octal_size;
148+ (*dst)[d++] = ' \\ ' ;
149+ while (octal_start <= p) {
150+ (*dst)[d++] = src[octal_start++] ;
151+ }
152152 break ;
153153 }
154- * d++ = static_cast <char >(ch);
154+ (*dst)[ d++] = static_cast <char >(ch);
155155 break ;
156156 }
157157 case ' x' :
158158 case ' X' : {
159- if (p >= last_byte) {
160- if (error) *error = " String cannot end with \\ x" ;
159+ if (p + 1 >= src.size ()) {
160+ if (error != nullptr ) {
161+ *error = " String cannot end with \\ x" ;
162+ }
161163 return false ;
162- } else if (!absl::ascii_isxdigit (static_cast <unsigned char >(p[1 ]))) {
163- if (error) *error = " \\ x cannot be followed by a non-hex digit" ;
164+ } else if (!absl::ascii_isxdigit (
165+ static_cast <unsigned char >(src[p + 1 ]))) {
166+ if (error != nullptr ) {
167+ *error = " \\ x cannot be followed by a non-hex digit" ;
168+ }
164169 return false ;
165170 }
166171 unsigned int ch = 0 ;
167- const char * hex_start = p;
168- while (p < last_byte &&
169- absl::ascii_isxdigit (static_cast <unsigned char >(p[ 1 ])))
172+ auto hex_start = p;
173+ while (p + 1 < src. size () &&
174+ absl::ascii_isxdigit (static_cast <unsigned char >(src[p + 1 ]))) {
170175 // Arbitrarily many hex digits
171- ch = (ch << 4 ) + hex_digit_to_int (*++p);
176+ ch = (ch << 4 ) + hex_digit_to_int (src[++p]);
177+ }
172178 if (ch > 0xFF ) {
173- if (error) {
179+ if (error != nullptr ) {
174180 *error = " Value of \\ " +
175- std::string (hex_start,
176- static_cast <size_t >(p + 1 - hex_start)) +
181+ std::string (src.substr (hex_start, p + 1 - hex_start)) +
177182 " exceeds 0xff" ;
178183 }
179184 return false ;
180185 }
181186 if ((ch == 0 ) && leave_nulls_escaped) {
182187 // Copy the escape sequence for the null character
183- const size_t hex_size = static_cast < size_t >(p + 1 - hex_start) ;
184- *d++ = ' \\ ' ;
185- memmove (d, hex_start, hex_size) ;
186- d += hex_size;
188+ (*dst)[d++] = ' \\ ' ;
189+ while (hex_start <= p) {
190+ (*dst)[d++] = src[hex_start++] ;
191+ }
187192 break ;
188193 }
189- * d++ = static_cast <char >(ch);
194+ (*dst)[ d++] = static_cast <char >(ch);
190195 break ;
191196 }
192197 case ' u' : {
193198 // \uhhhh => convert 4 hex digits to UTF-8
194199 char32_t rune = 0 ;
195- const char * hex_start = p;
196- if (p + 4 >= end) {
197- if (error) {
198- *error = " \\ u must be followed by 4 hex digits: \\ " +
199- std::string (hex_start,
200- static_cast <size_t >(p + 1 - hex_start));
200+ auto hex_start = p;
201+ if (p + 4 >= src.size ()) {
202+ if (error != nullptr ) {
203+ *error = " \\ u must be followed by 4 hex digits" ;
201204 }
202205 return false ;
203206 }
204207 for (int i = 0 ; i < 4 ; ++i) {
205208 // Look one char ahead.
206- if (absl::ascii_isxdigit (static_cast <unsigned char >(p[ 1 ]))) {
207- rune = (rune << 4 ) + hex_digit_to_int (* ++p); // Advance p.
209+ if (absl::ascii_isxdigit (static_cast <unsigned char >(src[p + 1 ]))) {
210+ rune = (rune << 4 ) + hex_digit_to_int (src[ ++p]);
208211 } else {
209- if (error) {
212+ if (error != nullptr ) {
210213 *error = " \\ u must be followed by 4 hex digits: \\ " +
211- std::string (hex_start,
212- static_cast <size_t >(p + 1 - hex_start));
214+ std::string (src.substr (hex_start, p + 1 - hex_start));
213215 }
214216 return false ;
215217 }
216218 }
217219 if ((rune == 0 ) && leave_nulls_escaped) {
218220 // Copy the escape sequence for the null character
219- *d++ = ' \\ ' ;
220- memmove (d, hex_start, 5 ); // u0000
221- d += 5 ;
221+ (*dst)[d++] = ' \\ ' ;
222+ while (hex_start <= p) {
223+ (*dst)[d++] = src[hex_start++];
224+ }
222225 break ;
223226 }
224- if (IsSurrogate (rune, absl::string_view (hex_start, 5 ), error)) {
227+ if (IsSurrogate (rune, src. substr (hex_start, 5 ), error)) {
225228 return false ;
226229 }
227- d += strings_internal::EncodeUTF8Char (d, rune);
230+ d += strings_internal::EncodeUTF8Char (dst-> data () + d, rune);
228231 break ;
229232 }
230233 case ' U' : {
231234 // \Uhhhhhhhh => convert 8 hex digits to UTF-8
232235 char32_t rune = 0 ;
233- const char * hex_start = p;
234- if (p + 8 >= end) {
235- if (error) {
236- *error = " \\ U must be followed by 8 hex digits: \\ " +
237- std::string (hex_start,
238- static_cast <size_t >(p + 1 - hex_start));
236+ auto hex_start = p;
237+ if (p + 8 >= src.size ()) {
238+ if (error != nullptr ) {
239+ *error = " \\ U must be followed by 8 hex digits" ;
239240 }
240241 return false ;
241242 }
242243 for (int i = 0 ; i < 8 ; ++i) {
243244 // Look one char ahead.
244- if (absl::ascii_isxdigit (static_cast <unsigned char >(p[ 1 ]))) {
245+ if (absl::ascii_isxdigit (static_cast <unsigned char >(src[p + 1 ]))) {
245246 // Don't change rune until we're sure this
246247 // is within the Unicode limit, but do advance p.
247- uint32_t newrune = (rune << 4 ) + hex_digit_to_int (* ++p);
248+ uint32_t newrune = (rune << 4 ) + hex_digit_to_int (src[ ++p] );
248249 if (newrune > 0x10FFFF ) {
249- if (error) {
250- *error = " Value of \\ " +
251- std::string (hex_start,
252- static_cast < size_t >( p + 1 - hex_start)) +
253- " exceeds Unicode limit (0x10FFFF)" ;
250+ if (error != nullptr ) {
251+ *error =
252+ " Value of \\ " +
253+ std::string (src. substr (hex_start, p + 1 - hex_start)) +
254+ " exceeds Unicode limit (0x10FFFF)" ;
254255 }
255256 return false ;
256257 } else {
257258 rune = newrune;
258259 }
259260 } else {
260- if (error) {
261+ if (error != nullptr ) {
261262 *error = " \\ U must be followed by 8 hex digits: \\ " +
262- std::string (hex_start,
263- static_cast <size_t >(p + 1 - hex_start));
263+ std::string (src.substr (hex_start, p + 1 - hex_start));
264264 }
265265 return false ;
266266 }
267267 }
268268 if ((rune == 0 ) && leave_nulls_escaped) {
269269 // Copy the escape sequence for the null character
270- *d++ = ' \\ ' ;
271- memmove (d, hex_start, 9 ); // U00000000
272- d += 9 ;
270+ (*dst)[d++] = ' \\ ' ;
271+ // U00000000
272+ while (hex_start <= p) {
273+ (*dst)[d++] = src[hex_start++];
274+ }
273275 break ;
274276 }
275- if (IsSurrogate (rune, absl::string_view (hex_start, 9 ), error)) {
277+ if (IsSurrogate (rune, src. substr (hex_start, 9 ), error)) {
276278 return false ;
277279 }
278- d += strings_internal::EncodeUTF8Char (d, rune);
280+ d += strings_internal::EncodeUTF8Char (dst-> data () + d, rune);
279281 break ;
280282 }
281283 default : {
282- if (error) *error = std::string (" Unknown escape sequence: \\ " ) + *p;
284+ if (error != nullptr ) {
285+ *error = std::string (" Unknown escape sequence: \\ " ) + src[p];
286+ }
283287 return false ;
284288 }
285289 }
286- p++; // read past letter we escaped
290+ p++; // Read past letter we escaped.
287291 }
288292 }
289- *dest_len = d - dest;
290- return true ;
291- }
292293
293- // ----------------------------------------------------------------------
294- // CUnescapeInternal()
295- //
296- // Same as above but uses a std::string for output. 'source' and 'dest'
297- // may be the same.
298- // ----------------------------------------------------------------------
299- bool CUnescapeInternal (absl::string_view source, bool leave_nulls_escaped,
300- absl::Nonnull<std::string*> dest,
301- absl::Nullable<std::string*> error) {
302- strings_internal::STLStringResizeUninitialized (dest, source.size ());
303-
304- ptrdiff_t dest_size;
305- if (!CUnescapeInternal (source,
306- leave_nulls_escaped,
307- &(*dest)[0 ],
308- &dest_size,
309- error)) {
310- return false ;
311- }
312- dest->erase (static_cast <size_t >(dest_size));
294+ dst->erase (d);
313295 return true ;
314296}
315297
0 commit comments