@@ -81,24 +81,6 @@ typedef std::unique_ptr<StreamWriter> StreamWriterPtr;
8181typedef std::auto_ptr<StreamWriter> StreamWriterPtr;
8282#endif
8383
84- static bool containsControlCharacter (const char * str) {
85- while (*str) {
86- if (isControlCharacter (*(str++)))
87- return true ;
88- }
89- return false ;
90- }
91-
92- static bool containsControlCharacter0 (const char * str, unsigned len) {
93- char const * end = str + len;
94- while (end != str) {
95- if (isControlCharacter (*str) || 0 ==*str)
96- return true ;
97- ++str;
98- }
99- return false ;
100- }
101-
10284JSONCPP_STRING valueToString (LargestInt value) {
10385 UIntToStringBuffer buffer;
10486 char * current = buffer + sizeof (buffer);
@@ -176,89 +158,103 @@ JSONCPP_STRING valueToString(double value) { return valueToString(value, false,
176158
// Converts a boolean to text: yields the first literal when `value` is true
// and the second literal when it is false.
// NOTE(review): the leading space inside both literals looks like an artifact
// of how this listing was extracted — confirm against the real file.
177159JSONCPP_STRING valueToString (bool value) { return value ? " true" : " false" ; }
178160
179- JSONCPP_STRING valueToQuotedString (const char * value) {
180- if (value == NULL )
181- return " " ;
182- // Not sure how to handle unicode...
183- if (strpbrk (value, " \"\\\b\f\n\r\t " ) == NULL &&
184- !containsControlCharacter (value))
185- return JSONCPP_STRING (" \" " ) + value + " \" " ;
186- // We have to walk value and escape any special characters.
187- // Appending to JSONCPP_STRING is not efficient, but this should be rare.
188- // (Note: forward slashes are *not* rare, but I am not escaping them.)
189- JSONCPP_STRING::size_type maxsize =
190- strlen (value) * 2 + 3 ; // allescaped+quotes+NULL
191- JSONCPP_STRING result;
192- result.reserve (maxsize); // to avoid lots of mallocs
193- result += " \" " ;
194- for (const char * c = value; *c != 0 ; ++c) {
195- switch (*c) {
196- case ' \" ' :
197- result += " \\\" " ;
198- break ;
199- case ' \\ ' :
200- result += " \\\\ " ;
201- break ;
202- case ' \b ' :
203- result += " \\ b" ;
204- break ;
205- case ' \f ' :
206- result += " \\ f" ;
207- break ;
208- case ' \n ' :
209- result += " \\ n" ;
210- break ;
211- case ' \r ' :
212- result += " \\ r" ;
213- break ;
214- case ' \t ' :
215- result += " \\ t" ;
216- break ;
217- // case '/':
218- // Even though \/ is considered a legal escape in JSON, a bare
219- // slash is also legal, so I see no reason to escape it.
220- // (I hope I am not misunderstanding something.
221- // blep notes: actually escaping \/ may be useful in javascript to avoid </
222- // sequence.
223- // Should add a flag to allow this compatibility mode and prevent this
224- // sequence from occurring.
225- default :
226- if (isControlCharacter (*c)) {
227- JSONCPP_OSTRINGSTREAM oss;
228- oss << " \\ u" << std::hex << std::uppercase << std::setfill (' 0' )
229- << std::setw (4 ) << static_cast <int >(*c);
230- result += oss.str ();
231- } else {
232- result += *c;
233- }
234- break ;
235- }
/// Reports whether any of the first `n` bytes of `s` cannot be copied verbatim
/// into a quoted JSON string.
///
/// A byte needs escaping when it is a backslash, a double quote, a control
/// character (below 0x20, which includes an embedded NUL), or a non-ASCII byte
/// (0x80 and above, i.e. part of a multi-byte UTF-8 sequence).
///
/// @param s  Start of the byte range; may be null only when `n` is 0.
/// @param n  Number of bytes to inspect.
/// @return true if at least one byte needs escaping, false otherwise
///         (including for an empty range).
static bool isAnyCharRequiredQuoting(char const* s, size_t n) {
  assert(s || !n);

  char const* const end = s + n;
  for (char const* cur = s; cur < end; ++cur) {
    // Compare as unsigned so the outcome does not depend on whether plain
    // `char` is signed on the target platform.
    unsigned char const c = static_cast<unsigned char>(*cur);
    if (c == '\\' || c == '"' || c < 0x20 || c >= 0x80)
      return true;
  }
  return false;
}
240172
241- // https://github.com/upcaste/upcaste/blob/master/src/upcore/src/cstring/strnpbrk.cpp
242- static char const * strnpbrk (char const * s, char const * accept, size_t n) {
243- assert ((s || !n) && accept);
/// Decodes the UTF-8 sequence that starts at `s` into a Unicode code point.
///
/// @param s  In/out cursor. Must satisfy s < e on entry, because the lead byte
///           is read unconditionally. Once all bytes of a multi-byte sequence
///           are available, `s` is advanced by (sequence length - 1), so it is
///           left on the LAST byte consumed rather than one past it; this
///           happens even if the decoded value is then rejected. `s` is not
///           moved for an ASCII byte, for a truncated sequence, or for a lead
///           byte of 0xF8 and above.
/// @param e  One past the last readable byte.
/// @return The decoded code point, or U+FFFD (REPLACEMENT CHARACTER) when the
///         sequence is truncated, overlong, encodes a surrogate, exceeds
///         U+10FFFF, or starts with an unsupported lead byte.
///
/// Continuation bytes are only masked with 0x3F; their top two bits are not
/// checked, and lead bytes 0x80..0xBF fall into the two-byte branch.
static unsigned int utf8ToCodepoint(const char*& s, const char* e) {
  const unsigned int REPLACEMENT_CHARACTER = 0xFFFD;

  const unsigned int firstByte = static_cast<unsigned char>(*s);

  if (firstByte < 0x80)
    return firstByte;

  if (firstByte < 0xE0) {
    if (e - s < 2)
      return REPLACEMENT_CHARACTER;

    const unsigned int calculated =
        ((firstByte & 0x1F) << 6) |
        (static_cast<unsigned char>(s[1]) & 0x3Fu);
    s += 1;
    // oversized (overlong) encodings are invalid
    return calculated < 0x80 ? REPLACEMENT_CHARACTER : calculated;
  }

  if (firstByte < 0xF0) {
    if (e - s < 3)
      return REPLACEMENT_CHARACTER;

    const unsigned int calculated =
        ((firstByte & 0x0F) << 12) |
        ((static_cast<unsigned char>(s[1]) & 0x3Fu) << 6) |
        (static_cast<unsigned char>(s[2]) & 0x3Fu);
    s += 2;
    // Surrogates (U+D800..U+DFFF) are not code points in their own right and
    // must never appear UTF-8 encoded.
    if (calculated >= 0xD800 && calculated <= 0xDFFF)
      return REPLACEMENT_CHARACTER;
    // oversized (overlong) encodings are invalid
    return calculated < 0x800 ? REPLACEMENT_CHARACTER : calculated;
  }

  if (firstByte < 0xF8) {
    if (e - s < 4)
      return REPLACEMENT_CHARACTER;

    // The lead byte contributes 3 bits above the 18 bits supplied by the
    // three continuation bytes.
    const unsigned int calculated =
        ((firstByte & 0x07) << 18) |
        ((static_cast<unsigned char>(s[1]) & 0x3Fu) << 12) |
        ((static_cast<unsigned char>(s[2]) & 0x3Fu) << 6) |
        (static_cast<unsigned char>(s[3]) & 0x3Fu);
    s += 3;
    // Reject overlong encodings and anything beyond the last Unicode code
    // point; the latter could not be expressed as a surrogate pair either.
    if (calculated < 0x10000 || calculated > 0x10FFFF)
      return REPLACEMENT_CHARACTER;
    return calculated;
  }

  return REPLACEMENT_CHARACTER;
}
223+
// Lookup table of lowercase hexadecimal digit pairs, one pair per byte value
// from 00 through ff, laid out in ascending order (sixteen values per row).
// It is indexed as hex2[2 * b] and hex2[2 * b + 1] to fetch the two digits of
// byte value b.
// NOTE(review): the leading space inside each literal below looks like an
// artifact of how this listing was extracted; the 2 * b indexing only works
// if every byte value occupies exactly two characters — confirm against the
// real file.
224+ static const char hex2[] =
225+ " 000102030405060708090a0b0c0d0e0f"
226+ " 101112131415161718191a1b1c1d1e1f"
227+ " 202122232425262728292a2b2c2d2e2f"
228+ " 303132333435363738393a3b3c3d3e3f"
229+ " 404142434445464748494a4b4c4d4e4f"
230+ " 505152535455565758595a5b5c5d5e5f"
231+ " 606162636465666768696a6b6c6d6e6f"
232+ " 707172737475767778797a7b7c7d7e7f"
233+ " 808182838485868788898a8b8c8d8e8f"
234+ " 909192939495969798999a9b9c9d9e9f"
235+ " a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
236+ " b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
237+ " c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
238+ " d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
239+ " e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
240+ " f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff" ;
241+
242+ static JSONCPP_STRING toHex16Bit (unsigned int x) {
243+ const unsigned int hi = (x >> 8 ) & 0xff ;
244+ const unsigned int lo = x & 0xff ;
245+ JSONCPP_STRING result (4 , ' ' );
246+ result[0 ] = hex2[2 * hi];
247+ result[1 ] = hex2[2 * hi + 1 ];
248+ result[2 ] = hex2[2 * lo];
249+ result[3 ] = hex2[2 * lo + 1 ];
250+ return result;
251+ }
252+
256253static JSONCPP_STRING valueToQuotedStringN (const char * value, unsigned length) {
257254 if (value == NULL )
258255 return " " ;
259- // Not sure how to handle unicode...
260- if (strnpbrk (value, " \"\\\b\f\n\r\t " , length) == NULL &&
261- !containsControlCharacter0 (value, length))
256+
257+ if (!isAnyCharRequiredQuoting (value, length))
262258 return JSONCPP_STRING (" \" " ) + value + " \" " ;
263259 // We have to walk value and escape any special characters.
264260 // Appending to JSONCPP_STRING is not efficient, but this should be rare.
@@ -300,14 +296,24 @@ static JSONCPP_STRING valueToQuotedStringN(const char* value, unsigned length) {
300296 // sequence.
301297 // Should add a flag to allow this compatibility mode and prevent this
302298 // sequence from occurring.
303- default :
304- if ((isControlCharacter (*c)) || (*c == 0 )) {
305- JSONCPP_OSTRINGSTREAM oss;
306- oss << " \\ u" << std::hex << std::uppercase << std::setfill (' 0' )
307- << std::setw (4 ) << static_cast <int >(*c);
308- result += oss.str ();
309- } else {
310- result += *c;
299+ default : {
300+ unsigned int cp = utf8ToCodepoint (c, end);
301+ // don't escape non-control characters
302+ // (short escape sequence are applied above)
303+ if (cp < 0x80 && cp >= 0x20 )
304+ result += static_cast <char >(cp);
305+ else if (cp < 0x10000 ) { // codepoint is in Basic Multilingual Plane
306+ result += " \\ u" ;
307+ result += toHex16Bit (cp);
308+ }
309+ else { // codepoint is not in Basic Multilingual Plane
310+ // convert to surrogate pair first
311+ cp -= 0x10000 ;
312+ result += " \\ u" ;
313+ result += toHex16Bit ((cp >> 10 ) + 0xD800 );
314+ result += " \\ u" ;
315+ result += toHex16Bit ((cp & 0x3FF ) + 0xDC00 );
316+ }
311317 }
312318 break ;
313319 }
@@ -316,6 +322,10 @@ static JSONCPP_STRING valueToQuotedStringN(const char* value, unsigned length) {
316322 return result;
317323}
318324
325+ JSONCPP_STRING valueToQuotedString (const char * value) {
326+ return valueToQuotedStringN (value, static_cast <unsigned int >(strlen (value)));
327+ }
328+
319329// Class Writer
320330// //////////////////////////////////////////////////////////////////
321331Writer::~Writer () {}
0 commit comments