@@ -12,7 +12,7 @@ struct variant2tuple<std::variant<As...>> { using type = std::tuple<As...>; };
1212// Re: printing
1313// -----------------------------------------------------------------------------
1414
15- // name: general
15+ // name: specific cases
1616#define JSON_NAME (type ) \
1717 inline std::string name (const type &) { return #type; }
1818
@@ -39,6 +39,17 @@ JSON_NAME(literal)
3939
4040#undef JSON_NAME
4141
42+ // name: for atom
43+ // Remark: If preprocessor directives are such that atom<T> == T, then name(),
44+ // as defined below, without the class = require<> SFINAE, would be infinitely
45+ // recursive. It presumably wouldn't be called, because, above, we define each
46+ // case we intend to use. To be exceedingly proper, though, we have the SFINAE.
47+ template <class T , class = require<!same<atom<T>,T>>>
48+ std::string name (const atom<T> &)
49+ {
50+ return name (T ());
51+ }
52+
4253// name: for number
4354inline std::string name (const number &n)
4455{
@@ -48,7 +59,7 @@ inline std::string name(const number &n)
4859 );
4960}
5061
51- // detail:: print, for array and object
62+ // print, for array and object
5263template <class ACTION , class T >
5364void print (
5465 const T &obj, std::ostream &os,
@@ -62,7 +73,7 @@ void print(
6273 os << std::setw (0 ) << str + ch + (colors ? json::color::reset : " " );
6374}
6475
65- // detail:: print, for other types
76+ // print, for other types
6677template <class ACTION , class T >
6778void print (
6879 const T &obj, std::ostream &os, const std::string &content,
@@ -147,7 +158,7 @@ inline void expect(
147158// nocasecmp
148159// Case-insensitive std::string comparison.
149160// The old C language strcasecmp() is nonstandard. A modern, true caseless
150- // std::string comparison would depend on, e.g., locale; but the following
161+ // std::string comparison would depend on, e.g., locale, but the following
151162// should suffice for our purposes.
152163inline bool nocasecmp (const std::string &one, const std::string &two)
153164{
@@ -207,4 +218,102 @@ literal many(
207218 return literal (flags & literal::self ? text+suffix : " " );
208219} // many
209220
221+
222+ // -----------------------------------------------------------------------------
223+ // Support for reading string escapes of the form \u####
224+ // -----------------------------------------------------------------------------
225+
226+ // ------------------------
227+ // Constants, functions
228+ // ------------------------
229+
// Ranges for surrogates.
// Remark: each of [himin..himax] and [lomin..lomax]
// has 1024 values inclusive. Also, lomin == himax+1.
inline constexpr int himin = 0xD800; // 0b 11011 000 00000000
inline constexpr int himax = 0xDBFF; // 0b 11011 011 11111111
inline constexpr int lomin = 0xDC00; // 0b 11011 100 00000000
inline constexpr int lomax = 0xDFFF; // 0b 11011 111 11111111

// Offset removed when a surrogate pair (h,l) is combined as
//    (h << 10) + l - tweak
// Chosen so that the smallest pair, (himin,lomin), combines to 0x10000.
// The shift is performed on an unsigned value, not on a signed int.
inline constexpr unsigned tweak =
   (unsigned(himin) << 10) + unsigned(lomin) - 0x10000u;

// High/low surrogate?
// Both are constexpr, so they can be used in constant expressions as well.
inline constexpr bool hi(const int p) noexcept
   { return himin <= p && p <= himax; }
inline constexpr bool lo(const int p) noexcept
   { return lomin <= p && p <= lomax; }
243+
244+ // ------------------------
245+ // codepoint
246+ // ------------------------
247+
248+ // \uabcd ==> (a << 12) + (b << 8) + (c << 4) + (d << 0)
249+ inline int codepoint (
250+ const std::string &context, std::istream &is,
251+ std::string &token // representation of hex number, for diagnostic printing
252+ ) {
253+ int ret = 0 , ch;
254+ token = " \\ u" ;
255+ for (const unsigned shift : { 12u , 8u , 4u , 0u }) {
256+ if ((ch = is.get ()) == EOF)
257+ error (context +
258+ " Expected 4-character hex code; reached EOF instead." , &is);
259+ else if (' 0' <= ch && ch <= ' 9' ) ret += int ((unsigned (ch)-48u ) << shift);
260+ else if (' A' <= ch && ch <= ' F' ) ret += int ((unsigned (ch)-55u ) << shift);
261+ else if (' a' <= ch && ch <= ' f' ) ret += int ((unsigned (ch)-87u ) << shift);
262+ else
263+ error (context +
264+ " Invalid hex digit found while reading \\ u####." , &is);
265+ token += ch;
266+ }
267+ return ret;
268+ }
269+
270+ // ------------------------
271+ // unicode
272+ // ------------------------
273+
274+ inline void unicode (
275+ const std::string &context, std::istream &is,
276+ std::string &str
277+ ) {
278+ std::string one, two;
279+ int first = codepoint (context,is,one), second;
280+
281+ static const std::string
282+ hi_before_lo = " A high surrogate must precede the low surrogate" ,
283+ lo_follow_hi = " A low surrogate must follow the high surrogate" ;
284+
285+ if (lo (first))
286+ error (context + hi_before_lo + " " + one + " .\n " +
287+ " There was no such high surrogate." , &is);
288+ if (hi (first)) {
289+ if (is.get () != ' \\ ' || is.get () != ' u' )
290+ error (context + lo_follow_hi + " " + one + " .\n " +
291+ " There is no such low surrogate." , &is);
292+ if (!lo (second = codepoint (context,is,two)))
293+ error (context + lo_follow_hi + " " + one + " .\n " +
294+ two + " is not a low surrogate." , &is);
295+ first = (unsigned (first) << 10u ) + unsigned (second) - tweak;
296+ }
297+
298+ if (first <= 127 ) {
299+ // 0bbbbbbb (ASCII case)
300+ str += int (first);
301+ } else if (first <= 2047 ) {
302+ // 110bbbbb 10bbbbbb
303+ str += int (0b11000000u | ((unsigned (first) >> 6u ) ));
304+ str += int (0b10000000u | ((unsigned (first) ) & 0b00111111u ));
305+ } else if (first <= 65535 ) {
306+ // 1110bbbb 10bbbbbb 10bbbbbb
307+ str += int (0b11100000u | ((unsigned (first) >> 12u ) ));
308+ str += int (0b10000000u | ((unsigned (first) >> 6u ) & 0b00111111u ));
309+ str += int (0b10000000u | ((unsigned (first) ) & 0b00111111u ));
310+ } else {
311+ // 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
312+ str += int (0b11110000u | ((unsigned (first) >> 18u ) ));
313+ str += int (0b10000000u | ((unsigned (first) >> 12u ) & 0b00111111u ));
314+ str += int (0b10000000u | ((unsigned (first) >> 6u ) & 0b00111111u ));
315+ str += int (0b10000000u | ((unsigned (first) ) & 0b00111111u ));
316+ }
317+ }
318+
210319} // namespace detail
0 commit comments