@@ -15,6 +15,10 @@ struct unicode_cpt_flags {
1515 SYMBOL = 0x0040 , // regex: \p{S}
1616 CONTROL = 0x0080 , // regex: \p{C}
1717 MASK_CATEGORIES = 0x00FF ,
18+ WHITESPACE = 0x0100 ,
19+ LOWERCASE = 0x0200 ,
20+ UPPERCASE = 0x0400 ,
21+ NFD = 0x0800 ,
1822 };
1923
2024 // codepoint type
@@ -34,11 +38,49 @@ struct unicode_cpt_flags {
3438
3539 // decode from uint16
3640 inline unicode_cpt_flags (const uint16_t flags = 0 ) {
41+ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
3742 *reinterpret_cast <uint16_t *>(this ) = flags;
43+ #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
44+ is_undefined = (flags & UNDEFINED) ? 1 : 0 ;
45+ is_number = (flags & NUMBER) ? 1 : 0 ;
46+ is_letter = (flags & LETTER) ? 1 : 0 ;
47+ is_separator = (flags & SEPARATOR) ? 1 : 0 ;
48+ is_accent_mark = (flags & ACCENT_MARK) ? 1 : 0 ;
49+ is_punctuation = (flags & PUNCTUATION) ? 1 : 0 ;
50+ is_symbol = (flags & SYMBOL) ? 1 : 0 ;
51+ is_control = (flags & CONTROL) ? 1 : 0 ;
52+ is_whitespace = (flags & WHITESPACE) ? 1 : 0 ;
53+ is_lowercase = (flags & LOWERCASE) ? 1 : 0 ;
54+ is_uppercase = (flags & UPPERCASE) ? 1 : 0 ;
55+ is_nfd = (flags & NFD) ? 1 : 0 ;
56+ #else
57+ #error Unexpected or undefined __BYTE_ORDER__
58+ #endif
3859 }
3960
4061 inline uint16_t as_uint () const {
62+ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4163 return *reinterpret_cast <const uint16_t *>(this );
64+ #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
65+ uint16_t result =
66+ is_undefined * UNDEFINED
67+ + is_number * NUMBER
68+ + is_letter * LETTER
69+ + is_separator * SEPARATOR
70+ + is_accent_mark * ACCENT_MARK
71+ + is_punctuation * PUNCTUATION
72+ + is_symbol * SYMBOL
73+ + is_control * CONTROL
74+ + is_whitespace * WHITESPACE
75+ + is_lowercase * LOWERCASE
76+ + is_uppercase * UPPERCASE
77+ + is_nfd * NFD
78+ ;
79+
80+ return result;
81+ #else
82+ #error Unexpected or undefined __BYTE_ORDER__
83+ #endif
4284 }
4385
4486 inline uint16_t category_flag () const {
0 commit comments