Skip to content

Commit fa8fc31

Browse files
Fix unicode flags conversion from and to uint16_t
Bitfields are allocated in different order on s390x
1 parent 27c19c4 commit fa8fc31

File tree

1 file changed

+42
-0
lines changed

1 file changed

+42
-0
lines changed

src/unicode.h

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ struct unicode_cpt_flags {
1515
SYMBOL = 0x0040, // regex: \p{S}
1616
CONTROL = 0x0080, // regex: \p{C}
1717
MASK_CATEGORIES = 0x00FF,
18+
WHITESPACE = 0x0100,
19+
LOWERCASE = 0x0200,
20+
UPPERCASE = 0x0400,
21+
NFD = 0x0800,
1822
};
1923

2024
// codepoint type
@@ -34,11 +38,49 @@ struct unicode_cpt_flags {
3438

3539
// decode from uint16
3640
// Build the flag set from a packed 16-bit mask (defaults to 0, i.e. no mask bits set).
// Little-endian builds write the mask straight over the object's storage.
// Big-endian builds (e.g. s390x) allocate the bitfields in a different order,
// so there each field is set from its own mask bit instead.
inline unicode_cpt_flags(const uint16_t flags = 0) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    uint16_t * const raw = reinterpret_cast<uint16_t*>(this);
    *raw = flags;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    is_undefined   = (flags & UNDEFINED)   != 0;
    is_number      = (flags & NUMBER)      != 0;
    is_letter      = (flags & LETTER)      != 0;
    is_separator   = (flags & SEPARATOR)   != 0;
    is_accent_mark = (flags & ACCENT_MARK) != 0;
    is_punctuation = (flags & PUNCTUATION) != 0;
    is_symbol      = (flags & SYMBOL)      != 0;
    is_control     = (flags & CONTROL)     != 0;
    is_whitespace  = (flags & WHITESPACE)  != 0;
    is_lowercase   = (flags & LOWERCASE)   != 0;
    is_uppercase   = (flags & UPPERCASE)   != 0;
    is_nfd         = (flags & NFD)         != 0;
#else
#error Unexpected or undefined __BYTE_ORDER__
#endif
}
3960

4061
inline uint16_t as_uint() const {
62+
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4163
return *reinterpret_cast<const uint16_t*>(this);
64+
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
65+
uint16_t result =
66+
is_undefined * UNDEFINED
67+
+ is_number * NUMBER
68+
+ is_letter * LETTER
69+
+ is_separator * SEPARATOR
70+
+ is_accent_mark * ACCENT_MARK
71+
+ is_punctuation * PUNCTUATION
72+
+ is_symbol * SYMBOL
73+
+ is_control * CONTROL
74+
+ is_whitespace * WHITESPACE
75+
+ is_lowercase * LOWERCASE
76+
+ is_uppercase * UPPERCASE
77+
+ is_nfd * NFD
78+
;
79+
80+
return result;
81+
#else
82+
#error Unexpected or undefined __BYTE_ORDER__
83+
#endif
4284
}
4385

4486
inline uint16_t category_flag() const {

0 commit comments

Comments
 (0)