@@ -550,7 +550,7 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16)
550
550
}
551
551
c = (c << 4 ) | h ;
552
552
}
553
- if (c >= 0xd800 && c < 0xdc00 &&
553
+ if (is_hi_surrogate ( c ) &&
554
554
allow_utf16 == 2 && p [0 ] == '\\' && p [1 ] == 'u' ) {
555
555
/* convert an escaped surrogate pair into a
556
556
unicode char */
@@ -561,9 +561,9 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16)
561
561
break ;
562
562
c1 = (c1 << 4 ) | h ;
563
563
}
564
- if (i == 4 && c1 >= 0xdc00 && c1 < 0xe000 ) {
564
+ if (i == 4 && is_lo_surrogate ( c1 ) ) {
565
565
p += 6 ;
566
- c = ((( c & 0x3ff ) << 10 ) | ( c1 & 0x3ff )) + 0x10000 ;
566
+ c = from_surrogate ( c , c1 ) ;
567
567
}
568
568
}
569
569
}
@@ -1092,10 +1092,10 @@ static int re_parse_group_name(char *buf, int buf_size, const uint8_t **pp)
1092
1092
break ;
1093
1093
} else if (c >= 128 ) {
1094
1094
c = unicode_from_utf8 (p , UTF8_CHAR_LEN_MAX , & p );
1095
- if (c >= 0xD800 && c <= 0xDBFF ) {
1095
+ if (is_hi_surrogate ( c ) ) {
1096
1096
d = unicode_from_utf8 (p , UTF8_CHAR_LEN_MAX , & p1 );
1097
- if (d >= 0xDC00 && d <= 0xDFFF ) {
1098
- c = 0x10000 + 0x400 * ( c - 0xD800 ) + ( d - 0xDC00 );
1097
+ if (is_lo_surrogate ( d ) ) {
1098
+ c = from_surrogate ( c , d );
1099
1099
p = p1 ;
1100
1100
}
1101
1101
}
@@ -1935,88 +1935,81 @@ static BOOL is_word_char(uint32_t c)
1935
1935
if (cbuf_type == 0) { \
1936
1936
c = *cptr++; \
1937
1937
} else { \
1938
- uint32_t __c1; \
1939
- c = *(uint16_t *)cptr; \
1940
- cptr += 2; \
1941
- if (c >= 0xd800 && c < 0xdc00 && \
1942
- cbuf_type == 2 && cptr < cbuf_end) { \
1943
- __c1 = *(uint16_t *)cptr; \
1944
- if (__c1 >= 0xdc00 && __c1 < 0xe000) { \
1945
- c = (((c & 0x3ff) << 10) | (__c1 & 0x3ff)) + 0x10000; \
1946
- cptr += 2; \
1947
- } \
1948
- } \
1938
+ const uint16_t *_p = (uint16_t *)cptr; \
1939
+ const uint16_t *_end = (uint16_t *)cbuf_end; \
1940
+ c = *_p++; \
1941
+ if (is_hi_surrogate(c)) \
1942
+ if (cbuf_type == 2) \
1943
+ if (_p < _end) \
1944
+ if (is_lo_surrogate(*_p)) \
1945
+ c = from_surrogate(c, *_p++); \
1946
+ cptr = (void *) _p; \
1949
1947
} \
1950
1948
} while (0)
1951
1949
1952
/*
 * PEEK_CHAR(c, cptr, cbuf_end)
 *
 * Store in `c` the character found at `cptr`; `cptr` itself is not modified.
 *
 * The expansion site must have `cbuf_type` in scope:
 *   - cbuf_type == 0: `c` is simply cptr[0].
 *   - otherwise: the buffer is read as 16-bit code units.  Only when
 *     cbuf_type == 2, a high surrogate followed by a low surrogate located
 *     before `cbuf_end` is combined into one code point with
 *     from_surrogate().  A high surrogate that is the last unit, or that is
 *     not followed by a low surrogate, is returned unchanged.
 *
 * is_hi_surrogate(), is_lo_surrogate() and from_surrogate() are provided
 * elsewhere.  The name must be followed immediately by '(' so that this is
 * a function-like macro.  The look-ahead unit is read without a side effect
 * so the expansion stays correct even if from_surrogate() is a macro.
 */
#define PEEK_CHAR(c, cptr, cbuf_end)                                    \
    do {                                                                \
        if (cbuf_type == 0) {                                           \
            c = (cptr)[0];                                              \
        } else {                                                        \
            const uint16_t *_p = (const uint16_t *)(cptr);              \
            const uint16_t *_end = (const uint16_t *)(cbuf_end);        \
            c = *_p++;                                                  \
            if (is_hi_surrogate(c) && cbuf_type == 2 &&                 \
                _p < _end && is_lo_surrogate(*_p)) {                    \
                c = from_surrogate(c, *_p);                             \
            }                                                           \
        }                                                               \
    } while (0)
1968
1965
1969
/*
 * PEEK_PREV_CHAR(c, cptr, cbuf_start)
 *
 * Store in `c` the character that ends just before `cptr`; `cptr` itself
 * is not modified.
 *
 * The expansion site must have `cbuf_type` in scope:
 *   - cbuf_type == 0: `c` is simply cptr[-1].
 *   - otherwise: the buffer is read as 16-bit code units.  Only when
 *     cbuf_type == 2, a low surrogate preceded by a high surrogate that is
 *     still at or after `cbuf_start` is combined into one code point with
 *     from_surrogate().  A low surrogate that is the first unit, or that is
 *     not preceded by a high surrogate, is returned unchanged.
 *
 * is_hi_surrogate(), is_lo_surrogate() and from_surrogate() are provided
 * elsewhere.  The preceding unit is inspected with _p[-1] instead of a
 * decrement inside the condition, so no pointer changes when the test fails.
 */
#define PEEK_PREV_CHAR(c, cptr, cbuf_start)                             \
    do {                                                                \
        if (cbuf_type == 0) {                                           \
            c = (cptr)[-1];                                             \
        } else {                                                        \
            const uint16_t *_p = (const uint16_t *)(cptr) - 1;          \
            const uint16_t *_start = (const uint16_t *)(cbuf_start);    \
            c = *_p;                                                    \
            if (is_lo_surrogate(c) && cbuf_type == 2 &&                 \
                _p > _start && is_hi_surrogate(_p[-1])) {               \
                c = from_surrogate(_p[-1], c);                          \
            }                                                           \
        }                                                               \
    } while (0)
1985
1981
1986
/*
 * GET_PREV_CHAR(c, cptr, cbuf_start)
 *
 * Step `cptr` back over the character that ends just before it and store
 * that character in `c`.
 *
 * The expansion site must have `cbuf_type` in scope:
 *   - cbuf_type == 0: `cptr` is decremented once and `c` is cptr[0].
 *   - otherwise: the buffer is read as 16-bit code units and `cptr` moves
 *     back by one unit.  Only when cbuf_type == 2, if that unit is a low
 *     surrogate preceded by a high surrogate that is still at or after
 *     `cbuf_start`, `cptr` moves back by one more unit and `c` is the
 *     combined code point from from_surrogate().
 *
 * is_hi_surrogate(), is_lo_surrogate() and from_surrogate() are provided
 * elsewhere.
 *
 * The preceding unit is tested with _p[-1] and `_p` is decremented only
 * once the pair is confirmed.  Decrementing inside the condition
 * (`is_hi_surrogate(*--_p)`) would move `_p` even when the test fails, so a
 * lone low surrogate would make `cptr` skip an extra code unit.
 */
#define GET_PREV_CHAR(c, cptr, cbuf_start)                              \
    do {                                                                \
        if (cbuf_type == 0) {                                           \
            (cptr)--;                                                   \
            c = (cptr)[0];                                              \
        } else {                                                        \
            const uint16_t *_p = (const uint16_t *)(cptr) - 1;          \
            const uint16_t *_start = (const uint16_t *)(cbuf_start);    \
            c = *_p;                                                    \
            if (is_lo_surrogate(c) && cbuf_type == 2 &&                 \
                _p > _start && is_hi_surrogate(_p[-1])) {               \
                _p--;                                                   \
                c = from_surrogate(*_p, c);                             \
            }                                                           \
            (cptr) = (void *)_p;                                        \
        }                                                               \
    } while (0)
2005
1999
2006
/*
 * PREV_CHAR(cptr, cbuf_start)
 *
 * Step `cptr` back over the character that ends just before it, without
 * returning the character.
 *
 * The expansion site must have `cbuf_type` in scope:
 *   - cbuf_type == 0: `cptr` is decremented once.
 *   - otherwise: the buffer is read as 16-bit code units and `cptr` moves
 *     back by one unit.  Only when cbuf_type == 2, if that unit is a low
 *     surrogate preceded by a high surrogate that is still at or after
 *     `cbuf_start`, `cptr` moves back by one more unit.
 *
 * Only the macro's own locals are written; no variable of the expansion
 * site other than `cptr` is modified.  is_hi_surrogate() and
 * is_lo_surrogate() are provided elsewhere.
 */
#define PREV_CHAR(cptr, cbuf_start)                                     \
    do {                                                                \
        if (cbuf_type == 0) {                                           \
            (cptr)--;                                                   \
        } else {                                                        \
            const uint16_t *_p = (const uint16_t *)(cptr) - 1;          \
            const uint16_t *_start = (const uint16_t *)(cbuf_start);    \
            if (is_lo_surrogate(*_p) && cbuf_type == 2 &&               \
                _p > _start && is_hi_surrogate(_p[-1])) {               \
                _p--;                                                   \
            }                                                           \
            (cptr) = (void *)_p;                                        \
        }                                                               \
    } while (0)
2022
2015
0 commit comments