2525#include " src/__support/macros/config.h"
2626#include " src/__support/str_to_num_result.h"
2727#include " src/__support/uint128.h"
28+ #include " src/__support/wctype_utils.h"
2829
2930namespace LIBC_NAMESPACE_DECL {
3031namespace internal {
3132
3233// Returns the idx to the first character in src that is not a whitespace
33- // character (as determined by isspace())
34+ // character (as determined by isspace() / iswspace())
35+ template <typename CharType>
3436LIBC_INLINE size_t
35- first_non_whitespace (const char *__restrict src,
37+ first_non_whitespace (const CharType *__restrict src,
3638 size_t src_len = cpp::numeric_limits<size_t >::max()) {
3739 size_t src_cur = 0 ;
38- while (src_cur < src_len && internal::isspace (src[src_cur])) {
40+ while (src_cur < src_len) {
41+ if constexpr (cpp::is_same_v<CharType, char >) {
42+ if (!internal::isspace (src[src_cur])) break ;
43+ } else {
44+ if (!internal::iswspace (src[src_cur])) break ;
45+ }
3946 ++src_cur;
4047 }
4148 return src_cur;
4249}
4350
51+ // Returns 1 if 'src' starts with + or - sign, and returns 0 otherwise.
52+ // Writes the sign value to |is_positive|.
53+ template <typename CharType>
54+ LIBC_INLINE static size_t
55+ consume_sign (const CharType *__restrict src, bool *is_positive) {
56+ *is_positive = true ;
57+ if constexpr (cpp::is_same_v<CharType, char >) {
58+ if (*src == ' +' || *src == ' -' ) {
59+ *is_positive = (*src == ' +' );
60+ return 1 ;
61+ }
62+ } else {
63+ if (*src == L' +' || *src == L' -' ) {
64+ *is_positive = (*src == L' +' );
65+ return 1 ;
66+ }
67+ }
68+ return 0 ;
69+ }
70+
4471// checks if the next 3 characters of the string pointer are the start of a
4572// hexadecimal number. Does not advance the string pointer.
46- LIBC_INLINE bool
47- is_hex_start ( const char *__restrict src,
48- size_t src_len = cpp::numeric_limits< size_t >::max() ) {
73+ template < typename CharType>
74+ LIBC_INLINE static bool
75+ is_hex_start ( const CharType *__restrict src, size_t src_len) {
4976 if (src_len < 3 )
5077 return false ;
51- return *src == ' 0' && tolower (*(src + 1 )) == ' x' && isalnum (*(src + 2 )) &&
52- b36_char_to_int (*(src + 2 )) < 16 ;
78+ if constexpr (cpp::is_same_v<CharType, char >) {
79+ return src[0 ] == ' 0' && tolower (src[1 ]) == ' x' && isalnum (src[2 ]) &&
80+ b36_char_to_int (src[2 ]) < 16 ;
81+ } else {
82+ return src[0 ] == L' 0' && towlower (src[1 ]) == L' x' && iswalnum (src[2 ]) &&
83+ b36_wchar_to_int (src[2 ]) < 16 ;
84+ }
5385}
5486
5587// Takes the address of the string pointer and parses the base from the start of
5688// it.
57- LIBC_INLINE int infer_base (const char *__restrict src, size_t src_len) {
89+ template <typename CharType>
90+ LIBC_INLINE static int infer_base (const CharType *__restrict src,
91+ size_t src_len) {
5892 // A hexadecimal number is defined as "the prefix 0x or 0X followed by a
5993 // sequence of the decimal digits and the letters a (or A) through f (or F)
6094 // with values 10 through 15 respectively." (C standard 6.4.4.1)
@@ -63,8 +97,13 @@ LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
6397 // An octal number is defined as "the prefix 0 optionally followed by a
6498 // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
6599 // number that starts with 0, including just 0, is an octal number.
66- if (src_len > 0 && src[0 ] == ' 0' )
67- return 8 ;
100+ if (src_len > 0 ) {
101+ if constexpr (cpp::is_same_v<CharType, char >) {
102+ if (src[0 ] == ' 0' ) return 8 ;
103+ } else {
104+ if (src[0 ] == L' 0' ) return 8 ;
105+ }
106+ }
68107 // A decimal number is defined as beginning "with a nonzero digit and
69108 // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
70109 return 10 ;
@@ -77,41 +116,33 @@ LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
77116// -----------------------------------------------------------------------------
78117// Takes a pointer to a string and the base to convert to. This function is used
79118// as the backend for all of the string to int functions.
80- template <class T >
119+ template <typename T, typename CharType >
81120LIBC_INLINE StrToNumResult<T>
82- strtointeger (const char *__restrict src, int base,
121+ strtointeger (const CharType *__restrict src, int base,
83122 const size_t src_len = cpp::numeric_limits<size_t >::max()) {
84123 using ResultType = make_integral_or_big_int_unsigned_t <T>;
85124
86- ResultType result = 0 ;
87-
88- bool is_number = false ;
89- size_t src_cur = 0 ;
90- int error_val = 0 ;
91-
92125 if (src_len == 0 )
93126 return {0 , 0 , 0 };
94127
95128 if (base < 0 || base == 1 || base > 36 )
96129 return {0 , 0 , EINVAL};
97130
98- src_cur = first_non_whitespace (src, src_len);
99-
100- char result_sign = ' +' ;
101- if (src[src_cur] == ' +' || src[src_cur] == ' -' ) {
102- result_sign = src[src_cur];
103- ++src_cur;
131+ size_t src_cur = first_non_whitespace (src, src_len);
132+ if (src_cur == src_len) {
133+ return {0 , 0 , 0 };
104134 }
105135
136+ bool is_positive;
137+ src_cur += consume_sign (src + src_cur, &is_positive);
138+
106139 if (base == 0 )
107140 base = infer_base (src + src_cur, src_len - src_cur);
108141
109142 if (base == 16 && is_hex_start (src + src_cur, src_len - src_cur))
110143 src_cur = src_cur + 2 ;
111144
112145 constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
113- const bool is_positive = (result_sign == ' +' );
114-
115146 ResultType constexpr NEGATIVE_MAX =
116147 !IS_UNSIGNED ? static_cast <ResultType>(cpp::numeric_limits<T>::max ()) + 1
117148 : cpp::numeric_limits<T>::max ();
@@ -120,8 +151,19 @@ strtointeger(const char *__restrict src, int base,
120151 ResultType const abs_max_div_by_base =
121152 abs_max / static_cast <ResultType>(base);
122153
123- while (src_cur < src_len && isalnum (src[src_cur])) {
124- int cur_digit = b36_char_to_int (src[src_cur]);
154+ bool is_number = false ;
155+ int error_val = 0 ;
156+ ResultType result = 0 ;
157+ while (src_cur < src_len) {
158+ int cur_digit;
159+ if constexpr (cpp::is_same_v<CharType, char >) {
160+ if (!isalnum (src[src_cur])) break ;
161+ cur_digit = b36_char_to_int (src[src_cur]);
162+ } else {
163+ if (!iswalnum (src[src_cur])) break ;
164+ cur_digit = b36_wchar_to_int (src[src_cur]);
165+ }
166+
125167 if (cur_digit >= base)
126168 break ;
127169
0 commit comments