Skip to content

Commit e1dae47

Browse files
committed
[libc] Templatize strtointeger implementation.
* Removes the copy-pasta implementation of wcstointeger, and migrate the wcsto* family of functions to use a template version of strtointeger. * Fixes the out-of-bound read in the original implementation(s) when the entire input string consists of whitespaces (then the sign check can access OOB memory) The code is currently slightly peppered with "if constexpr" statements to distinguish between char and wchar_t. We can probably simplify it in subsequent changes by: * using overrides, so that internal::isalnum() is overriden for both char and wchar_t (since C++ luckily allows us to reuse names). * this wouldn't help for direct comparison with literals - for this as a somewhat ugly workaround like is_char_literal(c, '0', L'0')
1 parent 314754c commit e1dae47

File tree

11 files changed

+135
-259
lines changed

11 files changed

+135
-259
lines changed

libc/src/__support/CMakeLists.txt

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -179,19 +179,7 @@ add_header_library(
179179
DEPENDS
180180
.ctype_utils
181181
.str_to_num_result
182-
libc.hdr.errno_macros
183-
libc.src.__support.CPP.limits
184-
libc.src.__support.CPP.type_traits
185-
libc.src.__support.common
186-
)
187-
188-
add_header_library(
189-
wcs_to_integer
190-
HDRS
191-
wcs_to_integer.h
192-
DEPENDS
193182
.wctype_utils
194-
.str_to_num_result
195183
libc.hdr.errno_macros
196184
libc.src.__support.CPP.limits
197185
libc.src.__support.CPP.type_traits

libc/src/__support/str_to_integer.h

Lines changed: 71 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -25,36 +25,70 @@
2525
#include "src/__support/macros/config.h"
2626
#include "src/__support/str_to_num_result.h"
2727
#include "src/__support/uint128.h"
28+
#include "src/__support/wctype_utils.h"
2829

2930
namespace LIBC_NAMESPACE_DECL {
3031
namespace internal {
3132

3233
// Returns the idx to the first character in src that is not a whitespace
33-
// character (as determined by isspace())
34+
// character (as determined by isspace() / iswspace())
35+
template <typename CharType>
3436
LIBC_INLINE size_t
35-
first_non_whitespace(const char *__restrict src,
37+
first_non_whitespace(const CharType *__restrict src,
3638
size_t src_len = cpp::numeric_limits<size_t>::max()) {
3739
size_t src_cur = 0;
38-
while (src_cur < src_len && internal::isspace(src[src_cur])) {
40+
while (src_cur < src_len) {
41+
if constexpr (cpp::is_same_v<CharType, char>) {
42+
if (!internal::isspace(src[src_cur])) break;
43+
} else {
44+
if (!internal::iswspace(src[src_cur])) break;
45+
}
3946
++src_cur;
4047
}
4148
return src_cur;
4249
}
4350

51+
// Returns 1 if 'src' starts with + or - sign, and returns 0 otherwise.
52+
// Writes the sign value to |is_positive|.
53+
template <typename CharType>
54+
LIBC_INLINE static size_t
55+
consume_sign(const CharType *__restrict src, bool *is_positive) {
56+
*is_positive = true;
57+
if constexpr (cpp::is_same_v<CharType, char>) {
58+
if (*src == '+' || *src == '-') {
59+
*is_positive = (*src == '+');
60+
return 1;
61+
}
62+
} else {
63+
if (*src == L'+' || *src == L'-') {
64+
*is_positive = (*src == L'+');
65+
return 1;
66+
}
67+
}
68+
return 0;
69+
}
70+
4471
// checks if the next 3 characters of the string pointer are the start of a
4572
// hexadecimal number. Does not advance the string pointer.
46-
LIBC_INLINE bool
47-
is_hex_start(const char *__restrict src,
48-
size_t src_len = cpp::numeric_limits<size_t>::max()) {
73+
template<typename CharType>
74+
LIBC_INLINE static bool
75+
is_hex_start(const CharType *__restrict src, size_t src_len) {
4976
if (src_len < 3)
5077
return false;
51-
return *src == '0' && tolower(*(src + 1)) == 'x' && isalnum(*(src + 2)) &&
52-
b36_char_to_int(*(src + 2)) < 16;
78+
if constexpr (cpp::is_same_v<CharType, char>) {
79+
return src[0] == '0' && tolower(src[1]) == 'x' && isalnum(src[2]) &&
80+
b36_char_to_int(src[2]) < 16;
81+
} else {
82+
return src[0] == L'0' && towlower(src[1]) == L'x' && iswalnum(src[2]) &&
83+
b36_wchar_to_int(src[2]) < 16;
84+
}
5385
}
5486

5587
// Takes the address of the string pointer and parses the base from the start of
5688
// it.
57-
LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
89+
template<typename CharType>
90+
LIBC_INLINE static int infer_base(const CharType *__restrict src,
91+
size_t src_len) {
5892
// A hexadecimal number is defined as "the prefix 0x or 0X followed by a
5993
// sequence of the decimal digits and the letters a (or A) through f (or F)
6094
// with values 10 through 15 respectively." (C standard 6.4.4.1)
@@ -63,8 +97,13 @@ LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
6397
// An octal number is defined as "the prefix 0 optionally followed by a
6498
// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
6599
// number that starts with 0, including just 0, is an octal number.
66-
if (src_len > 0 && src[0] == '0')
67-
return 8;
100+
if (src_len > 0) {
101+
if constexpr (cpp::is_same_v<CharType, char>) {
102+
if (src[0] == '0') return 8;
103+
} else {
104+
if (src[0] == L'0') return 8;
105+
}
106+
}
68107
// A decimal number is defined as beginning "with a nonzero digit and
69108
// consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
70109
return 10;
@@ -77,41 +116,33 @@ LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
77116
// -----------------------------------------------------------------------------
78117
// Takes a pointer to a string and the base to convert to. This function is used
79118
// as the backend for all of the string to int functions.
80-
template <class T>
119+
template <typename T, typename CharType>
81120
LIBC_INLINE StrToNumResult<T>
82-
strtointeger(const char *__restrict src, int base,
121+
strtointeger(const CharType *__restrict src, int base,
83122
const size_t src_len = cpp::numeric_limits<size_t>::max()) {
84123
using ResultType = make_integral_or_big_int_unsigned_t<T>;
85124

86-
ResultType result = 0;
87-
88-
bool is_number = false;
89-
size_t src_cur = 0;
90-
int error_val = 0;
91-
92125
if (src_len == 0)
93126
return {0, 0, 0};
94127

95128
if (base < 0 || base == 1 || base > 36)
96129
return {0, 0, EINVAL};
97130

98-
src_cur = first_non_whitespace(src, src_len);
99-
100-
char result_sign = '+';
101-
if (src[src_cur] == '+' || src[src_cur] == '-') {
102-
result_sign = src[src_cur];
103-
++src_cur;
131+
size_t src_cur = first_non_whitespace(src, src_len);
132+
if (src_cur == src_len) {
133+
return {0, 0, 0};
104134
}
105135

136+
bool is_positive;
137+
src_cur += consume_sign(src + src_cur, &is_positive);
138+
106139
if (base == 0)
107140
base = infer_base(src + src_cur, src_len - src_cur);
108141

109142
if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur))
110143
src_cur = src_cur + 2;
111144

112145
constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
113-
const bool is_positive = (result_sign == '+');
114-
115146
ResultType constexpr NEGATIVE_MAX =
116147
!IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1
117148
: cpp::numeric_limits<T>::max();
@@ -120,8 +151,19 @@ strtointeger(const char *__restrict src, int base,
120151
ResultType const abs_max_div_by_base =
121152
abs_max / static_cast<ResultType>(base);
122153

123-
while (src_cur < src_len && isalnum(src[src_cur])) {
124-
int cur_digit = b36_char_to_int(src[src_cur]);
154+
bool is_number = false;
155+
int error_val = 0;
156+
ResultType result = 0;
157+
while (src_cur < src_len) {
158+
int cur_digit;
159+
if constexpr (cpp::is_same_v<CharType, char>) {
160+
if (!isalnum(src[src_cur])) break;
161+
cur_digit = b36_char_to_int(src[src_cur]);
162+
} else {
163+
if (!iswalnum(src[src_cur])) break;
164+
cur_digit = b36_wchar_to_int(src[src_cur]);
165+
}
166+
125167
if (cur_digit >= base)
126168
break;
127169

libc/src/__support/wcs_to_integer.h

Lines changed: 0 additions & 155 deletions
This file was deleted.

libc/src/wchar/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ add_entrypoint_object(
6363
wcstol.h
6464
DEPENDS
6565
libc.src.errno.errno
66-
libc.src.__support.wcs_to_integer
66+
libc.src.__support.str_to_integer
6767
)
6868

6969
add_entrypoint_object(
@@ -74,7 +74,7 @@ add_entrypoint_object(
7474
wcstoll.h
7575
DEPENDS
7676
libc.src.errno.errno
77-
libc.src.__support.wcs_to_integer
77+
libc.src.__support.str_to_integer
7878
)
7979

8080
add_entrypoint_object(
@@ -85,7 +85,7 @@ add_entrypoint_object(
8585
wcstoul.h
8686
DEPENDS
8787
libc.src.errno.errno
88-
libc.src.__support.wcs_to_integer
88+
libc.src.__support.str_to_integer
8989
)
9090

9191
add_entrypoint_object(
@@ -96,7 +96,7 @@ add_entrypoint_object(
9696
wcstoull.h
9797
DEPENDS
9898
libc.src.errno.errno
99-
libc.src.__support.wcs_to_integer
99+
libc.src.__support.str_to_integer
100100
)
101101

102102
add_entrypoint_object(

libc/src/wchar/wcstol.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@
1010
#include "src/__support/common.h"
1111
#include "src/__support/libc_errno.h"
1212
#include "src/__support/macros/config.h"
13-
#include "src/__support/wcs_to_integer.h"
13+
#include "src/__support/str_to_integer.h"
1414

1515
namespace LIBC_NAMESPACE_DECL {
1616

1717
LLVM_LIBC_FUNCTION(long, wcstol,
1818
(const wchar_t *__restrict str, wchar_t **__restrict str_end,
1919
int base)) {
20-
auto result = internal::wcstointeger<long>(str, base);
20+
auto result = internal::strtointeger<long>(str, base);
2121
if (result.has_error())
2222
libc_errno = result.error;
2323

0 commit comments

Comments
 (0)