Skip to content

Commit 315dfe5

Browse files
authored
[libc] Templatize strtointeger implementation. (llvm#165884)
* Removes the copy-pasta implementation of wcstointeger, and migrate the wcsto* family of functions to use a template version of strtointeger. * Fixes the out-of-bound read in the original implementation(s) when the entire input string consists of whitespaces (then the sign check can access OOB memory) The code is currently slightly peppered with "if constexpr" statements to distinguish between char and wchar_t. We can probably simplify it in subsequent changes by: * using overrides, so that internal::isalnum() is overriden for both char and wchar_t (since C++ luckily allows us to reuse names). * this wouldn't help for direct comparison with literals - for this as a somewhat ugly workaround like is_char_literal(c, '0', L'0')
1 parent 4a5692d commit 315dfe5

File tree

12 files changed

+139
-261
lines changed

12 files changed

+139
-261
lines changed

libc/src/__support/CMakeLists.txt

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -179,19 +179,7 @@ add_header_library(
179179
DEPENDS
180180
.ctype_utils
181181
.str_to_num_result
182-
libc.hdr.errno_macros
183-
libc.src.__support.CPP.limits
184-
libc.src.__support.CPP.type_traits
185-
libc.src.__support.common
186-
)
187-
188-
add_header_library(
189-
wcs_to_integer
190-
HDRS
191-
wcs_to_integer.h
192-
DEPENDS
193182
.wctype_utils
194-
.str_to_num_result
195183
libc.hdr.errno_macros
196184
libc.src.__support.CPP.limits
197185
libc.src.__support.CPP.type_traits

libc/src/__support/str_to_integer.h

Lines changed: 69 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -25,36 +25,63 @@
2525
#include "src/__support/macros/config.h"
2626
#include "src/__support/str_to_num_result.h"
2727
#include "src/__support/uint128.h"
28+
#include "src/__support/wctype_utils.h"
2829

2930
namespace LIBC_NAMESPACE_DECL {
3031
namespace internal {
3132

3233
// Returns the idx to the first character in src that is not a whitespace
33-
// character (as determined by isspace())
34+
// character (as determined by isspace() / iswspace())
35+
template <typename CharType>
3436
LIBC_INLINE size_t
35-
first_non_whitespace(const char *__restrict src,
37+
first_non_whitespace(const CharType *__restrict src,
3638
size_t src_len = cpp::numeric_limits<size_t>::max()) {
3739
size_t src_cur = 0;
38-
while (src_cur < src_len && internal::isspace(src[src_cur])) {
40+
while (src_cur < src_len) {
41+
if constexpr (cpp::is_same_v<CharType, char>) {
42+
if (!internal::isspace(src[src_cur]))
43+
break;
44+
} else {
45+
if (!internal::iswspace(src[src_cur]))
46+
break;
47+
}
3948
++src_cur;
4049
}
4150
return src_cur;
4251
}
4352

53+
// Returns +1, -1, or 0 if 'src' starts with (respectively)
54+
// plus sign, minus sign, or neither.
55+
template <typename CharType>
56+
LIBC_INLINE static int get_sign(const CharType *__restrict src) {
57+
if constexpr (cpp::is_same_v<CharType, char>) {
58+
return (src[0] == '+') ? 1 : (src[0] == '-' ? -1 : 0);
59+
} else {
60+
return (src[0] == L'+') ? 1 : (src[0] == L'-' ? -1 : 0);
61+
}
62+
}
63+
4464
// checks if the next 3 characters of the string pointer are the start of a
4565
// hexadecimal number. Does not advance the string pointer.
46-
LIBC_INLINE bool
47-
is_hex_start(const char *__restrict src,
48-
size_t src_len = cpp::numeric_limits<size_t>::max()) {
66+
template <typename CharType>
67+
LIBC_INLINE static bool is_hex_start(const CharType *__restrict src,
68+
size_t src_len) {
4969
if (src_len < 3)
5070
return false;
51-
return *src == '0' && tolower(*(src + 1)) == 'x' && isalnum(*(src + 2)) &&
52-
b36_char_to_int(*(src + 2)) < 16;
71+
if constexpr (cpp::is_same_v<CharType, char>) {
72+
return src[0] == '0' && tolower(src[1]) == 'x' && isalnum(src[2]) &&
73+
b36_char_to_int(src[2]) < 16;
74+
} else {
75+
return src[0] == L'0' && towlower(src[1]) == L'x' && iswalnum(src[2]) &&
76+
b36_wchar_to_int(src[2]) < 16;
77+
}
5378
}
5479

5580
// Takes the address of the string pointer and parses the base from the start of
5681
// it.
57-
LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
82+
template <typename CharType>
83+
LIBC_INLINE static int infer_base(const CharType *__restrict src,
84+
size_t src_len) {
5885
// A hexadecimal number is defined as "the prefix 0x or 0X followed by a
5986
// sequence of the decimal digits and the letters a (or A) through f (or F)
6087
// with values 10 through 15 respectively." (C standard 6.4.4.1)
@@ -63,8 +90,15 @@ LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
6390
// An octal number is defined as "the prefix 0 optionally followed by a
6491
// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
6592
// number that starts with 0, including just 0, is an octal number.
66-
if (src_len > 0 && src[0] == '0')
67-
return 8;
93+
if (src_len > 0) {
94+
if constexpr (cpp::is_same_v<CharType, char>) {
95+
if (src[0] == '0')
96+
return 8;
97+
} else {
98+
if (src[0] == L'0')
99+
return 8;
100+
}
101+
}
68102
// A decimal number is defined as beginning "with a nonzero digit and
69103
// consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
70104
return 10;
@@ -77,41 +111,34 @@ LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
77111
// -----------------------------------------------------------------------------
78112
// Takes a pointer to a string and the base to convert to. This function is used
79113
// as the backend for all of the string to int functions.
80-
template <class T>
114+
template <typename T, typename CharType>
81115
LIBC_INLINE StrToNumResult<T>
82-
strtointeger(const char *__restrict src, int base,
116+
strtointeger(const CharType *__restrict src, int base,
83117
const size_t src_len = cpp::numeric_limits<size_t>::max()) {
84118
using ResultType = make_integral_or_big_int_unsigned_t<T>;
85119

86-
ResultType result = 0;
87-
88-
bool is_number = false;
89-
size_t src_cur = 0;
90-
int error_val = 0;
91-
92120
if (src_len == 0)
93121
return {0, 0, 0};
94122

95123
if (base < 0 || base == 1 || base > 36)
96124
return {0, 0, EINVAL};
97125

98-
src_cur = first_non_whitespace(src, src_len);
99-
100-
char result_sign = '+';
101-
if (src[src_cur] == '+' || src[src_cur] == '-') {
102-
result_sign = src[src_cur];
103-
++src_cur;
126+
size_t src_cur = first_non_whitespace(src, src_len);
127+
if (src_cur == src_len) {
128+
return {0, 0, 0};
104129
}
105130

131+
int sign = get_sign(src + src_cur);
132+
bool is_positive = (sign >= 0);
133+
src_cur += (sign != 0);
134+
106135
if (base == 0)
107136
base = infer_base(src + src_cur, src_len - src_cur);
108137

109138
if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur))
110139
src_cur = src_cur + 2;
111140

112141
constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
113-
const bool is_positive = (result_sign == '+');
114-
115142
ResultType constexpr NEGATIVE_MAX =
116143
!IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1
117144
: cpp::numeric_limits<T>::max();
@@ -120,8 +147,21 @@ strtointeger(const char *__restrict src, int base,
120147
ResultType const abs_max_div_by_base =
121148
abs_max / static_cast<ResultType>(base);
122149

123-
while (src_cur < src_len && isalnum(src[src_cur])) {
124-
int cur_digit = b36_char_to_int(src[src_cur]);
150+
bool is_number = false;
151+
int error_val = 0;
152+
ResultType result = 0;
153+
while (src_cur < src_len) {
154+
int cur_digit;
155+
if constexpr (cpp::is_same_v<CharType, char>) {
156+
if (!isalnum(src[src_cur]))
157+
break;
158+
cur_digit = b36_char_to_int(src[src_cur]);
159+
} else {
160+
if (!iswalnum(src[src_cur]))
161+
break;
162+
cur_digit = b36_wchar_to_int(src[src_cur]);
163+
}
164+
125165
if (cur_digit >= base)
126166
break;
127167

libc/src/__support/wcs_to_integer.h

Lines changed: 0 additions & 155 deletions
This file was deleted.

libc/src/wchar/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ add_entrypoint_object(
6363
wcstol.h
6464
DEPENDS
6565
libc.src.errno.errno
66-
libc.src.__support.wcs_to_integer
66+
libc.src.__support.str_to_integer
6767
)
6868

6969
add_entrypoint_object(
@@ -74,7 +74,7 @@ add_entrypoint_object(
7474
wcstoll.h
7575
DEPENDS
7676
libc.src.errno.errno
77-
libc.src.__support.wcs_to_integer
77+
libc.src.__support.str_to_integer
7878
)
7979

8080
add_entrypoint_object(
@@ -85,7 +85,7 @@ add_entrypoint_object(
8585
wcstoul.h
8686
DEPENDS
8787
libc.src.errno.errno
88-
libc.src.__support.wcs_to_integer
88+
libc.src.__support.str_to_integer
8989
)
9090

9191
add_entrypoint_object(
@@ -96,7 +96,7 @@ add_entrypoint_object(
9696
wcstoull.h
9797
DEPENDS
9898
libc.src.errno.errno
99-
libc.src.__support.wcs_to_integer
99+
libc.src.__support.str_to_integer
100100
)
101101

102102
add_entrypoint_object(

libc/src/wchar/wcstol.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@
1010
#include "src/__support/common.h"
1111
#include "src/__support/libc_errno.h"
1212
#include "src/__support/macros/config.h"
13-
#include "src/__support/wcs_to_integer.h"
13+
#include "src/__support/str_to_integer.h"
1414

1515
namespace LIBC_NAMESPACE_DECL {
1616

1717
LLVM_LIBC_FUNCTION(long, wcstol,
1818
(const wchar_t *__restrict str, wchar_t **__restrict str_end,
1919
int base)) {
20-
auto result = internal::wcstointeger<long>(str, base);
20+
auto result = internal::strtointeger<long>(str, base);
2121
if (result.has_error())
2222
libc_errno = result.error;
2323

0 commit comments

Comments
 (0)