Skip to content

Commit 120689e

Browse files
authored
[libc] Migrate ctype_utils to use char instead of int where applicable. (#166225)
Functions like isalpha / tolower can operate on chars internally. This allows us to get rid of unnecessary casts and open a way to creating wchar_t overloads with the same names (e.g. for isalpha), that would simplify templated code for conversion functions (see 315dfe5). Add the int->char converstion to public entrypoints implementation instead. We also need to introduce bounds check on the input argument values - these functions' behavior is unspecified if the argument is neither EOF nor fits in "unsigned char" range, but the tests we've had verified that they always return false for small negative values. To preserve this behavior, cover it explicitly.
1 parent c193eea commit 120689e

37 files changed

+196
-86
lines changed

libc/src/__support/ctype_utils.h

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ namespace internal {
2727
// as well as a way to support non-ASCII character encodings.
2828

2929
// Similarly, do not change these functions to use case ranges. e.g.
30-
// bool islower(int ch) {
30+
// bool islower(char ch) {
3131
// switch(ch) {
3232
// case 'a'...'z':
3333
// return true;
@@ -37,7 +37,7 @@ namespace internal {
3737
// EBCDIC. Technically we could use some smaller ranges, but that's even harder
3838
// to read.
3939

40-
LIBC_INLINE static constexpr bool islower(int ch) {
40+
LIBC_INLINE static constexpr bool islower(char ch) {
4141
switch (ch) {
4242
case 'a':
4343
case 'b':
@@ -71,7 +71,7 @@ LIBC_INLINE static constexpr bool islower(int ch) {
7171
}
7272
}
7373

74-
LIBC_INLINE static constexpr bool isupper(int ch) {
74+
LIBC_INLINE static constexpr bool isupper(char ch) {
7575
switch (ch) {
7676
case 'A':
7777
case 'B':
@@ -105,7 +105,7 @@ LIBC_INLINE static constexpr bool isupper(int ch) {
105105
}
106106
}
107107

108-
LIBC_INLINE static constexpr bool isdigit(int ch) {
108+
LIBC_INLINE static constexpr bool isdigit(char ch) {
109109
switch (ch) {
110110
case '0':
111111
case '1':
@@ -123,7 +123,7 @@ LIBC_INLINE static constexpr bool isdigit(int ch) {
123123
}
124124
}
125125

126-
LIBC_INLINE static constexpr int tolower(int ch) {
126+
LIBC_INLINE static constexpr char tolower(char ch) {
127127
switch (ch) {
128128
case 'A':
129129
return 'a';
@@ -182,7 +182,7 @@ LIBC_INLINE static constexpr int tolower(int ch) {
182182
}
183183
}
184184

185-
LIBC_INLINE static constexpr int toupper(int ch) {
185+
LIBC_INLINE static constexpr char toupper(char ch) {
186186
switch (ch) {
187187
case 'a':
188188
return 'A';
@@ -241,7 +241,7 @@ LIBC_INLINE static constexpr int toupper(int ch) {
241241
}
242242
}
243243

244-
LIBC_INLINE static constexpr bool isalpha(int ch) {
244+
LIBC_INLINE static constexpr bool isalpha(char ch) {
245245
switch (ch) {
246246
case 'a':
247247
case 'b':
@@ -301,7 +301,7 @@ LIBC_INLINE static constexpr bool isalpha(int ch) {
301301
}
302302
}
303303

304-
LIBC_INLINE static constexpr bool isalnum(int ch) {
304+
LIBC_INLINE static constexpr bool isalnum(char ch) {
305305
switch (ch) {
306306
case 'a':
307307
case 'b':
@@ -371,7 +371,7 @@ LIBC_INLINE static constexpr bool isalnum(int ch) {
371371
}
372372
}
373373

374-
LIBC_INLINE static constexpr int b36_char_to_int(int ch) {
374+
LIBC_INLINE static constexpr int b36_char_to_int(char ch) {
375375
switch (ch) {
376376
case '0':
377377
return 0;
@@ -476,7 +476,7 @@ LIBC_INLINE static constexpr int b36_char_to_int(int ch) {
476476
}
477477
}
478478

479-
LIBC_INLINE static constexpr int int_to_b36_char(int num) {
479+
LIBC_INLINE static constexpr char int_to_b36_char(int num) {
480480
// Can't actually use LIBC_ASSERT here because it depends on integer_to_string
481481
// which depends on this.
482482

@@ -559,7 +559,7 @@ LIBC_INLINE static constexpr int int_to_b36_char(int num) {
559559
}
560560
}
561561

562-
LIBC_INLINE static constexpr bool isspace(int ch) {
562+
LIBC_INLINE static constexpr bool isspace(char ch) {
563563
switch (ch) {
564564
case ' ':
565565
case '\t':
@@ -574,7 +574,7 @@ LIBC_INLINE static constexpr bool isspace(int ch) {
574574
}
575575

576576
// not yet encoding independent.
577-
LIBC_INLINE static constexpr bool isgraph(int ch) {
577+
LIBC_INLINE static constexpr bool isgraph(char ch) {
578578
return 0x20 < ch && ch < 0x7f;
579579
}
580580

libc/src/__support/integer_to_string.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -378,9 +378,8 @@ template <typename T, typename Fmt = radix::Dec> class IntegerToString {
378378
using UNSIGNED_T = make_integral_or_big_int_unsigned_t<T>;
379379

380380
LIBC_INLINE static char digit_char(uint8_t digit) {
381-
const int result = internal::int_to_b36_char(digit);
382-
return static_cast<char>(Fmt::IS_UPPERCASE ? internal::toupper(result)
383-
: result);
381+
const char result = internal::int_to_b36_char(digit);
382+
return Fmt::IS_UPPERCASE ? internal::toupper(result) : result;
384383
}
385384

386385
LIBC_INLINE static void

libc/src/ctype/CMakeLists.txt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ add_entrypoint_object(
66
isalnum.h
77
DEPENDS
88
libc.include.ctype
9+
libc.src.__support.CPP.limits
910
libc.src.__support.ctype_utils
1011
)
1112

@@ -16,6 +17,7 @@ add_entrypoint_object(
1617
HDRS
1718
isalpha.h
1819
DEPENDS
20+
libc.src.__support.CPP.limits
1921
libc.src.__support.ctype_utils
2022
)
2123

@@ -50,6 +52,7 @@ add_entrypoint_object(
5052
HDRS
5153
isdigit.h
5254
DEPENDS
55+
libc.src.__support.CPP.limits
5356
libc.src.__support.ctype_utils
5457
)
5558

@@ -60,6 +63,7 @@ add_entrypoint_object(
6063
HDRS
6164
isgraph.h
6265
DEPENDS
66+
libc.src.__support.CPP.limits
6367
libc.src.__support.ctype_utils
6468
)
6569

@@ -70,6 +74,7 @@ add_entrypoint_object(
7074
HDRS
7175
islower.h
7276
DEPENDS
77+
libc.src.__support.CPP.limits
7378
libc.src.__support.ctype_utils
7479
)
7580

@@ -88,6 +93,7 @@ add_entrypoint_object(
8893
HDRS
8994
ispunct.h
9095
DEPENDS
96+
libc.src.__support.CPP.limits
9197
libc.src.__support.ctype_utils
9298
)
9399

@@ -97,6 +103,9 @@ add_entrypoint_object(
97103
isspace.cpp
98104
HDRS
99105
isspace.h
106+
DEPENDS
107+
libc.src.__support.CPP.limits
108+
libc.src.__support.ctype_utils
100109
)
101110

102111
add_entrypoint_object(
@@ -106,6 +115,7 @@ add_entrypoint_object(
106115
HDRS
107116
isupper.h
108117
DEPENDS
118+
libc.src.__support.CPP.limits
109119
libc.src.__support.ctype_utils
110120
)
111121

@@ -116,6 +126,7 @@ add_entrypoint_object(
116126
HDRS
117127
isxdigit.h
118128
DEPENDS
129+
libc.src.__support.CPP.limits
119130
libc.src.__support.ctype_utils
120131
)
121132

@@ -126,6 +137,7 @@ add_entrypoint_object(
126137
HDRS
127138
tolower.h
128139
DEPENDS
140+
libc.src.__support.CPP.limits
129141
libc.src.__support.ctype_utils
130142
)
131143

@@ -144,6 +156,7 @@ add_entrypoint_object(
144156
HDRS
145157
toupper.h
146158
DEPENDS
159+
libc.src.__support.CPP.limits
147160
libc.src.__support.ctype_utils
148161
)
149162

@@ -160,6 +173,7 @@ add_entrypoint_object(
160173
isalnum_l.h
161174
DEPENDS
162175
libc.include.ctype
176+
libc.src.__support.CPP.limits
163177
libc.src.__support.ctype_utils
164178
libc.hdr.types.locale_t
165179
)
@@ -171,6 +185,7 @@ add_entrypoint_object(
171185
HDRS
172186
isalpha_l.h
173187
DEPENDS
188+
libc.src.__support.CPP.limits
174189
libc.src.__support.ctype_utils
175190
libc.hdr.types.locale_t
176191
)
@@ -202,6 +217,7 @@ add_entrypoint_object(
202217
HDRS
203218
isdigit_l.h
204219
DEPENDS
220+
libc.src.__support.CPP.limits
205221
libc.src.__support.ctype_utils
206222
libc.hdr.types.locale_t
207223
)
@@ -224,6 +240,7 @@ add_entrypoint_object(
224240
HDRS
225241
islower_l.h
226242
DEPENDS
243+
libc.src.__support.CPP.limits
227244
libc.src.__support.ctype_utils
228245
libc.hdr.types.locale_t
229246
)
@@ -257,6 +274,8 @@ add_entrypoint_object(
257274
isspace_l.h
258275
DEPENDS
259276
libc.hdr.types.locale_t
277+
libc.src.__support.CPP.limits
278+
libc.src.__support.ctype_utils
260279
)
261280

262281
add_entrypoint_object(
@@ -266,6 +285,7 @@ add_entrypoint_object(
266285
HDRS
267286
isupper_l.h
268287
DEPENDS
288+
libc.src.__support.CPP.limits
269289
libc.src.__support.ctype_utils
270290
libc.hdr.types.locale_t
271291
)
@@ -277,6 +297,7 @@ add_entrypoint_object(
277297
HDRS
278298
isxdigit_l.h
279299
DEPENDS
300+
libc.src.__support.CPP.limits
280301
libc.src.__support.ctype_utils
281302
libc.hdr.types.locale_t
282303
)
@@ -288,6 +309,7 @@ add_entrypoint_object(
288309
HDRS
289310
tolower_l.h
290311
DEPENDS
312+
libc.src.__support.CPP.limits
291313
libc.src.__support.ctype_utils
292314
libc.hdr.types.locale_t
293315
)
@@ -299,6 +321,7 @@ add_entrypoint_object(
299321
HDRS
300322
toupper_l.h
301323
DEPENDS
324+
libc.src.__support.CPP.limits
302325
libc.src.__support.ctype_utils
303326
libc.hdr.types.locale_t
304327
)

libc/src/ctype/isalnum.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,18 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "src/ctype/isalnum.h"
10-
#include "src/__support/ctype_utils.h"
1110

11+
#include "src/__support/CPP/limits.h"
1212
#include "src/__support/common.h"
13+
#include "src/__support/ctype_utils.h"
1314
#include "src/__support/macros/config.h"
1415

1516
namespace LIBC_NAMESPACE_DECL {
1617

1718
LLVM_LIBC_FUNCTION(int, isalnum, (int c)) {
18-
return static_cast<int>(internal::isalnum(static_cast<unsigned>(c)));
19+
if (c < 0 || c > cpp::numeric_limits<unsigned char>::max())
20+
return 0;
21+
return static_cast<int>(internal::isalnum(static_cast<char>(c)));
1922
}
2023

2124
} // namespace LIBC_NAMESPACE_DECL

libc/src/ctype/isalnum_l.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,18 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "src/ctype/isalnum_l.h"
10-
#include "src/__support/ctype_utils.h"
1110

11+
#include "src/__support/CPP/limits.h"
1212
#include "src/__support/common.h"
13+
#include "src/__support/ctype_utils.h"
1314
#include "src/__support/macros/config.h"
1415

1516
namespace LIBC_NAMESPACE_DECL {
1617

1718
LLVM_LIBC_FUNCTION(int, isalnum_l, (int c, locale_t)) {
18-
return static_cast<int>(internal::isalnum(static_cast<unsigned>(c)));
19+
if (c < 0 || c > cpp::numeric_limits<unsigned char>::max())
20+
return 0;
21+
return static_cast<int>(internal::isalnum(static_cast<char>(c)));
1922
}
2023

2124
} // namespace LIBC_NAMESPACE_DECL

libc/src/ctype/isalpha.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,17 @@
88

99
#include "src/ctype/isalpha.h"
1010

11+
#include "src/__support/CPP/limits.h"
1112
#include "src/__support/common.h"
1213
#include "src/__support/ctype_utils.h"
1314
#include "src/__support/macros/config.h"
1415

1516
namespace LIBC_NAMESPACE_DECL {
1617

1718
LLVM_LIBC_FUNCTION(int, isalpha, (int c)) {
18-
return static_cast<int>(internal::isalpha(static_cast<unsigned>(c)));
19+
if (c < 0 || c > cpp::numeric_limits<unsigned char>::max())
20+
return 0;
21+
return static_cast<int>(internal::isalpha(static_cast<char>(c)));
1922
}
2023

2124
} // namespace LIBC_NAMESPACE_DECL

libc/src/ctype/isalpha_l.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,17 @@
88

99
#include "src/ctype/isalpha_l.h"
1010

11+
#include "src/__support/CPP/limits.h"
1112
#include "src/__support/common.h"
1213
#include "src/__support/ctype_utils.h"
1314
#include "src/__support/macros/config.h"
1415

1516
namespace LIBC_NAMESPACE_DECL {
1617

1718
LLVM_LIBC_FUNCTION(int, isalpha_l, (int c, locale_t)) {
18-
return static_cast<int>(internal::isalpha(static_cast<unsigned>(c)));
19+
if (c < 0 || c > cpp::numeric_limits<unsigned char>::max())
20+
return 0;
21+
return static_cast<int>(internal::isalpha(static_cast<char>(c)));
1922
}
2023

2124
} // namespace LIBC_NAMESPACE_DECL

libc/src/ctype/isdigit.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,18 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "src/ctype/isdigit.h"
10+
11+
#include "src/__support/CPP/limits.h"
1012
#include "src/__support/common.h"
1113
#include "src/__support/ctype_utils.h"
1214
#include "src/__support/macros/config.h"
1315

1416
namespace LIBC_NAMESPACE_DECL {
1517

1618
LLVM_LIBC_FUNCTION(int, isdigit, (int c)) {
17-
return static_cast<int>(internal::isdigit(static_cast<unsigned>(c)));
19+
if (c < 0 || c > cpp::numeric_limits<unsigned char>::max())
20+
return 0;
21+
return static_cast<int>(internal::isdigit(static_cast<char>(c)));
1822
}
1923

2024
} // namespace LIBC_NAMESPACE_DECL

0 commit comments

Comments
 (0)