-
Couldn't load subscription status.
- Fork 15k
[libc++] Optimize ctype::to{lower,upper} #145344
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -697,6 +697,20 @@ const ctype_base::mask ctype_base::graph; | |
|
|
||
| // template <> class ctype<wchar_t>; | ||
|
|
||
| template <class CharT> | ||
| static CharT to_upper_impl(CharT c) { | ||
| if (c < 'a' || c > 'z') | ||
| return c; | ||
| return c & ~0x20; | ||
| } | ||
|
|
||
| template <class CharT> | ||
| static CharT to_lower_impl(CharT c) { | ||
| if (c < 'A' || c > 'Z') | ||
| return c; | ||
| return c | 0x20; | ||
| } | ||
|
|
||
|
Comment on lines
+700
to
+713
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code does not work for non-ASCII encodings since the assumption of char ordering cannot be assumed. Is there any reason why we cannot simply call C functions There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Assuming system call to :tolower(c) and touppoer(c) are macro expanded and/or get inlined the performance should be equivalent to above implementation, isn't it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need the classic C locale, not the currently installed locale. |
||
| #if _LIBCPP_HAS_WIDE_CHARACTERS | ||
| constinit locale::id ctype<wchar_t>::id; | ||
|
|
||
|
|
@@ -726,48 +740,19 @@ const wchar_t* ctype<wchar_t>::do_scan_not(mask m, const char_type* low, const c | |
| return low; | ||
| } | ||
|
|
||
| wchar_t ctype<wchar_t>::do_toupper(char_type c) const { | ||
| # ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE | ||
| return std::__libcpp_isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c; | ||
| # elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__) | ||
| return std::__libcpp_isascii(c) ? ctype<char>::__classic_upper_table()[c] : c; | ||
| # else | ||
| return (std::__libcpp_isascii(c) && __locale::__iswlower(c, _LIBCPP_GET_C_LOCALE)) ? c - L'a' + L'A' : c; | ||
| # endif | ||
| } | ||
| wchar_t ctype<wchar_t>::do_toupper(char_type c) const { return to_upper_impl(c); } | ||
|
|
||
| const wchar_t* ctype<wchar_t>::do_toupper(char_type* low, const char_type* high) const { | ||
| for (; low != high; ++low) | ||
| # ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE | ||
| *low = std::__libcpp_isascii(*low) ? _DefaultRuneLocale.__mapupper[*low] : *low; | ||
| # elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__) | ||
| *low = std::__libcpp_isascii(*low) ? ctype<char>::__classic_upper_table()[*low] : *low; | ||
| # else | ||
| *low = | ||
| (std::__libcpp_isascii(*low) && __locale::__islower(*low, _LIBCPP_GET_C_LOCALE)) ? (*low - L'a' + L'A') : *low; | ||
| # endif | ||
| *low = to_upper_impl(*low); | ||
| return low; | ||
| } | ||
|
|
||
| wchar_t ctype<wchar_t>::do_tolower(char_type c) const { | ||
| # ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE | ||
| return std::__libcpp_isascii(c) ? _DefaultRuneLocale.__maplower[c] : c; | ||
| # elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__) | ||
| return std::__libcpp_isascii(c) ? ctype<char>::__classic_lower_table()[c] : c; | ||
| # else | ||
| return (std::__libcpp_isascii(c) && __locale::__isupper(c, _LIBCPP_GET_C_LOCALE)) ? c - L'A' + 'a' : c; | ||
| # endif | ||
| } | ||
| wchar_t ctype<wchar_t>::do_tolower(char_type c) const { return to_lower_impl(c); } | ||
|
|
||
| const wchar_t* ctype<wchar_t>::do_tolower(char_type* low, const char_type* high) const { | ||
| for (; low != high; ++low) | ||
| # ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE | ||
| *low = std::__libcpp_isascii(*low) ? _DefaultRuneLocale.__maplower[*low] : *low; | ||
| # elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__) | ||
| *low = std::__libcpp_isascii(*low) ? ctype<char>::__classic_lower_table()[*low] : *low; | ||
| # else | ||
| *low = (std::__libcpp_isascii(*low) && __locale::__isupper(*low, _LIBCPP_GET_C_LOCALE)) ? *low - L'A' + L'a' : *low; | ||
| # endif | ||
| *low = to_lower_impl(*low); | ||
| return low; | ||
| } | ||
|
|
||
|
|
@@ -811,59 +796,19 @@ ctype<char>::~ctype() { | |
| delete[] __tab_; | ||
| } | ||
|
|
||
| char ctype<char>::do_toupper(char_type c) const { | ||
| #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE | ||
| return std::__libcpp_isascii(c) ? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(c)]) : c; | ||
| #elif defined(__NetBSD__) | ||
| return static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]); | ||
| #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__) | ||
| return std::__libcpp_isascii(c) ? static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c; | ||
| #else | ||
| return (std::__libcpp_isascii(c) && __locale::__islower(c, _LIBCPP_GET_C_LOCALE)) ? c - 'a' + 'A' : c; | ||
| #endif | ||
| } | ||
| char ctype<char>::do_toupper(char_type c) const { return to_upper_impl(c); } | ||
|
|
||
| const char* ctype<char>::do_toupper(char_type* low, const char_type* high) const { | ||
| for (; low != high; ++low) | ||
| #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE | ||
| *low = std::__libcpp_isascii(*low) | ||
| ? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(*low)]) | ||
| : *low; | ||
| #elif defined(__NetBSD__) | ||
| *low = static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(*low)]); | ||
| #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__) | ||
| *low = std::__libcpp_isascii(*low) ? static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low; | ||
| #else | ||
| *low = (std::__libcpp_isascii(*low) && __locale::__islower(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'a' + 'A' : *low; | ||
| #endif | ||
| *low = to_upper_impl(*low); | ||
| return low; | ||
| } | ||
|
|
||
| char ctype<char>::do_tolower(char_type c) const { | ||
| #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE | ||
| return std::__libcpp_isascii(c) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(c)]) : c; | ||
| #elif defined(__NetBSD__) | ||
| return static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(c)]); | ||
| #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__) | ||
| return std::__libcpp_isascii(c) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c; | ||
| #else | ||
| return (std::__libcpp_isascii(c) && __locale::__isupper(c, _LIBCPP_GET_C_LOCALE)) ? c - 'A' + 'a' : c; | ||
| #endif | ||
| } | ||
| char ctype<char>::do_tolower(char_type c) const { return to_lower_impl(c); } | ||
|
|
||
| const char* ctype<char>::do_tolower(char_type* low, const char_type* high) const { | ||
| for (; low != high; ++low) | ||
| #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE | ||
| *low = std::__libcpp_isascii(*low) | ||
| ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(*low)]) | ||
| : *low; | ||
| #elif defined(__NetBSD__) | ||
| *low = static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(*low)]); | ||
| #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__) | ||
| *low = std::__libcpp_isascii(*low) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low; | ||
| #else | ||
| *low = (std::__libcpp_isascii(*low) && __locale::__isupper(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'A' + 'a' : *low; | ||
| #endif | ||
| *low = to_lower_impl(*low); | ||
| return low; | ||
| } | ||
|
|
||
|
|
@@ -1010,36 +955,6 @@ const ctype<char>::mask* ctype<char>::classic_table() noexcept { | |
| } | ||
| #endif | ||
|
|
||
| #if defined(__GLIBC__) | ||
| const int* ctype<char>::__classic_lower_table() noexcept { return _LIBCPP_GET_C_LOCALE->__ctype_tolower; } | ||
|
|
||
| const int* ctype<char>::__classic_upper_table() noexcept { return _LIBCPP_GET_C_LOCALE->__ctype_toupper; } | ||
| #elif defined(__NetBSD__) | ||
| const short* ctype<char>::__classic_lower_table() noexcept { return _C_tolower_tab_ + 1; } | ||
|
|
||
| const short* ctype<char>::__classic_upper_table() noexcept { return _C_toupper_tab_ + 1; } | ||
|
|
||
| #elif defined(__EMSCRIPTEN__) | ||
| const int* ctype<char>::__classic_lower_table() noexcept { return *__ctype_tolower_loc(); } | ||
|
|
||
| const int* ctype<char>::__classic_upper_table() noexcept { return *__ctype_toupper_loc(); } | ||
| #elif defined(__MVS__) | ||
| const unsigned short* ctype<char>::__classic_lower_table() _NOEXCEPT { | ||
| # if defined(__NATIVE_ASCII_F) | ||
| return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->lower); | ||
| # else | ||
| return const_cast<const unsigned short*>(__ctype + __TOLOWER_INDEX); | ||
| # endif | ||
| } | ||
| const unsigned short* ctype<char>::__classic_upper_table() _NOEXCEPT { | ||
| # if defined(__NATIVE_ASCII_F) | ||
| return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->upper); | ||
| # else | ||
| return const_cast<const unsigned short*>(__ctype + __TOUPPER_INDEX); | ||
| # endif | ||
| } | ||
| #endif // __GLIBC__ || __NETBSD__ || __EMSCRIPTEN__ || __MVS__ | ||
|
|
||
| // template <> class ctype_byname<char> | ||
|
|
||
| ctype_byname<char>::ctype_byname(const char* name, size_t refs) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removing those function breaks ABI. The z/OS intends to keep them around. Wonder why this was not discussed among different vendors.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These functions were never used outside the dylib, so no program can ever reference them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see these were used inside locale.cpp only so they should never be externalized. In theory once the symbol is exported nothing stops of being called externally though I would think this is unlikely.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, exactly. This is a libc++-internal function. We've never used it ourselves in the headers, so it's your own fault if you call them. It's even more unlikely anybody even tried to use them, since it's not available on most platforms either.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To be clear we never called this function except the internal code which was removed in this PR. After reviewing this internally we decided to remove those symbols. I appreciate your time and the clarity you've provided.