From cec23f3dd392d5f5ceeefbc68f796b0e3c522eaa Mon Sep 17 00:00:00 2001 From: Luca Bacci Date: Wed, 24 Sep 2025 11:14:25 +0200 Subject: [PATCH] [libc++][windows] Use _wsetlocale() in __locale_guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Querying the current locale string on Windows should always be done with _wsetlocale(). The OS and the CRT support localized language and country names, for example "Norwegian Bokmål_Norway". Narrow setlocale() internally calls _wsetlocale() and converts the returned wide string using the current LC_CTYPE charset. However the string may not be representable in the current LC_CTYPE charset. Additionally, if the LC_CTYPE charset is changed after the query, the returned string becomes invalidly-encoded and cannot be used to restore the locale. This is a problem for code that temporarily changes the thread locale using RAII methods. Fixes #160478 --- .../locale_base_api/locale_guard.h | 16 ++++-- libcxx/include/__locale_dir/support/windows.h | 16 ++++-- .../windows_non_ascii_locale_names.pass.cpp | 52 +++++++++++++++++++ 3 files changed, 78 insertions(+), 6 deletions(-) create mode 100644 libcxx/test/libcxx/localization/locales/windows_non_ascii_locale_names.pass.cpp diff --git a/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h b/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h index e3583634e4322..7065874ea6d32 100644 --- a/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h +++ b/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h @@ -45,7 +45,11 @@ struct __libcpp_locale_guard { // each category. In the second case, we know at least one category won't // be what we want, so we only have to check the first case. if (std::strcmp(__l.__get_locale(), __lc) != 0) { - __locale_all = _strdup(__lc); + // Use wsetlocale to query the current locale string. 
This avoids a lossy + // conversion of the locale string from UTF-16 to the current LC_CTYPE + // charset. The Windows CRT allows language / country strings outside of + // ASCII, e.g. "Norwegian Bokm\u00E5l_Norway.utf8". + __locale_all = _wcsdup(__wsetlocale(nullptr)); if (__locale_all == nullptr) __throw_bad_alloc(); __setlocale(__l.__get_locale()); @@ -57,7 +61,7 @@ struct __libcpp_locale_guard { // for the different categories in the same format as returned by // setlocale(LC_ALL, nullptr). if (__locale_all != nullptr) { - __setlocale(__locale_all); + __wsetlocale(__locale_all); free(__locale_all); } _configthreadlocale(__status); @@ -68,8 +72,14 @@ struct __libcpp_locale_guard { __throw_bad_alloc(); return __new_locale; } + static const wchar_t* __wsetlocale(const wchar_t* __locale) { + const wchar_t* __new_locale = _wsetlocale(LC_ALL, __locale); + if (__new_locale == nullptr) + __throw_bad_alloc(); + return __new_locale; + } int __status; - char* __locale_all = nullptr; + wchar_t* __locale_all = nullptr; }; #endif diff --git a/libcxx/include/__locale_dir/support/windows.h b/libcxx/include/__locale_dir/support/windows.h index 0df8709f118d0..39391ea1fd9c4 100644 --- a/libcxx/include/__locale_dir/support/windows.h +++ b/libcxx/include/__locale_dir/support/windows.h @@ -162,6 +162,12 @@ inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, const char* __loc std::__throw_bad_alloc(); return __new_locale; } +inline _LIBCPP_HIDE_FROM_ABI wchar_t* __wsetlocale(int __category, const wchar_t* __locale) { + wchar_t* __new_locale = ::_wsetlocale(__category, __locale); + if (__new_locale == nullptr) + std::__throw_bad_alloc(); + return __new_locale; +} _LIBCPP_EXPORTED_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc); #endif // _LIBCPP_BUILDING_LIBRARY @@ -309,7 +315,11 @@ struct __locale_guard { // each category. In the second case, we know at least one category won't // be what we want, so we only have to check the first case. 
if (std::strcmp(__l.__get_locale(), __lc) != 0) { - __locale_all = _strdup(__lc); + // Use wsetlocale to query the current locale string. This avoids a lossy + // conversion of the locale string from UTF-16 to the current LC_CTYPE + // charset. The Windows CRT allows language / country strings outside of + // ASCII, e.g. "Norwegian Bokm\u00E5l_Norway.utf8". + __locale_all = _wcsdup(__locale::__wsetlocale(LC_ALL, nullptr)); if (__locale_all == nullptr) std::__throw_bad_alloc(); __locale::__setlocale(LC_ALL, __l.__get_locale()); @@ -321,13 +331,13 @@ struct __locale_guard { // for the different categories in the same format as returned by // setlocale(LC_ALL, nullptr). if (__locale_all != nullptr) { - __locale::__setlocale(LC_ALL, __locale_all); + __locale::__wsetlocale(LC_ALL, __locale_all); free(__locale_all); } _configthreadlocale(__status); } int __status; - char* __locale_all = nullptr; + wchar_t* __locale_all = nullptr; }; #endif // _LIBCPP_BUILDING_LIBRARY diff --git a/libcxx/test/libcxx/localization/locales/windows_non_ascii_locale_names.pass.cpp b/libcxx/test/libcxx/localization/locales/windows_non_ascii_locale_names.pass.cpp new file mode 100644 index 0000000000000..b4b7934986d99 --- /dev/null +++ b/libcxx/test/libcxx/localization/locales/windows_non_ascii_locale_names.pass.cpp @@ -0,0 +1,52 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// REQUIRES: windows + +// The C RunTime library on Windows supports locale strings with +// characters outside the ASCII range. This poses challenges for +// code that temporarily set a custom thread locale. 
+// +// https://github.com/llvm/llvm-project/issues/160478 + +#include <algorithm> +#include <clocale> +#include <cstdlib> +#include <string> + +#include <iomanip> +#include <iostream> +#include <locale> + +#include "test_macros.h" + +void locale_name_replace_codepage(std::string& locale_name, const std::string& codepage) { + auto dot_position = locale_name.rfind('.'); + LIBCPP_ASSERT(dot_position != std::string::npos); + + locale_name = locale_name.substr(0, dot_position) + codepage; +} + +int main(int, char**) { + _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); + + std::string locale_name = std::setlocale(LC_ALL, "norwegian-bokmal"); + + const auto& not_ascii = [](char c) { return (c & 0x80) != 0; }; + LIBCPP_ASSERT(std::any_of(locale_name.begin(), locale_name.end(), not_ascii)); + + locale_name_replace_codepage(locale_name, ".437"); + LIBCPP_ASSERT(std::setlocale(LC_ALL, locale_name.c_str())); + + std::cerr.imbue(std::locale::classic()); + std::cerr << std::setprecision(2) << 0.1 << std::endl; + + return EXIT_SUCCESS; +}