Skip to content

Commit 422a60e

Browse files
coopp and Cooper Partin
authored
Update locale setting logic to support UTF-8 only for Apple and Linux distros (microsoft#6488)
This commit fixes the setlocale() logic to use the UTF-8 locale string that is supported on Mariner distros. It also introduces a new RAII class `ScopedLocale`, which ensures that the locale is set and then restored around string conversion operations. Fixes: microsoft#6201 --------- Co-authored-by: Cooper Partin <[email protected]> Co-authored-by: cooppunix <cooppunix&mariner.com>
1 parent 0781ded commit 422a60e

31 files changed

+134
-150
lines changed

include/dxc/Test/HlslTestUtils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ using namespace std;
7777
const char *pTmpA = (a); \
7878
const char *pTmpB = (b); \
7979
if (0 != strcmp(pTmpA, pTmpB)) { \
80-
CA2W conv(pTmpB, CP_UTF8); \
80+
CA2W conv(pTmpB); \
8181
WEX::Logging::Log::Comment(conv); \
8282
const char *pA = pTmpA; \
8383
const char *pB = pTmpB; \

include/dxc/WinAdapter.h

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@
126126
// Used by HRESULT <--> WIN32 error code conversion
127127
#define SEVERITY_ERROR 1
128128
#define FACILITY_WIN32 7
129-
#define HRESULT_CODE(hr) ((hr)&0xFFFF)
129+
#define HRESULT_CODE(hr) ((hr) & 0xFFFF)
130130
#define MAKE_HRESULT(severity, facility, code) \
131131
((HRESULT)(((unsigned long)(severity) << 31) | \
132132
((unsigned long)(facility) << 16) | ((unsigned long)(code))))
@@ -238,7 +238,7 @@
238238

239239
#define HRESULT_FROM_WIN32(x) \
240240
(HRESULT)(x) <= 0 ? (HRESULT)(x) \
241-
: (HRESULT)(((x)&0x0000FFFF) | (7 << 16) | 0x80000000)
241+
: (HRESULT)(((x) & 0x0000FFFF) | (7 << 16) | 0x80000000)
242242

243243
//===----------------------------------------------------------------------===//
244244
//
@@ -912,32 +912,40 @@ unsigned int SysStringLen(const BSTR bstrString);
912912
#define CP_ACP 0
913913
#define CP_UTF8 65001 // UTF-8 translation.
914914

915-
// Convert Windows codepage value to locale string
916-
const char *CPToLocale(uint32_t CodePage);
915+
// RAII style mechanism for setting/unsetting a locale for the specified Windows
916+
// codepage
917+
class ScopedLocale {
918+
const char *m_prevLocale;
919+
920+
public:
921+
explicit ScopedLocale(uint32_t codePage)
922+
: m_prevLocale(setlocale(LC_ALL, nullptr)) {
923+
assert((codePage == CP_UTF8) &&
924+
"Support for Linux only handles UTF8 code pages");
925+
setlocale(LC_ALL, "en_US.UTF-8");
926+
}
927+
~ScopedLocale() {
928+
if (m_prevLocale != nullptr) {
929+
setlocale(LC_ALL, m_prevLocale);
930+
}
931+
}
932+
};
917933

918934
// The t_nBufferLength parameter is part of the published interface, but not
919935
// used here.
920936
template <int t_nBufferLength = 128> class CW2AEX {
921937
public:
922-
CW2AEX(LPCWSTR psz, UINT nCodePage = CP_UTF8) {
923-
const char *locale = CPToLocale(nCodePage);
924-
if (locale == nullptr) {
925-
// Current Implementation only supports CP_UTF8, and CP_ACP
926-
assert(false && "CW2AEX implementation for Linux only handles "
927-
"UTF8 and ACP code pages");
928-
return;
929-
}
938+
CW2AEX(LPCWSTR psz) {
939+
ScopedLocale locale(CP_UTF8);
930940

931941
if (!psz) {
932942
m_psz = NULL;
933943
return;
934944
}
935945

936-
locale = setlocale(LC_ALL, locale);
937946
int len = (wcslen(psz) + 1) * 4;
938947
m_psz = new char[len];
939948
std::wcstombs(m_psz, psz, len);
940-
setlocale(LC_ALL, locale);
941949
}
942950

943951
~CW2AEX() { delete[] m_psz; }
@@ -952,25 +960,17 @@ typedef CW2AEX<> CW2A;
952960
// used here.
953961
template <int t_nBufferLength = 128> class CA2WEX {
954962
public:
955-
CA2WEX(LPCSTR psz, UINT nCodePage = CP_UTF8) {
956-
const char *locale = CPToLocale(nCodePage);
957-
if (locale == nullptr) {
958-
// Current Implementation only supports CP_UTF8, and CP_ACP
959-
assert(false && "CA2WEX implementation for Linux only handles "
960-
"UTF8 and ACP code pages");
961-
return;
962-
}
963+
CA2WEX(LPCSTR psz) {
964+
ScopedLocale locale(CP_UTF8);
963965

964966
if (!psz) {
965967
m_psz = NULL;
966968
return;
967969
}
968970

969-
locale = setlocale(LC_ALL, locale);
970971
int len = strlen(psz) + 1;
971972
m_psz = new wchar_t[len];
972973
std::mbstowcs(m_psz, psz, len);
973-
setlocale(LC_ALL, locale);
974974
}
975975

976976
~CA2WEX() { delete[] m_psz; }

lib/DxcSupport/Unicode.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
// MultiByteToWideChar which is a Windows-specific method.
2525
// This is a very simplistic implementation for non-Windows platforms. This
2626
// implementation completely ignores CodePage and dwFlags.
27-
int MultiByteToWideChar(uint32_t CodePage, uint32_t /*dwFlags*/,
27+
int MultiByteToWideChar(uint32_t /*CodePage*/, uint32_t /*dwFlags*/,
2828
const char *lpMultiByteStr, int cbMultiByte,
2929
wchar_t *lpWideCharStr, int cchWideChar) {
3030

@@ -52,8 +52,8 @@ int MultiByteToWideChar(uint32_t CodePage, uint32_t /*dwFlags*/,
5252
}
5353

5454
size_t rv;
55-
const char *locale = CPToLocale(CodePage);
56-
locale = setlocale(LC_ALL, locale);
55+
const char *prevLocale = setlocale(LC_ALL, nullptr);
56+
setlocale(LC_ALL, "en_US.UTF-8");
5757
if (lpMultiByteStr[cbMultiByte - 1] != '\0') {
5858
char *srcStr = (char *)malloc((cbMultiByte + 1) * sizeof(char));
5959
strncpy(srcStr, lpMultiByteStr, cbMultiByte);
@@ -63,7 +63,10 @@ int MultiByteToWideChar(uint32_t CodePage, uint32_t /*dwFlags*/,
6363
} else {
6464
rv = mbstowcs(lpWideCharStr, lpMultiByteStr, cchWideChar);
6565
}
66-
setlocale(LC_ALL, locale);
66+
67+
if (prevLocale)
68+
setlocale(LC_ALL, prevLocale);
69+
6770
if (rv == (size_t)cbMultiByte)
6871
return rv;
6972
return rv + 1; // mbstowcs excludes the terminating character
@@ -72,7 +75,7 @@ int MultiByteToWideChar(uint32_t CodePage, uint32_t /*dwFlags*/,
7275
// WideCharToMultiByte is a Windows-specific method.
7376
// This is a very simplistic implementation for non-Windows platforms. This
7477
// implementation completely ignores CodePage and dwFlags.
75-
int WideCharToMultiByte(uint32_t CodePage, uint32_t /*dwFlags*/,
78+
int WideCharToMultiByte(uint32_t /*CodePage*/, uint32_t /*dwFlags*/,
7679
const wchar_t *lpWideCharStr, int cchWideChar,
7780
char *lpMultiByteStr, int cbMultiByte,
7881
const char * /*lpDefaultChar*/,
@@ -105,8 +108,8 @@ int WideCharToMultiByte(uint32_t CodePage, uint32_t /*dwFlags*/,
105108
}
106109

107110
size_t rv;
108-
const char *locale = CPToLocale(CodePage);
109-
locale = setlocale(LC_ALL, locale);
111+
const char *prevLocale = setlocale(LC_ALL, nullptr);
112+
setlocale(LC_ALL, "en_US.UTF-8");
110113
if (lpWideCharStr[cchWideChar - 1] != L'\0') {
111114
wchar_t *srcStr = (wchar_t *)malloc((cchWideChar + 1) * sizeof(wchar_t));
112115
wcsncpy(srcStr, lpWideCharStr, cchWideChar);
@@ -116,7 +119,10 @@ int WideCharToMultiByte(uint32_t CodePage, uint32_t /*dwFlags*/,
116119
} else {
117120
rv = wcstombs(lpMultiByteStr, lpWideCharStr, cbMultiByte);
118121
}
119-
setlocale(LC_ALL, locale);
122+
123+
if (prevLocale)
124+
setlocale(LC_ALL, prevLocale);
125+
120126
if (rv == (size_t)cchWideChar)
121127
return rv;
122128
return rv + 1; // mbstowcs excludes the terminating character

lib/DxcSupport/WinAdapter.cpp

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -60,24 +60,6 @@ unsigned int SysStringLen(const BSTR bstrString) {
6060

6161
return blobIn[0] / sizeof(OLECHAR);
6262
}
63-
//===---------------------- Char converstion ------------------------------===//
64-
65-
const char *CPToLocale(uint32_t CodePage) {
66-
#ifdef __APPLE__
67-
static const char *utf8 = "en_US.UTF-8";
68-
static const char *iso88591 = "en_US.ISO8859-1";
69-
#else
70-
static const char *utf8 = "en_US.utf8";
71-
static const char *iso88591 = "en_US.iso88591";
72-
#endif
73-
if (CodePage == CP_UTF8) {
74-
return utf8;
75-
} else if (CodePage == CP_ACP) {
76-
// Experimentation suggests that ACP is expected to be ISO-8859-1
77-
return iso88591;
78-
}
79-
return nullptr;
80-
}
8163

8264
//===--------------------------- CHandle -------------------------------===//
8365

lib/DxcSupport/dxcapi.use.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ static std::string GetWin32ErrorMessage(DWORD err) {
5858
void IFT_Data(HRESULT hr, LPCWSTR data) {
5959
if (SUCCEEDED(hr))
6060
return;
61-
CW2A pData(data, CP_UTF8);
61+
CW2A pData(data);
6262
std::string errMsg;
6363
if (HRESULT_IS_WIN32ERR(hr)) {
6464
DWORD err = HRESULT_AS_WIN32ERR(hr);

lib/DxilDia/DxcPixCompilationInfo.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ static void MDStringOperandToBSTR(llvm::MDOperand const &mdOperand,
105105
llvm::dyn_cast<llvm::MDString>(mdOperand)->getString();
106106
std::string StringWithTerminator(MetadataAsStringRef.begin(),
107107
MetadataAsStringRef.size());
108-
CA2W cv(StringWithTerminator.c_str(), CP_UTF8);
108+
CA2W cv(StringWithTerminator.c_str());
109109
CComBSTR BStr;
110110
BStr.Append(cv);
111111
BStr.Append(L"\0", 1);
@@ -168,7 +168,7 @@ STDMETHODIMP CompilationInfo::GetArguments(BSTR *pArguments) {
168168
}
169169

170170
std::string str(strRef.begin(), strRef.size());
171-
CA2W cv(str.c_str(), CP_UTF8);
171+
CA2W cv(str.c_str());
172172
pBSTR.Append(cv);
173173
pBSTR.Append(L" ", 1);
174174
}
@@ -201,7 +201,7 @@ STDMETHODIMP CompilationInfo::GetMacroDefinitions(BSTR *pMacroDefinitions) {
201201
str = name + "=" + definition;
202202
}
203203

204-
CA2W cv(str.c_str(), CP_UTF8);
204+
CA2W cv(str.c_str());
205205
pBSTR.Append(L"-D", 2);
206206
pBSTR.Append(cv);
207207
pBSTR.Append(L" ", 1);
@@ -218,7 +218,7 @@ CompilationInfo::GetEntryPointFile(BSTR *pEntryPointFile) {
218218
->getString();
219219
std::string str(strRef.begin(),
220220
strRef.size()); // To make sure str is null terminated
221-
CA2W cv(str.c_str(), CP_UTF8);
221+
CA2W cv(str.c_str());
222222
CComBSTR pBSTR;
223223
pBSTR.Append(cv);
224224
*pEntryPointFile = pBSTR.Detach();
@@ -227,7 +227,7 @@ CompilationInfo::GetEntryPointFile(BSTR *pEntryPointFile) {
227227

228228
STDMETHODIMP
229229
CompilationInfo::GetHlslTarget(BSTR *pHlslTarget) {
230-
CA2W cv(m_pSession->DxilModuleRef().GetShaderModel()->GetName(), CP_UTF8);
230+
CA2W cv(m_pSession->DxilModuleRef().GetShaderModel()->GetName());
231231
CComBSTR pBSTR;
232232
pBSTR.Append(cv);
233233
*pHlslTarget = pBSTR.Detach();
@@ -237,7 +237,7 @@ CompilationInfo::GetHlslTarget(BSTR *pHlslTarget) {
237237
STDMETHODIMP
238238
CompilationInfo::GetEntryPoint(BSTR *pEntryPoint) {
239239
auto name = m_pSession->DxilModuleRef().GetEntryFunctionName();
240-
CA2W cv(name.c_str(), CP_UTF8);
240+
CA2W cv(name.c_str());
241241
CComBSTR pBSTR;
242242
pBSTR.Append(cv);
243243
*pEntryPoint = pBSTR.Detach();

lib/DxilDia/DxilDiaSession.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ STDMETHODIMP dxil_dia::Session::findInjectedSource(
351351
/* [in] */ LPCOLESTR srcFile,
352352
/* [out] */ IDiaEnumInjectedSources **ppResult) {
353353
if (Contents() != nullptr) {
354-
CW2A pUtf8FileName(srcFile, CP_UTF8);
354+
CW2A pUtf8FileName(srcFile);
355355
DxcThreadMalloc TM(m_pMalloc);
356356
IDiaTable *pTable;
357357
IFT(Table::Create(this, Table::Kind::InjectedSource, &pTable));

lib/DxilDia/DxilDiaSymbolManager.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ class Function final : public SymbolManager::SymbolFactory {
296296
IFR(FunctionSymbol::Create(pMalloc, pSession, m_ID, m_Node, m_TypeID,
297297
m_Node->getType(), ppRet));
298298
(*ppRet)->SetLexicalParent(m_ParentID);
299-
(*ppRet)->SetName(CA2W(m_Node->getName().str().c_str(), CP_UTF8));
299+
(*ppRet)->SetName(CA2W(m_Node->getName().str().c_str()));
300300
return S_OK;
301301
}
302302

@@ -398,7 +398,7 @@ class TypedefType final : public SymbolManager::SymbolFactory {
398398
IFR(TypedefTypeSymbol::Create(pMalloc, pSession, m_ParentID, m_ID, m_Node,
399399
m_BaseTypeID, ppRet));
400400
(*ppRet)->SetLexicalParent(m_ParentID);
401-
(*ppRet)->SetName(CA2W(m_Node->getName().str().c_str(), CP_UTF8));
401+
(*ppRet)->SetName(CA2W(m_Node->getName().str().c_str()));
402402
return S_OK;
403403
}
404404

@@ -440,7 +440,7 @@ class VectorType final : public SymbolManager::SymbolFactory {
440440
IFR(VectorTypeSymbol::Create(pMalloc, pSession, m_ParentID, m_ID, m_Node,
441441
m_ElemTyID, m_NumElts, ppRet));
442442
(*ppRet)->SetLexicalParent(m_ParentID);
443-
(*ppRet)->SetName(CA2W(m_Node->getName().str().c_str(), CP_UTF8));
443+
(*ppRet)->SetName(CA2W(m_Node->getName().str().c_str()));
444444
return S_OK;
445445
}
446446

@@ -506,7 +506,7 @@ class GlobalVariable final : public SymbolManager::SymbolFactory {
506506
IFR(GlobalVariableSymbol::Create(pMalloc, pSession, m_ID, m_GV, m_TypeID,
507507
m_Type, ppRet));
508508
(*ppRet)->SetLexicalParent(m_ParentID);
509-
(*ppRet)->SetName(CA2W(m_GV->getName().str().c_str(), CP_UTF8));
509+
(*ppRet)->SetName(CA2W(m_GV->getName().str().c_str()));
510510
(*ppRet)->SetIsHLSLData(true);
511511
return S_OK;
512512
}
@@ -586,7 +586,7 @@ class LocalVariable final : public SymbolManager::SymbolFactory {
586586
m_Type, m_VI->GetOffsetInUDT(),
587587
m_VI->GetDxilRegister(), ppRet));
588588
(*ppRet)->SetLexicalParent(m_ParentID);
589-
(*ppRet)->SetName(CA2W(m_Node->getName().str().c_str(), CP_UTF8));
589+
(*ppRet)->SetName(CA2W(m_Node->getName().str().c_str()));
590590
(*ppRet)->SetDataKind(m_Node->getTag() == llvm::dwarf::DW_TAG_arg_variable
591591
? DataIsParam
592592
: DataIsLocal);
@@ -628,7 +628,7 @@ class UDTField final : public SymbolManager::SymbolFactory {
628628
IFR(UDTFieldSymbol::Create(pMalloc, pSession, m_ID, m_Node, m_TypeID,
629629
m_Type, ppRet));
630630
(*ppRet)->SetLexicalParent(m_ParentID);
631-
(*ppRet)->SetName(CA2W(m_Node->getName().str().c_str(), CP_UTF8));
631+
(*ppRet)->SetName(CA2W(m_Node->getName().str().c_str()));
632632
(*ppRet)->SetDataKind(m_Node->isStaticMember() ? DataIsStaticLocal
633633
: DataIsMember);
634634
return S_OK;
@@ -946,7 +946,7 @@ HRESULT dxil_dia::hlsl_symbols::CompilandEnvSymbol::CreateFlags(
946946
}
947947

948948
std::string str(strRef.begin(), strRef.size());
949-
CA2W cv(str.c_str(), CP_UTF8);
949+
CA2W cv(str.c_str());
950950
pBSTR.Append(cv);
951951
pBSTR.Append(L"\0", 1);
952952
}
@@ -989,7 +989,7 @@ HRESULT dxil_dia::hlsl_symbols::CompilandEnvSymbol::CreateDefines(
989989
it != definesNode->op_end(); ++it) {
990990
llvm::StringRef strRef = llvm::dyn_cast<llvm::MDString>(*it)->getString();
991991
std::string str(strRef.begin(), strRef.size());
992-
CA2W cv(str.c_str(), CP_UTF8);
992+
CA2W cv(str.c_str());
993993
pBSTR.Append(cv);
994994
pBSTR.Append(L"\0", 1);
995995
}
@@ -1068,7 +1068,7 @@ STDMETHODIMP dxil_dia::hlsl_symbols::TypeSymbol::get_name(
10681068
DXASSERT(!this->HasName(), "Setting type name multiple times.");
10691069
std::string Name;
10701070
IFR(m_lazySymbolName(m_pSession, &Name));
1071-
this->SetName(CA2W(Name.c_str(), CP_UTF8));
1071+
this->SetName(CA2W(Name.c_str()));
10721072
m_lazySymbolName = nullptr;
10731073
}
10741074
return Symbol::get_name(pRetVal);
@@ -1579,7 +1579,7 @@ HRESULT dxil_dia::hlsl_symbols::SymbolManagerInit::CreateSubroutineType(
15791579
if (!name) {
15801580
OS << "???";
15811581
} else {
1582-
OS << CW2A((BSTR)name, CP_UTF8);
1582+
OS << CW2A((BSTR)name);
15831583
}
15841584
}
15851585
if (first) {
@@ -1678,7 +1678,7 @@ HRESULT dxil_dia::hlsl_symbols::SymbolManagerInit::CreateCompositeType(
16781678
if (!name) {
16791679
OS << "???";
16801680
} else {
1681-
OS << CW2A((BSTR)name, CP_UTF8);
1681+
OS << CW2A((BSTR)name);
16821682
}
16831683

16841684
OS << "[";
@@ -1910,7 +1910,7 @@ HRESULT dxil_dia::hlsl_symbols::SymbolManagerInit::HandleDerivedType(
19101910
if (!name) {
19111911
OS << "???";
19121912
} else {
1913-
OS << CW2A((BSTR)name, CP_UTF8);
1913+
OS << CW2A((BSTR)name);
19141914
}
19151915
OS << Qualifier;
19161916
return S_OK;

lib/HLSL/DxcOptimizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,7 @@ HRESULT STDMETHODCALLTYPE DxcOptimizer::RunOptimizer(
462462
continue;
463463
}
464464

465-
CW2A optName(ppOptions[i], CP_UTF8);
465+
CW2A optName(ppOptions[i]);
466466
// The option syntax is
467467
const char ArgDelim = ',';
468468
// '-' OPTION_NAME (',' ARG_NAME ('=' ARG_VALUE)?)*

0 commit comments

Comments
 (0)