Skip to content

Commit 0c24536

Browse files
committed
address comments
1 parent 304a0ed commit 0c24536

File tree

17 files changed

+125
-73
lines changed

17 files changed

+125
-73
lines changed

clang/include/clang/Basic/DiagnosticFrontendKinds.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,8 @@ def err_non_default_visibility_dllimport : Error<
342342
"non-default visibility cannot be applied to 'dllimport' declaration">;
343343
def err_ifunc_resolver_return : Error<
344344
"ifunc resolver function must return a pointer">;
345-
345+
def err_fe_literal_conv_config : Error<
346+
"failed to configure the literal converter">;
346347
def warn_atomic_op_misaligned : Warning<
347348
"misaligned atomic operation may incur "
348349
"significant performance penalty"

clang/include/clang/Basic/DiagnosticLexKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,8 @@ def ext_string_too_long : Extension<"string literal of length %0 exceeds "
279279
"support">, InGroup<OverlengthStrings>;
280280
def err_character_too_large : Error<
281281
"character too large for enclosing character literal type">;
282+
def err_exec_charset_conversion_failed : Error<
283+
"conversion to execution encoding failed: '%0'">;
282284
def warn_c99_compat_unicode_literal : Warning<
283285
"unicode literals are incompatible with C99">,
284286
InGroup<C99Compat>, DefaultIgnore;

clang/include/clang/Basic/TokenKinds.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,18 @@ inline bool isLiteral(TokenKind K) {
111111
return isInLiteralRange;
112112
}
113113

114-
/// Return true if this is a utf literal kind.
114+
/// Return true if this is a UTF literal kind.
115115
inline bool isUTFLiteral(TokenKind K) {
116116
return K == tok::utf8_char_constant || K == tok::utf8_string_literal ||
117117
K == tok::utf16_char_constant || K == tok::utf16_string_literal ||
118118
K == tok::utf32_char_constant || K == tok::utf32_string_literal;
119119
}
120120

121+
/// Return true if this is a wide literal kind.
122+
inline bool isWideLiteral(TokenKind K) {
123+
return K == tok::wide_char_constant || K == tok::wide_string_literal;
124+
}
125+
121126
/// Return true if this is any of tok::annot_* kinds.
122127
bool isAnnotation(TokenKind K);
123128

clang/include/clang/Driver/Options.td

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2967,7 +2967,10 @@ def fexperimental_strict_floating_point : Flag<["-"], "fexperimental-strict-floa
29672967
def finput_charset_EQ : Joined<["-"], "finput-charset=">,
29682968
Visibility<[ClangOption, FlangOption, FC1Option]>, Group<f_Group>,
29692969
HelpText<"Specify the default character set for source files">;
2970-
// Driver spelling of the execution character set option. MetaVarName makes
// the help output show "-fexec-charset=<charset>", matching the <charset>
// placeholder used in the help text below.
def fexec_charset_EQ : Joined<["-"], "fexec-charset=">, Group<f_Group>,
  MetaVarName<"<charset>">,
  HelpText<"Set the execution <charset> for string and character literals. "
           "Supported character encodings include ISO-8859-1, UTF-8, IBM1047, "
           "and possibly those supported by ICU or the host iconv library.">;
29712974
def finstrument_functions
29722975
: Flag<["-"], "finstrument-functions">,
29732976
Group<f_Group>,
@@ -7475,8 +7478,8 @@ def tune_cpu : Separate<["-"], "tune-cpu">,
74757478
MarshallingInfoString<TargetOpts<"TuneCPU">>;
74767479
def fexec_charset : Separate<["-"], "fexec-charset">, MetaVarName<"<charset>">,
74777480
HelpText<"Set the execution <charset> for string and character literals. "
7478-
"Supported character encodings include ISO8859-1, UTF-8, IBM-1047 "
7479-
"and those supported by the host icu or iconv library.">,
7481+
"Supported character encodings include ISO-8859-1, UTF-8, IBM1047, "
7482+
"and possibly those supported by ICU or the host iconv library.">,
74807483
MarshallingInfoString<LangOpts<"ExecEncoding">>;
74817484
def target_cpu : Separate<["-"], "target-cpu">,
74827485
HelpText<"Target a specific cpu type">,
@@ -9083,7 +9086,9 @@ def _SLASH_source_charset : CLCompileJoined<"source-charset:">,
90839086
HelpText<"Set source encoding, supports only UTF-8">,
90849087
Alias<finput_charset_EQ>;
90859088
def _SLASH_execution_charset : CLCompileJoined<"execution-charset:">,
9086-
HelpText<"Set runtime encoding, supports only UTF-8">,
9089+
HelpText<"Set the execution <charset> for string and character literals. "
9090+
"Supported character encodings include ISO-8859-1, UTF-8, IBM1047, "
9091+
"and possibly those supported by ICU or the host iconv library.">,
90879092
Alias<fexec_charset_EQ>;
90889093
def _SLASH_std : CLCompileJoined<"std:">,
90899094
HelpText<"Set language version (c++14,c++17,c++20,c++23preview,c++latest,c11,c17)">;

clang/include/clang/Lex/LiteralConverter.h

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,25 @@
1616
#include "llvm/ADT/StringRef.h"
1717
#include "llvm/Support/TextEncoding.h"
1818

19-
/// Selects which converter LiteralConverter::getConverter returns.
enum ConversionAction {
  /// No conversion; getConverter returns nullptr for this action.
  CA_NoConversion,
  /// Use the converter to the system encoding.
  CA_ToSystemEncoding,
  /// Use the converter to the execution encoding.
  CA_ToExecEncoding
};
2024

2125
class LiteralConverter {
2226
llvm::StringRef InternalEncoding;
2327
llvm::StringRef SystemEncoding;
2428
llvm::StringRef ExecEncoding;
25-
llvm::TextEncodingConverter *ToSystemEncodingConverter;
26-
llvm::TextEncodingConverter *ToExecEncodingConverter;
29+
llvm::TextEncodingConverter *ToSystemEncodingConverter = nullptr;
30+
llvm::TextEncodingConverter *ToExecEncodingConverter = nullptr;
2731

2832
public:
2933
llvm::TextEncodingConverter *getConverter(ConversionAction Action);
30-
void setConvertersFromOptions(const clang::LangOptions &Opts,
31-
const clang::TargetInfo &TInfo,
32-
clang::DiagnosticsEngine &Diags);
34+
static std::error_code
35+
setConvertersFromOptions(LiteralConverter &LiteralConv,
36+
const clang::LangOptions &Opts,
37+
const clang::TargetInfo &TInfo);
3338
};
3439

3540
#endif

clang/include/clang/Lex/LiteralSupport.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ class StringLiteralParser {
251251
StringLiteralParser(
252252
ArrayRef<Token> StringToks, Preprocessor &PP,
253253
StringLiteralEvalMethod StringMethod = StringLiteralEvalMethod::Evaluated,
254-
ConversionAction Action = ToExecEncoding);
254+
ConversionAction Action = CA_ToExecEncoding);
255255
StringLiteralParser(ArrayRef<Token> StringToks, const SourceManager &sm,
256256
const LangOptions &features, const TargetInfo &target,
257257
DiagnosticsEngine *diags = nullptr)
@@ -260,7 +260,7 @@ class StringLiteralParser {
260260
Kind(tok::unknown), ResultPtr(ResultBuf.data()),
261261
EvalMethod(StringLiteralEvalMethod::Evaluated), hadError(false),
262262
Pascal(false) {
263-
init(StringToks, NoConversion);
263+
init(StringToks, CA_NoConversion);
264264
}
265265

266266
bool hadError;

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7364,20 +7364,24 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
73647364
<< value;
73657365
}
73667366

7367-
// Set the default fexec-charset as the system charset.
7368-
CmdArgs.push_back("-fexec-charset");
7369-
CmdArgs.push_back(Args.MakeArgString(Triple.getDefaultTextEncoding()));
73707367
if (Arg *execEncoding = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
73717368
StringRef value = execEncoding->getValue();
7372-
llvm::ErrorOr<llvm::TextEncodingConverter> ErrorOrConverter =
7373-
llvm::TextEncodingConverter::create("UTF-8", value.data());
7374-
if (ErrorOrConverter) {
7375-
CmdArgs.push_back("-fexec-charset");
7376-
CmdArgs.push_back(Args.MakeArgString(value));
7377-
} else {
7378-
D.Diag(diag::err_drv_invalid_value)
7379-
<< execEncoding->getAsString(Args) << value;
7369+
bool KnownEncoding =
7370+
llvm::TextEncodingConverter::getKnownEncoding(value).has_value();
7371+
if (!KnownEncoding) {
7372+
llvm::ErrorOr<llvm::TextEncodingConverter> ErrorOrConverter =
7373+
llvm::TextEncodingConverter::create("UTF-8", value.data());
7374+
if (!ErrorOrConverter)
7375+
D.Diag(diag::err_drv_invalid_value)
7376+
<< execEncoding->getAsString(Args) << value;
73807377
}
7378+
CmdArgs.push_back("-fexec-charset");
7379+
CmdArgs.push_back(Args.MakeArgString(value));
7380+
} else {
7381+
// Set the default fexec-charset as the system charset.
7382+
CmdArgs.push_back("-fexec-charset");
7383+
CmdArgs.push_back(
7384+
Args.MakeArgString(Triple.getDefaultNarrowTextEncoding()));
73817385
}
73827386

73837387
RenderDiagnosticsOptions(D, Args, CmdArgs);

clang/lib/Frontend/CompilerInstance.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -543,8 +543,9 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) {
543543
if (GetDependencyDirectives)
544544
PP->setDependencyDirectivesGetter(*GetDependencyDirectives);
545545

546-
PP->getLiteralConverter().setConvertersFromOptions(getLangOpts(), getTarget(),
547-
getDiagnostics());
546+
if (!LiteralConverter::setConvertersFromOptions(PP->getLiteralConverter(),
547+
getLangOpts(), getTarget()))
548+
PP->getDiagnostics().Report(clang::diag::err_fe_literal_conv_config);
548549
}
549550

550551
std::string CompilerInstance::getSpecificModuleCachePath(StringRef ModuleHash) {

clang/lib/Frontend/InitPreprocessor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1029,7 +1029,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
10291029
Builder.defineMacro("__clang_literal_encoding__", LangOpts.ExecEncoding);
10301030
else
10311031
Builder.defineMacro("__clang_literal_encoding__",
1032-
TI.getTriple().getDefaultTextEncoding());
1032+
TI.getTriple().getDefaultNarrowTextEncoding());
10331033
if (TI.getTypeWidth(TI.getWCharType()) >= 32) {
10341034
// FIXME: 32-bit wchar_t signals UTF-32. This may change
10351035
// if -fwide-exec-charset= is ever supported.

clang/lib/Lex/LiteralConverter.cpp

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,41 +13,46 @@ using namespace llvm;
1313

1414
/// Look up the converter that corresponds to \p Action.
///
/// \returns the system-encoding converter for CA_ToSystemEncoding, the
/// execution-encoding converter for CA_ToExecEncoding, and nullptr for any
/// other action. The stored pointer itself may be null if no converter was
/// created for that direction.
llvm::TextEncodingConverter *
LiteralConverter::getConverter(ConversionAction Action) {
  switch (Action) {
  case CA_ToSystemEncoding:
    return ToSystemEncodingConverter;
  case CA_ToExecEncoding:
    return ToExecEncodingConverter;
  default:
    return nullptr;
  }
}
2323

24-
void LiteralConverter::setConvertersFromOptions(
25-
const clang::LangOptions &Opts, const clang::TargetInfo &TInfo,
26-
clang::DiagnosticsEngine &Diags) {
24+
std::error_code
25+
LiteralConverter::setConvertersFromOptions(LiteralConverter &LiteralConv,
26+
const clang::LangOptions &Opts,
27+
const clang::TargetInfo &TInfo) {
2728
using namespace llvm;
28-
InternalEncoding = "UTF-8";
29-
SystemEncoding = TInfo.getTriple().getDefaultTextEncoding();
30-
ExecEncoding =
31-
Opts.ExecEncoding.empty() ? InternalEncoding : Opts.ExecEncoding;
29+
LiteralConv.InternalEncoding = "UTF-8";
30+
LiteralConv.SystemEncoding = TInfo.getTriple().getDefaultNarrowTextEncoding();
31+
LiteralConv.ExecEncoding = Opts.ExecEncoding.empty()
32+
? LiteralConv.InternalEncoding
33+
: Opts.ExecEncoding;
34+
3235
// Create converter between internal and system encoding
33-
if (InternalEncoding != SystemEncoding) {
36+
if (LiteralConv.InternalEncoding != LiteralConv.SystemEncoding) {
3437
ErrorOr<TextEncodingConverter> ErrorOrConverter =
35-
llvm::TextEncodingConverter::create(InternalEncoding, SystemEncoding);
38+
llvm::TextEncodingConverter::create(LiteralConv.InternalEncoding,
39+
LiteralConv.SystemEncoding);
3640
if (!ErrorOrConverter)
37-
return;
38-
ToSystemEncodingConverter =
41+
return ErrorOrConverter.getError();
42+
LiteralConv.ToSystemEncodingConverter =
3943
new TextEncodingConverter(std::move(*ErrorOrConverter));
4044
}
4145

4246
// Create converter between internal and exec encoding specified
4347
// in fexec-charset option.
44-
if (InternalEncoding == ExecEncoding)
45-
return;
48+
if (LiteralConv.InternalEncoding == LiteralConv.ExecEncoding)
49+
return std::error_code();
4650
ErrorOr<TextEncodingConverter> ErrorOrConverter =
47-
llvm::TextEncodingConverter::create(InternalEncoding, ExecEncoding);
51+
llvm::TextEncodingConverter::create(LiteralConv.InternalEncoding,
52+
LiteralConv.ExecEncoding);
4853
if (!ErrorOrConverter)
49-
Diags.Report(clang::diag::err_drv_invalid_value)
50-
<< "-fexec-charset" << ExecEncoding;
51-
ToExecEncodingConverter =
54+
return ErrorOrConverter.getError();
55+
LiteralConv.ToExecEncodingConverter =
5256
new TextEncodingConverter(std::move(*ErrorOrConverter));
57+
return std::error_code();
5358
}

0 commit comments

Comments
 (0)