@@ -2051,6 +2051,8 @@ Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) {
20512051 } else if (Literal.isUTF8()) {
20522052 if (getLangOpts().Char8)
20532053 CharTy = Context.Char8Ty;
2054+ else if (getLangOpts().C23)
2055+ CharTy = Context.UnsignedCharTy;
20542056 Kind = StringLiteralKind::UTF8;
20552057 } else if (Literal.isUTF16()) {
20562058 CharTy = Context.Char16Ty;
@@ -2062,17 +2064,23 @@ Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) {
20622064 CharTy = Context.UnsignedCharTy;
20632065 }
20642066
2065- // Warn on initializing an array of char from a u8 string literal; this
2066- // becomes ill-formed in C++2a.
2067- if (getLangOpts().CPlusPlus && !getLangOpts().CPlusPlus20 &&
2068- !getLangOpts().Char8 && Kind == StringLiteralKind::UTF8) {
2069- Diag(StringTokLocs.front(), diag::warn_cxx20_compat_utf8_string);
2067+ // Warn on u8 string literals before C++20 and C23, whose type
2068+ // was an array of char before but becomes an array of char8_t.
2069+ // In C++20, it cannot be used where a pointer to char is expected.
2070+ // In C23, it might have an unexpected value if char was signed.
2071+ if (Kind == StringLiteralKind::UTF8 &&
2072+ (getLangOpts().CPlusPlus
2073+ ? !getLangOpts().CPlusPlus20 && !getLangOpts().Char8
2074+ : !getLangOpts().C23)) {
2075+ Diag(StringTokLocs.front(), getLangOpts().CPlusPlus
2076+ ? diag::warn_cxx20_compat_utf8_string
2077+ : diag::warn_c23_compat_utf8_string);
20702078
20712079 // Create removals for all 'u8' prefixes in the string literal(s). This
2072- // ensures C++2a compatibility (but may change the program behavior when
2080+ // ensures C++20/C23 compatibility (but may change the program behavior when
20732081 // built by non-Clang compilers for which the execution character set is
20742082 // not always UTF-8).
2075- auto RemovalDiag = PDiag(diag::note_cxx20_compat_utf8_string_remove_u8 );
2083+ auto RemovalDiag = PDiag(diag::note_cxx20_c23_compat_utf8_string_remove_u8 );
20762084 SourceLocation RemovalDiagLoc;
20772085 for (const Token &Tok : StringToks) {
20782086 if (Tok.getKind() == tok::utf8_string_literal) {
0 commit comments