Skip to content

Commit 22a938d

Browse files
cor3ntin, Lukacma
authored and committed
[Clang] Do not warn on UTF-16 -> UTF-32 conversions. (llvm#163927)
UTF-16 to UTF-32 conversions seem widespread, and lone surrogates have a distinct representation in UTF-32. Let's not warn on this case, to make the warning easier to adopt. This follows the SG-16 guideline: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2025/p3695r2.html#changes-since-r1 Fixes llvm#163719.
1 parent 8c3a282 commit 22a938d

File tree

2 files changed

+12
-5
lines changed

2 files changed

+12
-5
lines changed

clang/lib/Sema/SemaChecking.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12309,13 +12309,20 @@ static void DiagnoseMixedUnicodeImplicitConversion(Sema &S, const Type *Source,
1230912309
SourceLocation CC) {
1231012310
assert(Source->isUnicodeCharacterType() && Target->isUnicodeCharacterType() &&
1231112311
Source != Target);
12312+
12313+
// Lone surrogates have a distinct representation in UTF-32.
12314+
// Converting between UTF-16 and UTF-32 codepoints seems very widespread,
12315+
// so don't warn on such conversion.
12316+
if (Source->isChar16Type() && Target->isChar32Type())
12317+
return;
12318+
1231212319
Expr::EvalResult Result;
1231312320
if (E->EvaluateAsInt(Result, S.getASTContext(), Expr::SE_AllowSideEffects,
1231412321
S.isConstantEvaluatedContext())) {
1231512322
llvm::APSInt Value(32);
1231612323
Value = Result.Val.getInt();
1231712324
bool IsASCII = Value <= 0x7F;
12318-
bool IsBMP = Value <= 0xD7FF || (Value >= 0xE000 && Value <= 0xFFFF);
12325+
bool IsBMP = Value <= 0xDFFF || (Value >= 0xE000 && Value <= 0xFFFF);
1231912326
bool ConversionPreservesSemantics =
1232012327
IsASCII || (!Source->isChar8Type() && !Target->isChar8Type() && IsBMP);
1232112328

clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
1414
c16(u32); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' may lose precision and change the meaning of the represented code unit}}
1515

1616
c32(u8); // expected-warning {{implicit conversion from 'char8_t' to 'char32_t' may change the meaning of the represented code unit}}
17-
c32(u16); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' may change the meaning of the represented code unit}}
17+
c32(u16);
1818
c32(u32);
1919

2020

@@ -30,7 +30,7 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
3030
c16(char32_t(0x7f));
3131
c16(char32_t(0x80));
3232
c16(char32_t(0xD7FF));
33-
c16(char32_t(0xD800)); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' changes the meaning of the code unit '<0xD800>'}}
33+
c16(char32_t(0xD800));
3434
c16(char32_t(0xE000));
3535
c16(char32_t(U'🐉')); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' changes the meaning of the code point '🐉'}}
3636

@@ -44,8 +44,8 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
4444
c32(char16_t(0x80));
4545

4646
c32(char16_t(0xD7FF));
47-
c32(char16_t(0xD800)); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' changes the meaning of the code unit '<0xD800>'}}
48-
c32(char16_t(0xDFFF)); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' changes the meaning of the code unit '<0xDFFF>'}}
47+
c32(char16_t(0xD800));
48+
c32(char16_t(0xDFFF));
4949
c32(char16_t(0xE000));
5050
c32(char16_t(u''));
5151

0 commit comments

Comments
 (0)