Skip to content

Commit 8e39dc9

Browse files
committed
Don't warn if one side of the comparison can be evaluated to a code point representable in both types
1 parent 5e092e7 commit 8e39dc9

File tree

5 files changed

+68
-60
lines changed

5 files changed

+68
-60
lines changed

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4367,12 +4367,12 @@ def warn_impcast_unicode_precision
43674367
InGroup<CharacterConversion>;
43684368
def warn_impcast_unicode_char_type_constant
43694369
: Warning<"implicit conversion from %0 to %1 changes the meaning of the "
4370-
"%select{code unit|codepoint}2 '%3'">,
4370+
"%select{code unit|code point}2 '%3'">,
43714371
InGroup<CharacterConversion>;
43724372

43734373
def warn_comparison_unicode_mixed_types
43744374
: Warning<"comparing values of different Unicode code unit types %0 and %1 "
4375-
"may compare different codepoints">,
4375+
"may compare different code points">,
43764376
InGroup<CharacterConversion>;
43774377

43784378
def warn_comparison_unicode_mixed_types_constant
@@ -7742,7 +7742,7 @@ def warn_comparison_of_mixed_enum_types_switch : Warning<
77427742
"%diff{ ($ and $)|}0,1">,
77437743
InGroup<EnumCompareSwitch>;
77447744

7745-
def warn_arith_conv_mixed__unicode_types
7745+
def warn_arith_conv_mixed_unicode_types
77467746
: Warning<"%sub{select_arith_conv_kind}0 "
77477747
"different Unicode character types %1 and %2">,
77487748
InGroup<CharacterConversion>;

clang/lib/AST/ASTDiagnostic.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2203,9 +2203,10 @@ std::string clang::FormatUTFCodeUnitAsCodepoint(unsigned Value, QualType T) {
22032203
assert(Value <= 0xFFFF && "not a valid UTF-16 code unit");
22042204
return llvm::IsSingleCodeUnitUTF16Codepoint(Value);
22052205
}
2206+
assert(T->isChar32Type());
22062207
return llvm::IsSingleCodeUnitUTF32Codepoint(Value);
22072208
};
2208-
llvm::SmallVector<char, 4> Str;
2209+
llvm::SmallVector<char, 16> Str;
22092210
if (!IsSingleCodeUnitCP(Value, T)) {
22102211
llvm::raw_svector_ostream OS(Str);
22112212
OS << "<" << llvm::format_hex(Value, 1, /*Upper=*/true) << ">";

clang/lib/Sema/SemaChecking.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11834,6 +11834,7 @@ static void DiagnoseMixedUnicodeImplicitConversion(Sema &S, const Type *Source,
1183411834
return llvm::IsSingleCodeUnitUTF8Codepoint(Value.getExtValue());
1183511835
if (T->isChar16Type())
1183611836
return llvm::IsSingleCodeUnitUTF16Codepoint(Value.getExtValue());
11837+
assert(T->isChar32Type());
1183711838
return llvm::IsSingleCodeUnitUTF32Codepoint(Value.getExtValue());
1183811839
};
1183911840

clang/lib/Sema/SemaExpr.cpp

Lines changed: 42 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1582,50 +1582,60 @@ static void CheckUnicodeArithmeticConversions(Sema &SemaRef, Expr *LHS,
15821582
if (SemaRef.getASTContext().hasSameType(LHSType, RHSType))
15831583
return;
15841584

1585-
Expr::EvalResult LHSRes, RHSRes;
1586-
bool Success = LHS->EvaluateAsInt(LHSRes, SemaRef.getASTContext(),
1587-
Expr::SE_AllowSideEffects,
1588-
SemaRef.isConstantEvaluatedContext());
1589-
if (Success)
1590-
Success = RHS->EvaluateAsInt(RHSRes, SemaRef.getASTContext(),
1591-
Expr::SE_AllowSideEffects,
1592-
SemaRef.isConstantEvaluatedContext());
1593-
if (Success) {
1594-
llvm::APSInt LHSValue(32);
1595-
LHSValue = LHSRes.Val.getInt();
1596-
llvm::APSInt RHSValue(32);
1597-
RHSValue = RHSRes.Val.getInt();
1598-
1599-
auto IsSingleCodeUnitCP = [](const QualType &T,
1600-
const llvm::APSInt &Value) {
1601-
if (T->isChar8Type())
1602-
return llvm::IsSingleCodeUnitUTF8Codepoint(Value.getExtValue());
1603-
if (T->isChar16Type())
1604-
return llvm::IsSingleCodeUnitUTF16Codepoint(Value.getExtValue());
1605-
return llvm::IsSingleCodeUnitUTF32Codepoint(Value.getExtValue());
1606-
};
1585+
auto IsSingleCodeUnitCP = [](const QualType &T, const llvm::APSInt &Value) {
1586+
if (T->isChar8Type())
1587+
return llvm::IsSingleCodeUnitUTF8Codepoint(Value.getExtValue());
1588+
if (T->isChar16Type())
1589+
return llvm::IsSingleCodeUnitUTF16Codepoint(Value.getExtValue());
1590+
assert(T->isChar32Type());
1591+
return llvm::IsSingleCodeUnitUTF32Codepoint(Value.getExtValue());
1592+
};
16071593

1608-
bool LHSSafe = IsSingleCodeUnitCP(LHSType, LHSValue);
1609-
bool RHSSafe = IsSingleCodeUnitCP(RHSType, RHSValue);
1610-
if (LHSSafe && RHSSafe)
1594+
Expr::EvalResult LHSRes, RHSRes;
1595+
bool LHSSuccess = LHS->EvaluateAsInt(LHSRes, SemaRef.getASTContext(),
1596+
Expr::SE_AllowSideEffects,
1597+
SemaRef.isConstantEvaluatedContext());
1598+
bool RHSuccess = RHS->EvaluateAsInt(RHSRes, SemaRef.getASTContext(),
1599+
Expr::SE_AllowSideEffects,
1600+
SemaRef.isConstantEvaluatedContext());
1601+
1602+
// Don't warn if the one known value is a representable
1603+
// in the type of both expressions.
1604+
if (LHSSuccess != RHSuccess) {
1605+
Expr::EvalResult &Res = LHSSuccess ? LHSRes : RHSRes;
1606+
if (IsSingleCodeUnitCP(LHSType, Res.Val.getInt()) &&
1607+
IsSingleCodeUnitCP(RHSType, Res.Val.getInt()))
16111608
return;
1609+
}
16121610

1613-
SemaRef.Diag(Loc, diag::warn_comparison_unicode_mixed_types_constant)
1611+
if (!LHSSuccess || !RHSuccess) {
1612+
SemaRef.Diag(Loc, diag::warn_comparison_unicode_mixed_types)
16141613
<< LHS->getSourceRange() << RHS->getSourceRange() << LHSType
1615-
<< RHSType
1616-
<< FormatUTFCodeUnitAsCodepoint(LHSValue.getExtValue(), LHSType)
1617-
<< FormatUTFCodeUnitAsCodepoint(RHSValue.getExtValue(), RHSType);
1614+
<< RHSType;
16181615
return;
16191616
}
1620-
SemaRef.Diag(Loc, diag::warn_comparison_unicode_mixed_types)
1621-
<< LHS->getSourceRange() << RHS->getSourceRange() << LHSType << RHSType;
1617+
1618+
llvm::APSInt LHSValue(32);
1619+
LHSValue = LHSRes.Val.getInt();
1620+
llvm::APSInt RHSValue(32);
1621+
RHSValue = RHSRes.Val.getInt();
1622+
1623+
bool LHSSafe = IsSingleCodeUnitCP(LHSType, LHSValue);
1624+
bool RHSSafe = IsSingleCodeUnitCP(RHSType, RHSValue);
1625+
if (LHSSafe && RHSSafe)
1626+
return;
1627+
1628+
SemaRef.Diag(Loc, diag::warn_comparison_unicode_mixed_types_constant)
1629+
<< LHS->getSourceRange() << RHS->getSourceRange() << LHSType << RHSType
1630+
<< FormatUTFCodeUnitAsCodepoint(LHSValue.getExtValue(), LHSType)
1631+
<< FormatUTFCodeUnitAsCodepoint(RHSValue.getExtValue(), RHSType);
16221632
return;
16231633
}
16241634

16251635
if (SemaRef.getASTContext().hasSameType(LHSType, RHSType))
16261636
return;
16271637

1628-
SemaRef.Diag(Loc, diag::warn_arith_conv_mixed__unicode_types)
1638+
SemaRef.Diag(Loc, diag::warn_arith_conv_mixed_unicode_types)
16291639
<< LHS->getSourceRange() << RHS->getSourceRange() << ACK << LHSType
16301640
<< RHSType;
16311641
return;

clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,20 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
1919

2020

2121
c8(char32_t(0x7f));
22-
c8(char32_t(0x80)); // expected-warning {{implicit conversion from 'char32_t' to 'char8_t' changes the meaning of the codepoint '<U+0080>'}}
22+
c8(char32_t(0x80)); // expected-warning {{implicit conversion from 'char32_t' to 'char8_t' changes the meaning of the code point '<U+0080>'}}
2323

2424
c8(char16_t(0x7f));
25-
c8(char16_t(0x80)); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the codepoint '<U+0080>'}}
25+
c8(char16_t(0x80)); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the code point '<U+0080>'}}
2626
c8(char16_t(0xD800)); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the code unit '<0xD800>'}}
27-
c8(char16_t(0xE000)); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the codepoint '<U+E000>'}}
27+
c8(char16_t(0xE000)); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the code point '<U+E000>'}}
2828

2929

3030
c16(char32_t(0x7f));
3131
c16(char32_t(0x80));
3232
c16(char32_t(0xD7FF));
3333
c16(char32_t(0xD800)); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' changes the meaning of the code unit '<0xD800>'}}
3434
c16(char32_t(0xE000));
35-
c16(char32_t(U'🐉')); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' changes the meaning of the codepoint '🐉'}}
35+
c16(char32_t(U'🐉')); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' changes the meaning of the code point '🐉'}}
3636

3737

3838
c32(char8_t(0x7f));
@@ -49,7 +49,7 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
4949
c32(char16_t(0xE000));
5050
c32(char16_t(u''));
5151

52-
(void)static_cast<char32_t>(char8_t(0x80)); // sanity check: no explicit conversion;
52+
(void)static_cast<char32_t>(char8_t(0x80)); //no warnings for explicit conversions.
5353

5454
using Char8 = char8_t;
5555
Char8 c81 = u16; // expected-warning {{implicit conversion from 'char16_t' to 'Char8' (aka 'char8_t') may lose precision and change the meaning of the represented code unit}}
@@ -63,31 +63,27 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
6363

6464
void test_comp(char8_t u8, char16_t u16, char32_t u32) {
6565
(void)(u8 == u8' ');
66-
(void)(u8 == u' '); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char16_t' may compare different codepoints}}
67-
(void)(u8 == U' '); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char32_t' may compare different codepoints}}
66+
(void)(u8 == u' ');
67+
(void)(u8 == U' ');
6868

69-
(void)(u16 == u8' '); // expected-warning{{comparing values of different Unicode code unit types 'char16_t' and 'char8_t' may compare different codepoints}}
70-
(void)(u16 == u' ');
71-
(void)(u16 == U' '); // expected-warning{{comparing values of different Unicode code unit types 'char16_t' and 'char32_t' may compare different codepoints}}
69+
(void)(u16 == u8' ');
70+
(void)(u16 == U' ');
7271

73-
(void)(u32 == u8' '); // expected-warning{{comparing values of different Unicode code unit types 'char32_t' and 'char8_t' may compare different codepoints}}
74-
(void)(u32 == u' '); // expected-warning{{comparing values of different Unicode code unit types 'char32_t' and 'char16_t' may compare different codepoints}}
72+
(void)(u32 == u8' ');
73+
(void)(u32 == u' ');
7574
(void)(u32 == U' ');
7675

76+
(void)(u8 == u'\u00FF'); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char16_t' may compare different code points}}
77+
(void)(u8 == U'\u00FF'); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char32_t' may compare different code points}}
7778

78-
(void)(u8' ' == u' ');
79-
(void)(u8' ' == u' ');
80-
81-
82-
(void)(u8 == U' '); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char32_t' may compare different codepoints}}
83-
(void)(u16 == u8' '); // expected-warning{{comparing values of different Unicode code unit types 'char16_t' and 'char8_t' may compare different codepoints}}
84-
(void)(u16 == u' ');
85-
(void)(u16 == U' '); // expected-warning{{comparing values of different Unicode code unit types 'char16_t' and 'char32_t' may compare different codepoints}}
86-
87-
(void)(u32 == u8' '); // expected-warning{{comparing values of different Unicode code unit types 'char32_t' and 'char8_t' may compare different codepoints}}
88-
(void)(u32 == u' '); // expected-warning{{comparing values of different Unicode code unit types 'char32_t' and 'char16_t' may compare different codepoints}}
89-
(void)(u32 == U' ');
79+
(void)(u16 == u8'\xFF'); // expected-warning{{comparing values of different Unicode code unit types 'char16_t' and 'char8_t' may compare different code points}}
80+
(void)(u16 == u'\u00FF');
81+
(void)(u16 == U'\u00FF');
82+
(void)(u16 == U'\xD800'); // expected-warning{{comparing values of different Unicode code unit types 'char16_t' and 'char32_t' may compare different code points}}
9083

84+
(void)(u32 == u8'\xFF'); // expected-warning{{comparing values of different Unicode code unit types 'char32_t' and 'char8_t' may compare different code points}}
85+
(void)(u32 == u'\u00FF');
86+
(void)(u32 == u'\xD800'); // expected-warning{{comparing values of different Unicode code unit types 'char32_t' and 'char16_t' may compare different code points}}
9187

9288
(void)(char8_t(0x7f) == char8_t(0x7f));
9389
(void)(char8_t(0x7f) == char16_t(0x7f));

0 commit comments

Comments
 (0)