-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[clang] implement current direction of CWG2765 for string literal comparisons in constant evaluation #109208
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang] implement current direction of CWG2765 for string literal comparisons in constant evaluation #109208
Changes from 13 commits
8119356
2f8cfbe
def1aa7
0e8200b
7286ca9
8dbf826
077f4f8
4c869c9
f221777
fe947fa
be369dd
2b033b2
9939280
a9d6923
6cf8d3b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,8 +54,10 @@ | |
| #include "clang/Basic/DiagnosticSema.h" | ||
| #include "clang/Basic/TargetInfo.h" | ||
| #include "llvm/ADT/APFixedPoint.h" | ||
| #include "llvm/ADT/Sequence.h" | ||
| #include "llvm/ADT/SmallBitVector.h" | ||
| #include "llvm/ADT/StringExtras.h" | ||
| #include "llvm/Support/Casting.h" | ||
| #include "llvm/Support/Debug.h" | ||
| #include "llvm/Support/SaveAndRestore.h" | ||
| #include "llvm/Support/SipHash.h" | ||
|
|
@@ -2061,15 +2063,21 @@ static bool EvaluateIgnoredValue(EvalInfo &Info, const Expr *E) { | |
| return true; | ||
| } | ||
|
|
||
| /// Should this call expression be treated as a no-op? | ||
| static bool IsNoOpCall(const CallExpr *E) { | ||
| /// Should this call expression be treated as forming an opaque constant? | ||
| static bool IsOpaqueConstantCall(const CallExpr *E) { | ||
| unsigned Builtin = E->getBuiltinCallee(); | ||
| return (Builtin == Builtin::BI__builtin___CFStringMakeConstantString || | ||
| Builtin == Builtin::BI__builtin___NSStringMakeConstantString || | ||
| Builtin == Builtin::BI__builtin_ptrauth_sign_constant || | ||
| Builtin == Builtin::BI__builtin_function_start); | ||
| } | ||
|
|
||
| static bool IsOpaqueConstantCall(const LValue &LVal) { | ||
| const auto *BaseExpr = | ||
| llvm::dyn_cast_if_present<CallExpr>(LVal.Base.dyn_cast<const Expr *>()); | ||
| return BaseExpr && IsOpaqueConstantCall(BaseExpr); | ||
| } | ||
|
|
||
| static bool IsGlobalLValue(APValue::LValueBase B) { | ||
| // C++11 [expr.const]p3 An address constant expression is a prvalue core | ||
| // constant expression of pointer type that evaluates to... | ||
|
|
@@ -2115,7 +2123,7 @@ static bool IsGlobalLValue(APValue::LValueBase B) { | |
| case Expr::ObjCBoxedExprClass: | ||
| return cast<ObjCBoxedExpr>(E)->isExpressibleAsConstantInitializer(); | ||
| case Expr::CallExprClass: | ||
| return IsNoOpCall(cast<CallExpr>(E)); | ||
| return IsOpaqueConstantCall(cast<CallExpr>(E)); | ||
| // For GCC compatibility, &&label has static storage duration. | ||
| case Expr::AddrLabelExprClass: | ||
| return true; | ||
|
|
@@ -2142,11 +2150,91 @@ static const ValueDecl *GetLValueBaseDecl(const LValue &LVal) { | |
| return LVal.Base.dyn_cast<const ValueDecl*>(); | ||
| } | ||
|
|
||
| static bool IsLiteralLValue(const LValue &Value) { | ||
| if (Value.getLValueCallIndex()) | ||
| // Information about an LValueBase that is some kind of string. | ||
| struct LValueBaseString { | ||
| std::string ObjCEncodeStorage; | ||
| StringRef Bytes; | ||
| int CharWidth; | ||
| }; | ||
|
|
||
| // Gets the lvalue base of LVal as a string. | ||
| static bool GetLValueBaseAsString(const EvalInfo &Info, const LValue &LVal, | ||
| LValueBaseString &AsString) { | ||
| const auto *BaseExpr = LVal.Base.dyn_cast<const Expr *>(); | ||
| if (!BaseExpr) | ||
| return false; | ||
|
|
||
| // For ObjCEncodeExpr, we need to compute and store the string. | ||
| if (const auto *EE = dyn_cast<ObjCEncodeExpr>(BaseExpr)) { | ||
| Info.Ctx.getObjCEncodingForType(EE->getEncodedType(), | ||
| AsString.ObjCEncodeStorage); | ||
| AsString.Bytes = AsString.ObjCEncodeStorage; | ||
| AsString.CharWidth = 1; | ||
| return true; | ||
| } | ||
|
|
||
| // Otherwise, we have a StringLiteral. | ||
| const auto *Lit = dyn_cast<StringLiteral>(BaseExpr); | ||
| if (const auto *PE = dyn_cast<PredefinedExpr>(BaseExpr)) | ||
| Lit = PE->getFunctionName(); | ||
|
|
||
| if (!Lit) | ||
| return false; | ||
| const Expr *E = Value.Base.dyn_cast<const Expr*>(); | ||
| return E && !isa<MaterializeTemporaryExpr>(E); | ||
|
|
||
| AsString.Bytes = Lit->getBytes(); | ||
| AsString.CharWidth = Lit->getCharByteWidth(); | ||
| return true; | ||
| } | ||
|
|
||
| // Determine whether two string literals potentially overlap. This will be the | ||
| // case if they agree on the values of all the bytes on the overlapping region | ||
| // between them. | ||
tbaederr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // | ||
| // The overlapping region is the portion of the two string literals that must | ||
| // overlap in memory if the pointers actually point to the same address at | ||
| // runtime. For example, if LHS is "abcdef" + 3 and RHS is "cdef\0gh" + 1 then | ||
| // the overlapping region is "cdef\0", which in this case does agree, so the | ||
| // strings are potentially overlapping. Conversely, for "foobar" + 3 versus | ||
| // "bazbar" + 3, the overlapping region contains all of both strings, so they | ||
| // are not potentially overlapping, even though they agree from the given | ||
| // addresses onwards. | ||
| // | ||
| // See open core issue CWG2765 which is discussing the desired rule here. | ||
| static bool ArePotentiallyOverlappingStringLiterals(const EvalInfo &Info, | ||
| const LValue &LHS, | ||
| const LValue &RHS) { | ||
| LValueBaseString LHSString, RHSString; | ||
| if (!GetLValueBaseAsString(Info, LHS, LHSString) || | ||
| !GetLValueBaseAsString(Info, RHS, RHSString)) | ||
| return false; | ||
|
|
||
| // This is the byte offset to the location of the first character of LHS | ||
| // within RHS. We don't need to look at the characters of one string that | ||
| // would appear before the start of the other string if they were merged. | ||
| CharUnits Offset = RHS.Offset - LHS.Offset; | ||
| if (Offset.isNegative()) | ||
| LHSString.Bytes = LHSString.Bytes.drop_front(-Offset.getQuantity()); | ||
| else | ||
| RHSString.Bytes = RHSString.Bytes.drop_front(Offset.getQuantity()); | ||
|
|
||
| bool LHSIsLonger = LHSString.Bytes.size() > RHSString.Bytes.size(); | ||
| StringRef Longer = LHSIsLonger ? LHSString.Bytes : RHSString.Bytes; | ||
| StringRef Shorter = LHSIsLonger ? RHSString.Bytes : LHSString.Bytes; | ||
| int ShorterCharWidth = (LHSIsLonger ? RHSString : LHSString).CharWidth; | ||
erichkeane marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // The null terminator isn't included in the string data, so check for it | ||
| // manually. If the longer string doesn't have a null terminator where the | ||
| // shorter string ends, they aren't potentially overlapping. | ||
| for (int nullByte : llvm::seq(ShorterCharWidth)) { | ||
zygoloid marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if (Shorter.size() + nullByte >= Longer.size()) | ||
| break; | ||
| if (Longer[Shorter.size() + nullByte]) | ||
zygoloid marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return false; | ||
| } | ||
|
|
||
| // Otherwise, they're potentially overlapping if and only if the overlapping | ||
| // region is the same. | ||
| return Shorter == Longer.take_front(Shorter.size()); | ||
| } | ||
|
|
||
| static bool IsWeakLValue(const LValue &Value) { | ||
|
|
@@ -8573,7 +8661,10 @@ class LValueExprEvaluator | |
| bool VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E); | ||
| bool VisitCompoundLiteralExpr(const CompoundLiteralExpr *E); | ||
| bool VisitMemberExpr(const MemberExpr *E); | ||
| bool VisitStringLiteral(const StringLiteral *E) { return Success(E); } | ||
| bool VisitStringLiteral(const StringLiteral *E) { | ||
| uint64_t Version = Info.getASTContext().getNextStringLiteralVersion(); | ||
|
||
| return Success(APValue::LValueBase(E, 0, static_cast<unsigned>(Version))); | ||
| } | ||
| bool VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { return Success(E); } | ||
| bool VisitCXXTypeidExpr(const CXXTypeidExpr *E); | ||
| bool VisitCXXUuidofExpr(const CXXUuidofExpr *E); | ||
|
|
@@ -9639,7 +9730,7 @@ static bool isOneByteCharacterType(QualType T) { | |
|
|
||
| bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, | ||
| unsigned BuiltinOp) { | ||
| if (IsNoOpCall(E)) | ||
| if (IsOpaqueConstantCall(E)) | ||
| return Success(E); | ||
|
|
||
| switch (BuiltinOp) { | ||
|
|
@@ -13889,13 +13980,22 @@ EvaluateComparisonBinaryOperator(EvalInfo &Info, const BinaryOperator *E, | |
| (!RHSValue.Base && !RHSValue.Offset.isZero())) | ||
| return DiagComparison(diag::note_constexpr_pointer_constant_comparison, | ||
| !RHSValue.Base); | ||
| // It's implementation-defined whether distinct literals will have | ||
| // distinct addresses. In clang, the result of such a comparison is | ||
| // unspecified, so it is not a constant expression. However, we do know | ||
| // that the address of a literal will be non-null. | ||
| if ((IsLiteralLValue(LHSValue) || IsLiteralLValue(RHSValue)) && | ||
| LHSValue.Base && RHSValue.Base) | ||
| // C++2c [intro.object]/10: | ||
| // Two objects [...] may have the same address if [...] they are both | ||
| // potentially non-unique objects. | ||
| // C++2c [intro.object]/9: | ||
| // An object is potentially non-unique if it is a string literal object, | ||
| // the backing array of an initializer list, or a subobject thereof. | ||
| // | ||
| // This makes the comparison result unspecified, so it's not a constant | ||
| // expression. | ||
| // | ||
| // TODO: Do we need to handle the initializer list case here? | ||
| if (ArePotentiallyOverlappingStringLiterals(Info, LHSValue, RHSValue)) | ||
| return DiagComparison(diag::note_constexpr_literal_comparison); | ||
| if (IsOpaqueConstantCall(LHSValue) || IsOpaqueConstantCall(RHSValue)) | ||
| return DiagComparison(diag::note_constexpr_opaque_call_comparison, | ||
| !IsOpaqueConstantCall(LHSValue)); | ||
| // We can't tell whether weak symbols will end up pointing to the same | ||
| // object. | ||
| if (IsWeakLValue(LHSValue) || IsWeakLValue(RHSValue)) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -99,7 +99,7 @@ constexpr int f() { | |
| static_assert(f()); | ||
| #endif | ||
|
|
||
| /// Distinct literals have disctinct addresses. | ||
| /// Distinct literals have distinct addresses. | ||
| /// see https://github.com/llvm/llvm-project/issues/58754 | ||
| constexpr auto foo(const char *p) { return p; } | ||
| constexpr auto p1 = "test1"; | ||
|
|
@@ -108,22 +108,16 @@ constexpr auto p2 = "test2"; | |
| constexpr bool b1 = foo(p1) == foo(p1); | ||
| static_assert(b1); | ||
|
|
||
| constexpr bool b2 = foo(p1) == foo(p2); // ref-error {{must be initialized by a constant expression}} \ | ||
| // ref-note {{comparison of addresses of literals}} \ | ||
| // ref-note {{declared here}} | ||
| static_assert(!b2); // ref-error {{not an integral constant expression}} \ | ||
| // ref-note {{not a constant expression}} | ||
| constexpr bool b2 = foo(p1) == foo(p2); | ||
| static_assert(!b2); | ||
|
|
||
| constexpr auto name1() { return "name1"; } | ||
| constexpr auto name2() { return "name2"; } | ||
|
|
||
| constexpr auto b3 = name1() == name1(); | ||
| static_assert(b3); | ||
| constexpr auto b4 = name1() == name2(); // ref-error {{must be initialized by a constant expression}} \ | ||
| // ref-note {{has unspecified value}} \ | ||
| // ref-note {{declared here}} | ||
| static_assert(!b4); // ref-error {{not an integral constant expression}} \ | ||
| // ref-note {{not a constant expression}} | ||
| constexpr auto b3 = name1() == name1(); // ref-error {{must be initialized by a constant expression}} \ | ||
| // ref-note {{comparison of addresses of literals}} | ||
| constexpr auto b4 = name1() == name2(); | ||
| static_assert(!b4); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just from looking at these few lines, I don't understand why The bytecode interpreter simply creates global variables for string literals, so
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The C++ model in general is that each evaluation of a string literal expression can produce a distinct object, and that these objects can fully or partially overlap each other in memory when they have suitable values; that's the model that this PR is implementing.
|
||
|
|
||
| namespace UninitializedFields { | ||
| class A { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,60 @@ | ||
| // RUN: rm -rf %t | ||
| // RUN: mkdir -p %t | ||
| // RUN: split-file %s %t | ||
|
|
||
| // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/a.cpp \ | ||
| // RUN: -o %t/A.pcm | ||
|
|
||
| // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/b.cpp \ | ||
| // RUN: -fmodule-file=A=%t/A.pcm -o %t/B.pcm | ||
|
|
||
| // RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/c.cpp \ | ||
| // RUN: -fmodule-file=A=%t/A.pcm -o %t/C.pcm | ||
|
|
||
| // RUN: %clang_cc1 -std=c++20 -verify %t/main.cpp \ | ||
| // RUN: -fmodule-file=A=%t/A.pcm \ | ||
| // RUN: -fmodule-file=B=%t/B.pcm \ | ||
| // RUN: -fmodule-file=C=%t/C.pcm | ||
|
|
||
| // expected-no-diagnostics | ||
|
|
||
| //--- a.cpp | ||
|
|
||
| export module A; | ||
| export consteval const char *hello() { return "hello"; } | ||
| export constexpr const char *helloA0 = hello(); | ||
| export constexpr const char *helloA1 = helloA0; | ||
| export constexpr const char *helloA2 = hello(); | ||
|
|
||
| //--- b.cpp | ||
|
|
||
| export module B; | ||
| import A; | ||
| export constexpr const char *helloB1 = helloA0; | ||
| export constexpr const char *helloB2 = hello(); | ||
|
|
||
| //--- c.cpp | ||
|
|
||
| export module C; | ||
| import A; | ||
| export constexpr const char *helloC1 = helloA1; | ||
| export constexpr const char *helloC2 = hello(); | ||
|
|
||
| //--- main.cpp | ||
|
|
||
| import A; | ||
| import B; | ||
| import C; | ||
|
|
||
| // These are valid: they refer to the same evaluation of the same constant. | ||
| static_assert(helloA0 == helloA1); | ||
| static_assert(helloA0 == helloB1); | ||
| static_assert(helloA0 == helloC1); | ||
|
|
||
| // These refer to distinct evaluations, and so may or may not be equal. | ||
| static_assert(helloA1 == helloA2); // expected-error {{}} expected-note {{unspecified value}} | ||
| static_assert(helloA1 == helloB2); // expected-error {{}} expected-note {{unspecified value}} | ||
| static_assert(helloA1 == helloC2); // expected-error {{}} expected-note {{unspecified value}} | ||
| static_assert(helloA2 == helloB2); // expected-error {{}} expected-note {{unspecified value}} | ||
| static_assert(helloA2 == helloC2); // expected-error {{}} expected-note {{unspecified value}} | ||
| static_assert(helloB2 == helloC2); // expected-error {{}} expected-note {{unspecified value}} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we get test coverage for these cases?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added.