-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[clang] implement current direction of CWG2765 for string literal comparisons in constant evaluation #109208
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang] implement current direction of CWG2765 for string literal comparisons in constant evaluation #109208
Changes from 3 commits
8119356
2f8cfbe
def1aa7
0e8200b
7286ca9
8dbf826
077f4f8
4c869c9
f221777
fe947fa
be369dd
2b033b2
9939280
a9d6923
6cf8d3b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -324,6 +324,13 @@ class ASTContext : public RefCountedBase<ASTContext> { | |
| /// This is lazily created. This is intentionally not serialized. | ||
| mutable llvm::StringMap<StringLiteral *> StringLiteralCache; | ||
|
|
||
| /// The next string literal "version" to allocate during constant evaluation. | ||
| /// This is used to distinguish between repeated evaluations of the same | ||
| /// string literal. | ||
| /// | ||
| /// TODO: Ensure version numbers don't collide when deserialized. | ||
|
||
| unsigned NextStringLiteralVersion = 0; | ||
|
|
||
| /// MD5 hash of CUID. It is calculated when first used and cached by this | ||
| /// data member. | ||
| mutable std::string CUIDHash; | ||
|
|
@@ -3278,6 +3285,10 @@ class ASTContext : public RefCountedBase<ASTContext> { | |
| /// PredefinedExpr to cache evaluated results. | ||
| StringLiteral *getPredefinedStringLiteralFromCache(StringRef Key) const; | ||
|
|
||
| /// Return the next version number to be used for a string literal evaluated | ||
| /// as part of constant evaluation. | ||
| unsigned getNextStringLiteralVersion() { return NextStringLiteralVersion++; } | ||
|
|
||
| /// Return a declaration for the global GUID object representing the given | ||
| /// GUID value. | ||
| MSGuidDecl *getMSGuidDecl(MSGuidDeclParts Parts) const; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -96,6 +96,8 @@ def note_constexpr_pointer_constant_comparison : Note< | |
| "at runtime">; | ||
| def note_constexpr_literal_comparison : Note< | ||
| "comparison of addresses of literals has unspecified value">; | ||
| def note_constexpr_opaque_call_comparison : Note< | ||
| "comparison against opaque constant has unspecified value">; | ||
|
||
| def note_constexpr_pointer_weak_comparison : Note< | ||
| "comparison against address of weak declaration '%0' can only be performed " | ||
| "at runtime">; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,8 +54,10 @@ | |
| #include "clang/Basic/DiagnosticSema.h" | ||
| #include "clang/Basic/TargetInfo.h" | ||
| #include "llvm/ADT/APFixedPoint.h" | ||
| #include "llvm/ADT/Sequence.h" | ||
| #include "llvm/ADT/SmallBitVector.h" | ||
| #include "llvm/ADT/StringExtras.h" | ||
| #include "llvm/Support/Casting.h" | ||
| #include "llvm/Support/Debug.h" | ||
| #include "llvm/Support/SaveAndRestore.h" | ||
| #include "llvm/Support/SipHash.h" | ||
|
|
@@ -2061,15 +2063,21 @@ static bool EvaluateIgnoredValue(EvalInfo &Info, const Expr *E) { | |
| return true; | ||
| } | ||
|
|
||
| /// Should this call expression be treated as a no-op? | ||
| static bool IsNoOpCall(const CallExpr *E) { | ||
| /// Should this call expression be treated as forming an opaque constant? | ||
| static bool IsOpaqueConstantCall(const CallExpr *E) { | ||
| unsigned Builtin = E->getBuiltinCallee(); | ||
| return (Builtin == Builtin::BI__builtin___CFStringMakeConstantString || | ||
| Builtin == Builtin::BI__builtin___NSStringMakeConstantString || | ||
| Builtin == Builtin::BI__builtin_ptrauth_sign_constant || | ||
|
Comment on lines
2069
to
2071
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we get test coverage for these cases?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added. |
||
| Builtin == Builtin::BI__builtin_function_start); | ||
| } | ||
|
|
||
| static bool IsOpaqueConstantCall(const LValue &LVal) { | ||
| auto *BaseExpr = | ||
| llvm::dyn_cast_or_null<CallExpr>(LVal.Base.dyn_cast<const Expr *>()); | ||
zygoloid marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return BaseExpr && IsOpaqueConstantCall(BaseExpr); | ||
| } | ||
|
|
||
| static bool IsGlobalLValue(APValue::LValueBase B) { | ||
| // C++11 [expr.const]p3 An address constant expression is a prvalue core | ||
| // constant expression of pointer type that evaluates to... | ||
|
|
@@ -2115,7 +2123,7 @@ static bool IsGlobalLValue(APValue::LValueBase B) { | |
| case Expr::ObjCBoxedExprClass: | ||
| return cast<ObjCBoxedExpr>(E)->isExpressibleAsConstantInitializer(); | ||
| case Expr::CallExprClass: | ||
| return IsNoOpCall(cast<CallExpr>(E)); | ||
| return IsOpaqueConstantCall(cast<CallExpr>(E)); | ||
| // For GCC compatibility, &&label has static storage duration. | ||
| case Expr::AddrLabelExprClass: | ||
| return true; | ||
|
|
@@ -2142,11 +2150,81 @@ static const ValueDecl *GetLValueBaseDecl(const LValue &LVal) { | |
| return LVal.Base.dyn_cast<const ValueDecl*>(); | ||
| } | ||
|
|
||
| static bool IsLiteralLValue(const LValue &Value) { | ||
| if (Value.getLValueCallIndex()) | ||
| // Information about an LValueBase that is some kind of string. | ||
| struct LValueBaseString { | ||
| std::string ObjCEncodeStorage; | ||
| StringRef Bytes; | ||
| int CharWidth; | ||
| }; | ||
|
|
||
| // Gets the lvalue base of LVal as a string. | ||
| static bool GetLValueBaseAsString(const EvalInfo &Info, const LValue &LVal, | ||
| LValueBaseString &AsString) { | ||
| const auto *BaseExpr = LVal.Base.dyn_cast<const Expr *>(); | ||
| if (!BaseExpr) | ||
| return false; | ||
|
|
||
| // For ObjCEncodeExpr, we need to compute and store the string. | ||
| if (const auto *EE = dyn_cast<ObjCEncodeExpr>(BaseExpr)) { | ||
| Info.Ctx.getObjCEncodingForType(EE->getEncodedType(), | ||
| AsString.ObjCEncodeStorage); | ||
| AsString.Bytes = AsString.ObjCEncodeStorage; | ||
| AsString.CharWidth = 1; | ||
| return true; | ||
| } | ||
|
|
||
| // Otherwise, we have a StringLiteral. | ||
| const auto *Lit = dyn_cast<StringLiteral>(BaseExpr); | ||
| if (const auto *PE = dyn_cast<PredefinedExpr>(BaseExpr)) | ||
| Lit = PE->getFunctionName(); | ||
|
|
||
| if (!Lit) | ||
| return false; | ||
|
|
||
| AsString.Bytes = Lit->getBytes(); | ||
| AsString.CharWidth = Lit->getCharByteWidth(); | ||
| return true; | ||
| } | ||
|
|
||
| // Determine whether two string literals potentially overlap. This will be the | ||
| // case if they agree on the values of all the bytes on the overlapping region | ||
| // between them. | ||
tbaederr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| static bool ArePotentiallyOverlappingStringLiterals(const EvalInfo &Info, | ||
| const LValue &LHS, | ||
| const LValue &RHS) { | ||
| LValueBaseString LHSString, RHSString; | ||
| if (!GetLValueBaseAsString(Info, LHS, LHSString) || | ||
| !GetLValueBaseAsString(Info, RHS, RHSString)) | ||
| return false; | ||
| const Expr *E = Value.Base.dyn_cast<const Expr*>(); | ||
| return E && !isa<MaterializeTemporaryExpr>(E); | ||
|
|
||
| // This is the byte offset to the location of the first character of LHS | ||
| // within RHS. We don't need to look at the characters of one string that | ||
| // would appear before the start of the other string if they were merged. | ||
| CharUnits Offset = RHS.Offset - LHS.Offset; | ||
| if (Offset.isPositive()) { | ||
| RHSString.Bytes = RHSString.Bytes.drop_front(Offset.getQuantity()); | ||
| } else if (Offset.isNegative()) { | ||
erichkeane marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| LHSString.Bytes = LHSString.Bytes.drop_front(-Offset.getQuantity()); | ||
| } | ||
zygoloid marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| bool LHSIsLonger = LHSString.Bytes.size() > RHSString.Bytes.size(); | ||
| StringRef Longer = LHSIsLonger ? LHSString.Bytes : RHSString.Bytes; | ||
| StringRef Shorter = LHSIsLonger ? RHSString.Bytes : LHSString.Bytes; | ||
| int ShorterCharWidth = (LHSIsLonger ? RHSString : LHSString).CharWidth; | ||
erichkeane marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // The null terminator isn't included in the string data, so check for it | ||
| // manually. If the longer string doesn't have a null terminator where the | ||
| // shorter string ends, they aren't potentially overlapping. | ||
| for (int nullByte : llvm::seq(ShorterCharWidth)) { | ||
zygoloid marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if (Shorter.size() + nullByte >= Longer.size()) | ||
| break; | ||
| if (Longer[Shorter.size() + nullByte]) | ||
zygoloid marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return false; | ||
| } | ||
|
|
||
| // Otherwise, they're potentially overlapping if and only if the overlapping | ||
| // region is the same. | ||
| return Shorter == Longer.take_front(Shorter.size()); | ||
| } | ||
|
|
||
| static bool IsWeakLValue(const LValue &Value) { | ||
|
|
@@ -8573,7 +8651,10 @@ class LValueExprEvaluator | |
| bool VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E); | ||
| bool VisitCompoundLiteralExpr(const CompoundLiteralExpr *E); | ||
| bool VisitMemberExpr(const MemberExpr *E); | ||
| bool VisitStringLiteral(const StringLiteral *E) { return Success(E); } | ||
| bool VisitStringLiteral(const StringLiteral *E) { | ||
| uint64_t Version = Info.getASTContext().getNextStringLiteralVersion(); | ||
|
||
| return Success(APValue::LValueBase(E, 0, static_cast<unsigned>(Version))); | ||
| } | ||
| bool VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { return Success(E); } | ||
| bool VisitCXXTypeidExpr(const CXXTypeidExpr *E); | ||
| bool VisitCXXUuidofExpr(const CXXUuidofExpr *E); | ||
|
|
@@ -9639,7 +9720,7 @@ static bool isOneByteCharacterType(QualType T) { | |
|
|
||
| bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, | ||
| unsigned BuiltinOp) { | ||
| if (IsNoOpCall(E)) | ||
| if (IsOpaqueConstantCall(E)) | ||
| return Success(E); | ||
|
|
||
| switch (BuiltinOp) { | ||
|
|
@@ -13889,13 +13970,21 @@ EvaluateComparisonBinaryOperator(EvalInfo &Info, const BinaryOperator *E, | |
| (!RHSValue.Base && !RHSValue.Offset.isZero())) | ||
| return DiagComparison(diag::note_constexpr_pointer_constant_comparison, | ||
| !RHSValue.Base); | ||
| // It's implementation-defined whether distinct literals will have | ||
| // distinct addresses. In clang, the result of such a comparison is | ||
| // unspecified, so it is not a constant expression. However, we do know | ||
| // that the address of a literal will be non-null. | ||
| if ((IsLiteralLValue(LHSValue) || IsLiteralLValue(RHSValue)) && | ||
| LHSValue.Base && RHSValue.Base) | ||
| // C++2c [intro.object]/10: | ||
| // Two objects [...] may have the same address if [...] they are both | ||
| // potentially non-unique objects. | ||
| // C++2c [intro.object]/9: | ||
| // An object is potentially non-unique if it is a string literal object, | ||
| // the backing array of an initializer list, or a subobject thereof. | ||
| // | ||
| // This makes the comparison result unspecified, so it's not a constant | ||
| // expression. | ||
| // | ||
| // TODO: Do we need to handle the initializer list case here? | ||
| if (ArePotentiallyOverlappingStringLiterals(Info, LHSValue, RHSValue)) | ||
| return DiagComparison(diag::note_constexpr_literal_comparison); | ||
| if (IsOpaqueConstantCall(LHSValue) || IsOpaqueConstantCall(RHSValue)) | ||
| return DiagComparison(diag::note_constexpr_opaque_call_comparison); | ||
| // We can't tell whether weak symbols will end up pointing to the same | ||
| // object. | ||
| if (IsWeakLValue(LHSValue) || IsWeakLValue(RHSValue)) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -99,7 +99,7 @@ constexpr int f() { | |
| static_assert(f()); | ||
| #endif | ||
|
|
||
| /// Distinct literals have disctinct addresses. | ||
| /// Distinct literals have distinct addresses. | ||
| /// see https://github.com/llvm/llvm-project/issues/58754 | ||
| constexpr auto foo(const char *p) { return p; } | ||
| constexpr auto p1 = "test1"; | ||
|
|
@@ -108,22 +108,16 @@ constexpr auto p2 = "test2"; | |
| constexpr bool b1 = foo(p1) == foo(p1); | ||
| static_assert(b1); | ||
|
|
||
| constexpr bool b2 = foo(p1) == foo(p2); // ref-error {{must be initialized by a constant expression}} \ | ||
| // ref-note {{comparison of addresses of literals}} \ | ||
| // ref-note {{declared here}} | ||
| static_assert(!b2); // ref-error {{not an integral constant expression}} \ | ||
| // ref-note {{not a constant expression}} | ||
| constexpr bool b2 = foo(p1) == foo(p2); | ||
| static_assert(!b2); | ||
|
|
||
| constexpr auto name1() { return "name1"; } | ||
| constexpr auto name2() { return "name2"; } | ||
|
|
||
| constexpr auto b3 = name1() == name1(); | ||
| static_assert(b3); | ||
| constexpr auto b4 = name1() == name2(); // ref-error {{must be initialized by a constant expression}} \ | ||
| // ref-note {{has unspecified value}} \ | ||
| // ref-note {{declared here}} | ||
| static_assert(!b4); // ref-error {{not an integral constant expression}} \ | ||
| // ref-note {{not a constant expression}} | ||
| constexpr auto b3 = name1() == name1(); // ref-error {{must be initialized by a constant expression}} \ | ||
| // ref-note {{comparison of addresses of literals}} | ||
| constexpr auto b4 = name1() == name2(); | ||
| static_assert(!b4); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just from looking at these few lines, I don't understand why The bytecode interpreter simply creates global variables for string literals, so
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The C++ model in general is that each evaluation of a string literal expression can produce a distinct object, and that these objects can fully or partially overlap each other in memory when they have suitable values; that's the model that this PR is implementing.
|
||
|
|
||
| namespace UninitializedFields { | ||
| class A { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1306,3 +1306,18 @@ constexpr int field(int a) { | |
| static_assert(field(3), ""); // expected-error {{constant expression}} \ | ||
| // expected-note {{in call to 'field(3)'}} | ||
| } | ||
|
|
||
| namespace literal_comparison { | ||
|
|
||
| constexpr bool different_in_loop(bool b = false) { | ||
| if (b) return false; | ||
|
Comment on lines
+1312
to
+1313
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the purpose of this 'b' parameter?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to suppress the "constexpr function never produces a constant expression" diagnostic. |
||
|
|
||
| const char *p[2] = {}; | ||
| for (const char *&r : p) | ||
| r = "hello"; | ||
| return p[0] == p[1]; // expected-note {{addresses of literals}} | ||
| } | ||
| constexpr bool check = different_in_loop(); | ||
| // expected-error@-1 {{}} expected-note@-1 {{in call}} | ||
|
|
||
| } | ||
zygoloid marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we add a test case involving modules to make sure that behavior is reasonable?