5454#include "clang/Basic/DiagnosticSema.h"
5555#include "clang/Basic/TargetInfo.h"
5656#include "llvm/ADT/APFixedPoint.h"
57+ #include "llvm/ADT/Sequence.h"
5758#include "llvm/ADT/SmallBitVector.h"
5859#include "llvm/ADT/StringExtras.h"
60+ #include "llvm/Support/Casting.h"
5961#include "llvm/Support/Debug.h"
6062#include "llvm/Support/SaveAndRestore.h"
6163#include "llvm/Support/SipHash.h"
@@ -2061,15 +2063,21 @@ static bool EvaluateIgnoredValue(EvalInfo &Info, const Expr *E) {
20612063 return true;
20622064}
20632065
2064- /// Should this call expression be treated as a no-op ?
2065- static bool IsNoOpCall (const CallExpr *E) {
2066+ /// Should this call expression be treated as forming an opaque constant ?
2067+ static bool IsOpaqueConstantCall (const CallExpr *E) {
20662068 unsigned Builtin = E->getBuiltinCallee();
20672069 return (Builtin == Builtin::BI__builtin___CFStringMakeConstantString ||
20682070 Builtin == Builtin::BI__builtin___NSStringMakeConstantString ||
20692071 Builtin == Builtin::BI__builtin_ptrauth_sign_constant ||
20702072 Builtin == Builtin::BI__builtin_function_start);
20712073}
20722074
2075+ static bool IsOpaqueConstantCall(const LValue &LVal) {
2076+ const auto *BaseExpr =
2077+ llvm::dyn_cast_if_present<CallExpr>(LVal.Base.dyn_cast<const Expr *>());
2078+ return BaseExpr && IsOpaqueConstantCall(BaseExpr);
2079+ }
2080+
20732081static bool IsGlobalLValue(APValue::LValueBase B) {
20742082 // C++11 [expr.const]p3 An address constant expression is a prvalue core
20752083 // constant expression of pointer type that evaluates to...
@@ -2115,7 +2123,7 @@ static bool IsGlobalLValue(APValue::LValueBase B) {
21152123 case Expr::ObjCBoxedExprClass:
21162124 return cast<ObjCBoxedExpr>(E)->isExpressibleAsConstantInitializer();
21172125 case Expr::CallExprClass:
2118- return IsNoOpCall (cast<CallExpr>(E));
2126+ return IsOpaqueConstantCall (cast<CallExpr>(E));
21192127 // For GCC compatibility, &&label has static storage duration.
21202128 case Expr::AddrLabelExprClass:
21212129 return true;
@@ -2142,11 +2150,91 @@ static const ValueDecl *GetLValueBaseDecl(const LValue &LVal) {
21422150 return LVal.Base.dyn_cast<const ValueDecl*>();
21432151}
21442152
2145- static bool IsLiteralLValue(const LValue &Value) {
2146- if (Value.getLValueCallIndex())
2153+ // Information about an LValueBase that is some kind of string.
2154+ struct LValueBaseString {
2155+ std::string ObjCEncodeStorage;
2156+ StringRef Bytes;
2157+ int CharWidth;
2158+ };
2159+
2160+ // Gets the lvalue base of LVal as a string.
2161+ static bool GetLValueBaseAsString(const EvalInfo &Info, const LValue &LVal,
2162+ LValueBaseString &AsString) {
2163+ const auto *BaseExpr = LVal.Base.dyn_cast<const Expr *>();
2164+ if (!BaseExpr)
2165+ return false;
2166+
2167+ // For ObjCEncodeExpr, we need to compute and store the string.
2168+ if (const auto *EE = dyn_cast<ObjCEncodeExpr>(BaseExpr)) {
2169+ Info.Ctx.getObjCEncodingForType(EE->getEncodedType(),
2170+ AsString.ObjCEncodeStorage);
2171+ AsString.Bytes = AsString.ObjCEncodeStorage;
2172+ AsString.CharWidth = 1;
2173+ return true;
2174+ }
2175+
2176+ // Otherwise, we have a StringLiteral.
2177+ const auto *Lit = dyn_cast<StringLiteral>(BaseExpr);
2178+ if (const auto *PE = dyn_cast<PredefinedExpr>(BaseExpr))
2179+ Lit = PE->getFunctionName();
2180+
2181+ if (!Lit)
21472182 return false;
2148- const Expr *E = Value.Base.dyn_cast<const Expr*>();
2149- return E && !isa<MaterializeTemporaryExpr>(E);
2183+
2184+ AsString.Bytes = Lit->getBytes();
2185+ AsString.CharWidth = Lit->getCharByteWidth();
2186+ return true;
2187+ }
2188+
2189+ // Determine whether two string literals potentially overlap. This will be the
2190+ // case if they agree on the values of all the bytes on the overlapping region
2191+ // between them.
2192+ //
2193+ // The overlapping region is the portion of the two string literals that must
2194+ // overlap in memory if the pointers actually point to the same address at
2195+ // runtime. For example, if LHS is "abcdef" + 3 and RHS is "cdef\0gh" + 1 then
2196+ // the overlapping region is "cdef\0", which in this case does agree, so the
2197+ // strings are potentially overlapping. Conversely, for "foobar" + 3 versus
2198+ // "bazbar" + 3, the overlapping region contains all of both strings, so they
2199+ // are not potentially overlapping, even though they agree from the given
2200+ // addresses onwards.
2201+ //
2202+ // See open core issue CWG2765 which is discussing the desired rule here.
2203+ static bool ArePotentiallyOverlappingStringLiterals(const EvalInfo &Info,
2204+ const LValue &LHS,
2205+ const LValue &RHS) {
2206+ LValueBaseString LHSString, RHSString;
2207+ if (!GetLValueBaseAsString(Info, LHS, LHSString) ||
2208+ !GetLValueBaseAsString(Info, RHS, RHSString))
2209+ return false;
2210+
2211+ // This is the byte offset to the location of the first character of LHS
2212+ // within RHS. We don't need to look at the characters of one string that
2213+ // would appear before the start of the other string if they were merged.
2214+ CharUnits Offset = RHS.Offset - LHS.Offset;
2215+ if (Offset.isNegative())
2216+ LHSString.Bytes = LHSString.Bytes.drop_front(-Offset.getQuantity());
2217+ else
2218+ RHSString.Bytes = RHSString.Bytes.drop_front(Offset.getQuantity());
2219+
2220+ bool LHSIsLonger = LHSString.Bytes.size() > RHSString.Bytes.size();
2221+ StringRef Longer = LHSIsLonger ? LHSString.Bytes : RHSString.Bytes;
2222+ StringRef Shorter = LHSIsLonger ? RHSString.Bytes : LHSString.Bytes;
2223+ int ShorterCharWidth = (LHSIsLonger ? RHSString : LHSString).CharWidth;
2224+
2225+ // The null terminator isn't included in the string data, so check for it
2226+ // manually. If the longer string doesn't have a null terminator where the
2227+ // shorter string ends, they aren't potentially overlapping.
2228+ for (int NullByte : llvm::seq(ShorterCharWidth)) {
2229+ if (Shorter.size() + NullByte >= Longer.size())
2230+ break;
2231+ if (Longer[Shorter.size() + NullByte])
2232+ return false;
2233+ }
2234+
2235+ // Otherwise, they're potentially overlapping if and only if the overlapping
2236+ // region is the same.
2237+ return Shorter == Longer.take_front(Shorter.size());
21502238}
21512239
21522240static bool IsWeakLValue(const LValue &Value) {
@@ -8573,7 +8661,10 @@ class LValueExprEvaluator
85738661 bool VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E);
85748662 bool VisitCompoundLiteralExpr(const CompoundLiteralExpr *E);
85758663 bool VisitMemberExpr(const MemberExpr *E);
8576- bool VisitStringLiteral(const StringLiteral *E) { return Success(E); }
8664+ bool VisitStringLiteral(const StringLiteral *E) {
8665+ return Success(APValue::LValueBase(
8666+ E, 0, Info.getASTContext().getNextStringLiteralVersion()));
8667+ }
85778668 bool VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { return Success(E); }
85788669 bool VisitCXXTypeidExpr(const CXXTypeidExpr *E);
85798670 bool VisitCXXUuidofExpr(const CXXUuidofExpr *E);
@@ -9639,7 +9730,7 @@ static bool isOneByteCharacterType(QualType T) {
96399730
96409731bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
96419732 unsigned BuiltinOp) {
9642- if (IsNoOpCall (E))
9733+ if (IsOpaqueConstantCall (E))
96439734 return Success(E);
96449735
96459736 switch (BuiltinOp) {
@@ -13889,13 +13980,22 @@ EvaluateComparisonBinaryOperator(EvalInfo &Info, const BinaryOperator *E,
1388913980 (!RHSValue.Base && !RHSValue.Offset.isZero()))
1389013981 return DiagComparison(diag::note_constexpr_pointer_constant_comparison,
1389113982 !RHSValue.Base);
13892- // It's implementation-defined whether distinct literals will have
13893- // distinct addresses. In clang, the result of such a comparison is
13894- // unspecified, so it is not a constant expression. However, we do know
13895- // that the address of a literal will be non-null.
13896- if ((IsLiteralLValue(LHSValue) || IsLiteralLValue(RHSValue)) &&
13897- LHSValue.Base && RHSValue.Base)
13983+ // C++2c [intro.object]/10:
13984+ // Two objects [...] may have the same address if [...] they are both
13985+ // potentially non-unique objects.
13986+ // C++2c [intro.object]/9:
13987+ // An object is potentially non-unique if it is a string literal object,
13988+ // the backing array of an initializer list, or a subobject thereof.
13989+ //
13990+ // This makes the comparison result unspecified, so it's not a constant
13991+ // expression.
13992+ //
13993+ // TODO: Do we need to handle the initializer list case here?
13994+ if (ArePotentiallyOverlappingStringLiterals(Info, LHSValue, RHSValue))
1389813995 return DiagComparison(diag::note_constexpr_literal_comparison);
13996+ if (IsOpaqueConstantCall(LHSValue) || IsOpaqueConstantCall(RHSValue))
13997+ return DiagComparison(diag::note_constexpr_opaque_call_comparison,
13998+ !IsOpaqueConstantCall(LHSValue));
1389913999 // We can't tell whether weak symbols will end up pointing to the same
1390014000 // object.
1390114001 if (IsWeakLValue(LHSValue) || IsWeakLValue(RHSValue))
0 commit comments