diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 7be4022649329..06ac0f1704aa9 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -1752,7 +1752,14 @@ enum class StringLiteralKind { UTF8, UTF16, UTF32, - Unevaluated + Unevaluated, + // Binary kind of string literal is used for the data coming via #embed + // directive. File's binary contents is transformed to a special kind of + // string literal that in some cases may be used directly as an initializer + // and some features of classic string literals are not applicable to this + // kind of a string literal, for example finding a particular byte's source + // location for better diagnosing. + Binary }; /// StringLiteral - This represents a string literal expression, e.g. "foo" @@ -1884,6 +1891,8 @@ class StringLiteral final int64_t getCodeUnitS(size_t I, uint64_t BitWidth) const { int64_t V = getCodeUnit(I); if (isOrdinary() || isWide()) { + // Ordinary and wide string literals have types that can be signed. + // It is important for checking C23 constexpr initializers. unsigned Width = getCharByteWidth() * BitWidth; llvm::APInt AInt(Width, (uint64_t)V); V = AInt.getSExtValue(); @@ -4965,9 +4974,9 @@ class EmbedExpr final : public Expr { assert(EExpr && CurOffset != ULLONG_MAX && "trying to dereference an invalid iterator"); IntegerLiteral *N = EExpr->FakeChildNode; - StringRef DataRef = EExpr->Data->BinaryData->getBytes(); N->setValue(*EExpr->Ctx, - llvm::APInt(N->getValue().getBitWidth(), DataRef[CurOffset], + llvm::APInt(N->getValue().getBitWidth(), + EExpr->Data->BinaryData->getCodeUnit(CurOffset), N->getType()->isSignedIntegerType())); // We want to return a reference to the fake child node in the // EmbedExpr, not the local variable N. diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index aa7e14329a21b..8571b617c70eb 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -1104,6 +1104,7 @@ unsigned StringLiteral::mapCharByteWidth(TargetInfo const &Target, switch (SK) { case StringLiteralKind::Ordinary: case StringLiteralKind::UTF8: + case StringLiteralKind::Binary: CharByteWidth = Target.getCharWidth(); break; case StringLiteralKind::Wide: @@ -1216,6 +1217,7 @@ void StringLiteral::outputString(raw_ostream &OS) const { switch (getKind()) { case StringLiteralKind::Unevaluated: case StringLiteralKind::Ordinary: + case StringLiteralKind::Binary: break; // no prefix. case StringLiteralKind::Wide: OS << 'L'; @@ -1332,6 +1334,11 @@ StringLiteral::getLocationOfByte(unsigned ByteNo, const SourceManager &SM, const LangOptions &Features, const TargetInfo &Target, unsigned *StartToken, unsigned *StartTokenByteOffset) const { + // No source location of bytes for binary literals since they don't come from + // source. + if (getKind() == StringLiteralKind::Binary) + return getStrTokenLoc(0); + assert((getKind() == StringLiteralKind::Ordinary || getKind() == StringLiteralKind::UTF8 || getKind() == StringLiteralKind::Unevaluated) && diff --git a/clang/lib/Parse/ParseInit.cpp b/clang/lib/Parse/ParseInit.cpp index 63b1d7bd9db53..471b3eaf28287 100644 --- a/clang/lib/Parse/ParseInit.cpp +++ b/clang/lib/Parse/ParseInit.cpp @@ -445,7 +445,7 @@ ExprResult Parser::createEmbedExpr() { Context.MakeIntValue(Str.size(), Context.getSizeType()); QualType ArrayTy = Context.getConstantArrayType( Ty, ArraySize, nullptr, ArraySizeModifier::Normal, 0); - return StringLiteral::Create(Context, Str, StringLiteralKind::Ordinary, + return StringLiteral::Create(Context, Str, StringLiteralKind::Binary, false, ArrayTy, StartLoc); }; diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 1e39d69e8b230..c6621402adfc9 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -4143,6 +4143,7 @@ Sema::IsStringLiteralToNonConstPointerConversion(Expr *From, QualType ToType) { // We don't allow UTF literals to be implicitly converted break; case StringLiteralKind::Ordinary: + case StringLiteralKind::Binary: return (ToPointeeType->getKind() == BuiltinType::Char_U || ToPointeeType->getKind() == BuiltinType::Char_S); case StringLiteralKind::Wide: diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 37796758960cd..6e9ed875b50c5 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -105,6 +105,7 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT, return SIF_None; [[fallthrough]]; case StringLiteralKind::Ordinary: + case StringLiteralKind::Binary: // char array can be initialized with a narrow string. // Only allow char x[] = "foo"; not char x[] = L"foo"; if (ElemTy->isCharType()) diff --git a/clang/test/Preprocessor/embed_constexpr.c b/clang/test/Preprocessor/embed_constexpr.c new file mode 100644 index 0000000000000..e444dfec158b5 --- /dev/null +++ b/clang/test/Preprocessor/embed_constexpr.c @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -std=c23 + +static constexpr unsigned char data[] = { +#embed "big_char.txt" +}; + +static constexpr char data1[] = { +#embed "big_char.txt" // expected-error {{constexpr initializer evaluates to 255 which is not exactly representable in type 'const char'}} +}; + +static constexpr int data2[] = { +#embed "big_char.txt" +}; + +static constexpr unsigned data3[] = { +#embed "big_char.txt" suffix(, -1) // expected-error {{constexpr initializer evaluates to -1 which is not exactly representable in type 'const unsigned int'}} +}; + +static constexpr int data4[] = { +#embed "big_char.txt" suffix(, -1) +};