-
Notifications
You must be signed in to change notification settings - Fork 15k
[CIR] Upstream support for string literals #140796
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This adds the minimal support needed to handle string literals.
|
@llvm/pr-subscribers-clangir @llvm/pr-subscribers-clang Author: Andy Kaylor (andykaylor) Changes: This adds the minimal support needed to handle string literals. Full diff: https://github.com/llvm/llvm-project/pull/140796.diff 10 Files Affected:
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index b680e4162a5ce..738f33bf36c9e 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -13,6 +13,7 @@
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
+#include "clang/CIR/MissingFeatures.h"
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/Support/ErrorHandling.h"
@@ -177,6 +178,12 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
return create<cir::AllocaOp>(loc, addrType, type, name, alignment);
}
+  /// Create a cir::GetGlobalOp referring to \p global by its symbol name.
+  /// The result type is a pointer to the global's symbol type.
+  mlir::Value createGetGlobal(mlir::Location loc, cir::GlobalOp global) {
+    // Address-space handling is tracked as a missing feature.
+    assert(!cir::MissingFeatures::addressSpace());
+    auto globalPtrTy = getPointerTo(global.getSymType());
+    return create<cir::GetGlobalOp>(loc, globalPtrTy, global.getSymName());
+  }
+
cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr,
bool isVolatile = false, uint64_t alignment = 0) {
mlir::IntegerAttr intAttr;
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 7b33d94483d5f..d43e2d9f461d1 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -38,6 +38,7 @@ struct MissingFeatures {
static bool opGlobalWeakRef() { return false; }
static bool opGlobalLinkage() { return false; }
static bool opGlobalSetVisitibility() { return false; }
+ static bool opGlobalUnnamedAddr() { return false; }
static bool supportIFuncAttr() { return false; }
static bool supportVisibility() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index b1b0826a4e44a..aff8b8949f3ad 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -26,6 +26,34 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
CIRGenBuilderTy(mlir::MLIRContext &mlirContext, const CIRGenTypeCache &tc)
: CIRBaseBuilderTy(mlirContext), typeCache(tc) {}
+  /// Get a cir::ConstArrayAttr for a string literal.
+  /// Note: This is different from what is returned by
+  /// mlir::Builder::getStringAttr() which is an mlir::StringAttr.
+  ///
+  /// \param str   The bytes of the literal.
+  /// \param eltTy Element type used for the resulting array type(s).
+  /// \param size  Number of elements of the array; 0 means "use str.size()".
+  /// \returns a #cir.zero attribute when \p str holds no non-null byte
+  ///          (this includes the empty string); otherwise a #cir.const_array
+  ///          whose string data omits a run of two or more trailing zeros and
+  ///          records their count instead. A single trailing zero stays in
+  ///          the data.
+  mlir::Attribute getString(llvm::StringRef str, mlir::Type eltTy,
+                            unsigned size) {
+    // Sizes and positions are kept as size_t: they are derived from
+    // str.size() / find_last_not_of() and must not be narrowed to int or
+    // unsigned for long strings or long runs of trailing zeros.
+    const size_t finalSize = size ? size : str.size();
+
+    const size_t lastNonZeroPos = str.find_last_not_of('\0');
+    // If the string is full of null bytes, emit a #cir.zero rather than
+    // a #cir.const_array.
+    if (lastNonZeroPos == llvm::StringRef::npos) {
+      auto arrayTy = cir::ArrayType::get(eltTy, finalSize);
+      return cir::ZeroAttr::get(arrayTy);
+    }
+    // We emit trailing zeros only if there are multiple trailing zeros.
+    size_t trailingZerosNum = 0;
+    if (finalSize > lastNonZeroPos + 2)
+      trailingZerosNum = finalSize - lastNonZeroPos - 1;
+    // The string data is typed with the truncated length, while the attribute
+    // itself carries the full array type plus the number of elided zeros.
+    auto truncatedArrayTy =
+        cir::ArrayType::get(eltTy, finalSize - trailingZerosNum);
+    auto fullArrayTy = cir::ArrayType::get(eltTy, finalSize);
+    return cir::ConstArrayAttr::get(
+        fullArrayTy,
+        mlir::StringAttr::get(str.drop_back(trailingZerosNum),
+                              truncatedArrayTy),
+        trailingZerosNum);
+  }
+
std::string getUniqueAnonRecordName() { return getUniqueRecordName("anon"); }
std::string getUniqueRecordName(const std::string &baseName) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index c5fe3c1378624..a8fecafe4a1f3 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -743,6 +743,16 @@ CIRGenFunction::emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e) {
return lv;
}
+/// Emit an lvalue for string literal \p e: obtain the global that holds the
+/// literal from the module, take its address, and wrap that address in an
+/// LValue of the literal's type.
+LValue CIRGenFunction::emitStringLiteralLValue(const StringLiteral *e) {
+  cir::GlobalOp strGlobal = cgm.getGlobalForStringLiteral(e);
+  // Global alignment is tracked as a missing feature; the address below is
+  // built with an alignment of one.
+  assert(!cir::MissingFeatures::opGlobalAlignment());
+  mlir::Location loc = getLoc(e->getSourceRange());
+  mlir::Value globalAddr = builder.createGetGlobal(loc, strGlobal);
+  Address strAddr(globalAddr, strGlobal.getSymType(),
+                  CharUnits::fromQuantity(1));
+  return makeAddrLValue(strAddr, e->getType(), AlignmentSource::Decl);
+}
+
/// Casts are never lvalues unless that cast is to a reference type. If the cast
/// is to a reference, we can have the usual lvalue result, otherwise if a cast
/// is needed by the code generator in an lvalue context, then it must mean that
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index c3798de79d969..ce88e656a38e8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -531,6 +531,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) {
return emitArraySubscriptExpr(cast<ArraySubscriptExpr>(e));
case Expr::UnaryOperatorClass:
return emitUnaryOpLValue(cast<UnaryOperator>(e));
+ case Expr::StringLiteralClass:
+ return emitStringLiteralLValue(cast<StringLiteral>(e));
case Expr::MemberExprClass:
return emitMemberExpr(cast<MemberExpr>(e));
case Expr::BinaryOperatorClass:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index ce080f481da6b..74f2e4043933d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -695,6 +695,8 @@ class CIRGenFunction : public CIRGenTypeCache {
mlir::Value emitStoreThroughBitfieldLValue(RValue src, LValue dstresult);
+ LValue emitStringLiteralLValue(const StringLiteral *e);
+
mlir::LogicalResult emitSwitchBody(const clang::Stmt *s);
mlir::LogicalResult emitSwitchCase(const clang::SwitchCase &s,
bool buildingTopLevelCase);
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index e170498b67548..5bae8908d5dbb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -562,6 +562,30 @@ void CIRGenModule::emitGlobalDefinition(clang::GlobalDecl gd,
llvm_unreachable("Invalid argument to CIRGenModule::emitGlobalDefinition");
}
+/// Build the attribute holding the data of string literal \p e as an inline
+/// array. Only literals whose characters are one byte wide are handled; for
+/// any other width an NYI error is reported and a null attribute is returned.
+mlir::Attribute
+CIRGenModule::getConstantArrayFromStringLiteral(const StringLiteral *e) {
+  assert(!e->getType()->isPointerType() && "Strings are always arrays");
+
+  if (e->getCharByteWidth() != 1) {
+    errorNYI(e->getSourceRange(),
+             "getConstantArrayFromStringLiteral: wide characters");
+    return mlir::Attribute();
+  }
+
+  // The string data itself is emitted as an inline array rather than as the
+  // address of the string. The literal's array type dictates the element
+  // count, so the raw bytes are resized to that length first.
+  const ConstantArrayType *arrayTy =
+      astContext.getAsConstantArrayType(e->getType());
+  uint64_t numElts = arrayTy->getZExtSize();
+  SmallString<64> bytes(e->getString());
+  bytes.resize(numElts);
+
+  mlir::Type eltTy = convertType(arrayTy->getElementType());
+  return builder.getString(bytes, eltTy, numElts);
+}
+
static bool shouldBeInCOMDAT(CIRGenModule &cgm, const Decl &d) {
assert(!cir::MissingFeatures::supportComdat());
@@ -749,6 +773,84 @@ CIRGenModule::getCIRLinkageVarDefinition(const VarDecl *vd, bool isConstant) {
return getCIRLinkageForDeclarator(vd, linkage, isConstant);
}
+/// Create the cir::GlobalOp backing a string literal. The global is named
+/// \p globalName, takes its type from \p c, and gets \p c as its initializer.
+static cir::GlobalOp generateStringLiteral(mlir::Location loc,
+                                           mlir::TypedAttr c, CIRGenModule &cgm,
+                                           StringRef globalName) {
+  assert(!cir::MissingFeatures::addressSpace());
+
+  // Materialize the global for the literal.
+  // FIXME(cir): check for insertion point in module level.
+  cir::GlobalOp strGlobal =
+      CIRGenModule::createGlobalOp(cgm, loc, globalName, c.getType());
+
+  // Only the initializer is attached for now; the remaining properties are
+  // flagged below as missing features.
+  assert(!cir::MissingFeatures::opGlobalAlignment());
+  assert(!cir::MissingFeatures::opGlobalLinkage());
+  assert(!cir::MissingFeatures::opGlobalThreadLocal());
+  assert(!cir::MissingFeatures::opGlobalUnnamedAddr());
+  CIRGenModule::setInitializer(strGlobal, c);
+  assert(!cir::MissingFeatures::supportComdat());
+  assert(!cir::MissingFeatures::opGlobalDSOLocal());
+  return strGlobal;
+}
+
+// When a new llvm::GlobalVariable is created, LLVM IR uniques its name
+// automatically, which is convenient for things like string-literal globals.
+// Creating a cir::GlobalOp does no such thing, so a unique name has to be
+// produced up front.
+//
+// For now this is only used for names known to be compiler-generated, which
+// is why the lookup goes through a private counter map instead of the MLIR
+// symbol table.
+std::string CIRGenModule::getUniqueGlobalName(const std::string &baseName) {
+  // The first request for a base name returns it unchanged and starts that
+  // name's counter at zero.
+  auto [entry, isFirstUse] = cgGlobalNames.try_emplace(baseName, 0u);
+  if (isFirstUse)
+    return baseName;
+
+  // Later requests append ".<counter>" and advance the counter.
+  std::string uniqued = baseName + "." + std::to_string(entry->second++);
+  // There should not be any symbol with this name in the module.
+  assert(!mlir::SymbolTable::lookupSymbolIn(theModule, uniqued));
+  return uniqued;
+}
+
+/// Create and return the global that holds the data of string literal \p s.
+/// The global's name is \p name made unique within this module. Writable
+/// strings and ABI-mangled string literals are reported as NYI.
+cir::GlobalOp CIRGenModule::getGlobalForStringLiteral(const StringLiteral *s,
+                                                      StringRef name) {
+  mlir::Attribute initAttr = getConstantArrayFromStringLiteral(s);
+
+  const bool writableStrings = getLangOpts().WritableStrings;
+  if (writableStrings)
+    errorNYI(s->getSourceRange(),
+             "getGlobalForStringLiteral: Writable strings");
+
+  // Some ABIs merge duplicate strings by mangling the literal. That must not
+  // happen for writable strings, since a write in one TU would then affect
+  // strings in another.
+  if (getCXXABI().getMangleContext().shouldMangleStringLiteral(s) &&
+      !writableStrings)
+    errorNYI(s->getSourceRange(),
+             "getGlobalForStringLiteral: mangle string literals");
+
+  // CIR, unlike LLVM IR, does not unique global names on its own, so it is
+  // done explicitly here.
+  std::string uniqueName = getUniqueGlobalName(name.str());
+  mlir::Location loc = getLoc(s->getSourceRange());
+  // NOTE(review): getConstantArrayFromStringLiteral returns a null attribute
+  // on its NYI (wide character) path -- confirm this cast is never reached
+  // with a null attribute.
+  auto typedInit = llvm::cast<mlir::TypedAttr>(initAttr);
+  assert(!cir::MissingFeatures::opGlobalAlignment());
+  cir::GlobalOp strGlobal =
+      generateStringLiteral(loc, typedInit, *this, uniqueName);
+  assert(!cir::MissingFeatures::opGlobalDSOLocal());
+
+  assert(!cir::MissingFeatures::sanitizers());
+
+  return strGlobal;
+}
+
void CIRGenModule::emitDeclContext(const DeclContext *dc) {
for (Decl *decl : dc->decls()) {
// Unlike other DeclContexts, the contents of an ObjCImplDecl at TU scope
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h
index b67239fcff44b..9828e1068e4fb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -126,6 +126,9 @@ class CIRGenModule : public CIRGenTypeCache {
llvm::StringRef name, mlir::Type t,
mlir::Operation *insertPoint = nullptr);
+ llvm::StringMap<unsigned> cgGlobalNames;
+ std::string getUniqueGlobalName(const std::string &baseName);
+
/// Return the mlir::Value for the address of the given global variable.
/// If Ty is non-null and if the global doesn't exist, then it will be created
/// with the specified type instead of whatever the normal requested type
@@ -136,6 +139,14 @@ class CIRGenModule : public CIRGenTypeCache {
getAddrOfGlobalVar(const VarDecl *d, mlir::Type ty = {},
ForDefinition_t isForDefinition = NotForDefinition);
+ /// Return a constant array for the given string.
+ mlir::Attribute getConstantArrayFromStringLiteral(const StringLiteral *e);
+
+  /// Return the cir::GlobalOp holding the constant array for the given string
+  /// literal. \p name is the base name of the global and defaults to ".str".
+  cir::GlobalOp getGlobalForStringLiteral(const StringLiteral *s,
+                                          llvm::StringRef name = ".str");
+
const TargetCIRGenInfo &getTargetCIRGenInfo();
/// Helpers to convert the presumed location of Clang's SourceLocation to an
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 365569ce1f48a..2516007afd561 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -270,6 +270,18 @@ mlir::Value CIRAttrToValue::visitCirAttr(cir::ConstArrayAttr attr) {
result =
rewriter.create<mlir::LLVM::InsertValueOp>(loc, result, init, idx);
}
+ } else if (auto strAttr = mlir::dyn_cast<mlir::StringAttr>(attr.getElts())) {
+ // TODO(cir): this diverges from traditional lowering. Normally the string
+ // would be a global constant that is memcopied.
+ auto arrayTy = mlir::dyn_cast<cir::ArrayType>(strAttr.getType());
+ assert(arrayTy && "String attribute must have an array type");
+ mlir::Type eltTy = arrayTy.getElementType();
+ for (auto [idx, elt] : llvm::enumerate(strAttr)) {
+ auto init = rewriter.create<mlir::LLVM::ConstantOp>(
+ loc, converter->convertType(eltTy), elt);
+ result =
+ rewriter.create<mlir::LLVM::InsertValueOp>(loc, result, init, idx);
+ }
} else {
llvm_unreachable("unexpected ConstArrayAttr elements");
}
diff --git a/clang/test/CIR/CodeGen/string-literals.c b/clang/test/CIR/CodeGen/string-literals.c
new file mode 100644
index 0000000000000..873b00d9c9a98
--- /dev/null
+++ b/clang/test/CIR/CodeGen/string-literals.c
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// LLVM: @[[STR1_GLOBAL:.*]] = dso_local global [2 x i8] c"1\00"
+// LLVM: @[[STR2_GLOBAL:.*]] = dso_local global [1 x i8] zeroinitializer
+// LLVM: @[[STR3_GLOBAL:.*]] = dso_local global [2 x i8] zeroinitializer
+
+// OGCG: @[[STR1_GLOBAL:.*]] = private unnamed_addr constant [2 x i8] c"1\00"
+// OGCG: @[[STR2_GLOBAL:.*]] = private unnamed_addr constant [1 x i8] zeroinitializer
+// OGCG: @[[STR3_GLOBAL:.*]] = private unnamed_addr constant [2 x i8] zeroinitializer
+
+char *f1() {
+ return "1";
+}
+
+// CIR: cir.global external @[[STR1_GLOBAL:.*]] = #cir.const_array<"1\00" : !cir.array<!s8i x 2>> : !cir.array<!s8i x 2>
+// CIR: cir.func @f1()
+// CIR: %[[STR:.*]] = cir.get_global @[[STR1_GLOBAL]] : !cir.ptr<!cir.array<!s8i x 2>>
+
+// LLVM: define ptr @f1()
+// LLVM: store ptr @[[STR1_GLOBAL]], ptr {{.*}}
+
+// OGCG: define {{.*}}ptr @f1()
+// OGCG: ret ptr @[[STR1_GLOBAL]]
+
+char *f2() {
+ return "";
+}
+
+// CIR: cir.global external @[[STR2_GLOBAL:.*]] = #cir.zero : !cir.array<!s8i x 1>
+// CIR: cir.func @f2()
+// CIR: %[[STR2:.*]] = cir.get_global @[[STR2_GLOBAL]] : !cir.ptr<!cir.array<!s8i x 1>>
+
+// LLVM: define ptr @f2()
+// LLVM: store ptr @[[STR2_GLOBAL]], ptr {{.*}}
+
+// OGCG: define {{.*}}ptr @f2()
+// OGCG: ret ptr @[[STR2_GLOBAL]]
+
+char *f3() {
+ return "\00";
+}
+
+// CIR: cir.global external @[[STR3_GLOBAL:.*]] = #cir.zero : !cir.array<!s8i x 2>
+// CIR: cir.func @f3()
+// CIR: %[[STR3:.*]] = cir.get_global @[[STR3_GLOBAL]] : !cir.ptr<!cir.array<!s8i x 2>>
+
+// LLVM: define ptr @f3()
+// LLVM: store ptr @[[STR3_GLOBAL]], ptr {{.*}}
+
+// OGCG: define {{.*}}ptr @f3()
+// OGCG: ret ptr @[[STR3_GLOBAL]]
|
| /// mlir::Builder::getStringAttr() which is an mlir::StringAttr. | ||
| mlir::Attribute getString(llvm::StringRef str, mlir::Type eltTy, | ||
| unsigned size) { | ||
| unsigned finalSize = size ? size : str.size(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instead of 0 being a special value, can we make `size` an optional?
| return cir::ZeroAttr::get(arrayTy); | ||
| } | ||
| // We emit trailing zeros only if there are multiple trailing zeros. | ||
| int trailingZerosNum = 0; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could probably end up being size_t in length with a big enough set of zeros, right? It doesn't look like you try to make it negative anywhere, so perhaps it too should be size_t?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
still ok, 1 nit.
| /// mlir::Builder::getStringAttr() which is an mlir::StringAttr. | ||
| mlir::Attribute getString(llvm::StringRef str, mlir::Type eltTy, | ||
| std::optional<size_t> size) { | ||
| size_t finalSize = size ? *size : str.size(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| size_t finalSize = size ? *size : str.size(); | |
| size_t finalSize = size.value_or(str.size()); |
Co-authored-by: Henrich Lauko <[email protected]>
This adds the minimal support needed to handle string literals.