Skip to content

Commit 54416c1

Browse files
andykaylor, lanza
authored and committed
[CIR] Simplify string literal global creation (llvm#1632)
Previously, when emitting a global for a string literal, we created a GlobalOp, built a GlobalView attribute for it, and then looked the global up again from the symbol associated with that attribute. This change splits out the function that creates the global so that the global is returned directly and the GlobalView attribute is created only in the case where it is needed. This also updates the mechanism used for uniquing the global names used for strings, so that when different base names are used, each base name is numbered separately. The mangling of the global used for strings is not implemented yet, but the uniquing was happening prior to the mangling placeholder. This change moves the uniquing below the placeholder for mangling.
1 parent fa01740 commit 54416c1

File tree

3 files changed

+49
-25
lines changed

3 files changed

+49
-25
lines changed

clang/lib/CIR/CodeGen/CIRGenExpr.cpp

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1880,21 +1880,14 @@ LValue CIRGenFunction::emitArraySubscriptExpr(const ArraySubscriptExpr *E,
18801880
return LV;
18811881
}
18821882

1883-
LValue CIRGenFunction::emitStringLiteralLValue(const StringLiteral *E) {
1884-
auto sym = CGM.getAddrOfConstantStringFromLiteral(E).getSymbol();
1885-
1886-
auto cstGlobal = mlir::SymbolTable::lookupSymbolIn(CGM.getModule(), sym);
1887-
assert(cstGlobal && "Expected global");
1888-
1889-
auto g = dyn_cast<cir::GlobalOp>(cstGlobal);
1890-
assert(g && "unaware of other symbol providers");
1891-
1883+
LValue CIRGenFunction::emitStringLiteralLValue(const StringLiteral *e) {
1884+
auto g = CGM.getGlobalForStringLiteral(e);
18921885
assert(g.getAlignment() && "expected alignment for string literal");
18931886
auto align = *g.getAlignment();
1894-
auto addr = builder.createGetGlobal(getLoc(E->getSourceRange()), g);
1887+
auto addr = builder.createGetGlobal(getLoc(e->getSourceRange()), g);
18951888
return makeAddrLValue(
18961889
Address(addr, g.getSymType(), CharUnits::fromQuantity(align)),
1897-
E->getType(), AlignmentSource::Decl);
1890+
e->getType(), AlignmentSource::Decl);
18981891
}
18991892

19001893
/// Casts are never lvalues unless that cast is to a reference type. If the cast

clang/lib/CIR/CodeGen/CIRGenModule.cpp

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1783,10 +1783,32 @@ generateStringLiteral(mlir::Location loc, mlir::TypedAttr c,
17831783
return gv;
17841784
}
17851785

1786-
/// Return a pointer to a constant array for the given string literal.
1787-
cir::GlobalViewAttr
1788-
CIRGenModule::getAddrOfConstantStringFromLiteral(const StringLiteral *s,
1789-
StringRef name) {
1786+
// LLVM IR automatically uniques names when new llvm::GlobalVariables are
1787+
// created. This is handy, for example, when creating globals for string
1788+
// literals. Since we don't do that when creating cir::GlobalOp's, we need
1789+
// a mechanism to generate a unique name in advance.
1790+
//
1791+
// For now, this mechanism is only used in cases where we know that the
1792+
// name is compiler-generated, so we don't use the MLIR symbol table for
1793+
// the lookup.
1794+
std::string CIRGenModule::getUniqueGlobalName(const std::string &baseName) {
1795+
// If this is the first time we've generated a name for this basename, use
1796+
// it as is and start a counter for this base name.
1797+
auto it = cgGlobalNames.find(baseName);
1798+
if (it == cgGlobalNames.end()) {
1799+
cgGlobalNames[baseName] = 1;
1800+
return baseName;
1801+
}
1802+
1803+
std::string result =
1804+
baseName + "." + std::to_string(cgGlobalNames[baseName]++);
1805+
// There should not be any symbol with this name in the module.
1806+
assert(!mlir::SymbolTable::lookupSymbolIn(theModule, result));
1807+
return result;
1808+
}
1809+
1810+
cir::GlobalOp CIRGenModule::getGlobalForStringLiteral(const StringLiteral *s,
1811+
StringRef name) {
17901812
CharUnits alignment =
17911813
astContext.getAlignOfGlobalVarInChars(s->getType(), /*VD=*/nullptr);
17921814

@@ -1800,13 +1822,6 @@ CIRGenModule::getAddrOfConstantStringFromLiteral(const StringLiteral *s,
18001822
uint64_t(alignment.getQuantity()) > *gv.getAlignment())
18011823
gv.setAlignmentAttr(getSize(alignment));
18021824
} else {
1803-
SmallString<256> stringNameBuffer = name;
1804-
llvm::raw_svector_ostream out(stringNameBuffer);
1805-
if (StringLiteralCnt)
1806-
out << '.' << StringLiteralCnt;
1807-
name = out.str();
1808-
StringLiteralCnt++;
1809-
18101825
SmallString<256> mangledNameBuffer;
18111826
StringRef globalVariableName;
18121827
auto lt = cir::GlobalLinkageKind::ExternalLinkage;
@@ -1822,18 +1837,28 @@ CIRGenModule::getAddrOfConstantStringFromLiteral(const StringLiteral *s,
18221837
globalVariableName = name;
18231838
}
18241839

1840+
// Unlike LLVM IR, CIR doesn't automatically unique names for globals, so
1841+
// we need to do that explicitly.
1842+
std::string uniqueName = getUniqueGlobalName(globalVariableName.str());
18251843
auto loc = getLoc(s->getSourceRange());
18261844
auto typedC = llvm::dyn_cast<mlir::TypedAttr>(c);
18271845
if (!typedC)
18281846
llvm_unreachable("this should never be untyped at this point");
1829-
gv = generateStringLiteral(loc, typedC, lt, *this, globalVariableName,
1830-
alignment);
1847+
gv = generateStringLiteral(loc, typedC, lt, *this, uniqueName, alignment);
18311848
setDSOLocal(static_cast<mlir::Operation *>(gv));
18321849
ConstantStringMap[c] = gv;
18331850

18341851
assert(!cir::MissingFeatures::reportGlobalToASan() && "NYI");
18351852
}
18361853

1854+
return gv;
1855+
}
1856+
1857+
/// Return a pointer to a constant array for the given string literal.
1858+
cir::GlobalViewAttr
1859+
CIRGenModule::getAddrOfConstantStringFromLiteral(const StringLiteral *s,
1860+
StringRef name) {
1861+
auto gv = getGlobalForStringLiteral(s, name);
18371862
auto arrayTy = mlir::dyn_cast<cir::ArrayType>(gv.getSymType());
18381863
assert(arrayTy && "String literal must be array");
18391864
auto ptrTy = getBuilder().getPointerTo(arrayTy.getElementType(),

clang/lib/CIR/CodeGen/CIRGenModule.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,9 @@ class CIRGenModule : public CIRGenTypeCache {
338338
return !getLangOpts().CPlusPlus;
339339
}
340340

341+
llvm::StringMap<unsigned> cgGlobalNames;
342+
std::string getUniqueGlobalName(const std::string &baseName);
343+
341344
/// Return the mlir::Value for the address of the given global variable.
342345
/// If Ty is non-null and if the global doesn't exist, then it will be created
343346
/// with the specified type instead of whatever the normal requested type
@@ -444,12 +447,15 @@ class CIRGenModule : public CIRGenTypeCache {
444447
/// Return a constant array for the given string.
445448
mlir::Attribute getConstantArrayFromStringLiteral(const StringLiteral *E);
446449

450+
/// Return a global op for the given string literal.
451+
cir::GlobalOp getGlobalForStringLiteral(const StringLiteral *s,
452+
llvm::StringRef name = ".str");
453+
447454
/// Return a global symbol reference to a constant array for the given string
448455
/// literal.
449456
cir::GlobalViewAttr
450457
getAddrOfConstantStringFromLiteral(const StringLiteral *S,
451458
llvm::StringRef Name = ".str");
452-
unsigned StringLiteralCnt = 0;
453459

454460
unsigned CompoundLitaralCnt = 0;
455461
/// Return the unique name for global compound literal

0 commit comments

Comments
 (0)