Skip to content

Commit 35a7fd7

Browse files
committed
Switch Neon, SVE, SME, and RVV builtins to use a prefix
This avoids repeating the shared name prefix (e.g. `__builtin_neon_`) in every string, shrinking the string tables. For SVE in particular, which is by far the largest builtin string table, this gets us well under 200KiB. Others shrink by 30% - 50% depending on how long the rest of the strings end up. Overall, this completes restructuring the builtin string tables to try to minimize their size and hopefully avoid both toolchain bugs and compile-time memory overheads of the full-sized string tables.
1 parent fcb7395 commit 35a7fd7

File tree

13 files changed

+83
-74
lines changed

13 files changed

+83
-74
lines changed

clang/include/clang/Basic/Builtins.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ enum ID {
7171
FirstTSBuiltin
7272
};
7373

74+
struct InfosShard;
75+
7476
/// The info used to represent each builtin.
7577
struct Info {
7678
// Rather than store pointers to the string literals describing these four
@@ -84,6 +86,11 @@ struct Info {
8486

8587
HeaderDesc Header = HeaderDesc::NO_HEADER;
8688
LanguageID Langs = ALL_LANGUAGES;
89+
90+
/// Get the name for the builtin represented by this `Info` object.
91+
///
92+
/// Must be provided the `Shard` for this `Info` object.
93+
std::string getName(const InfosShard &Shard) const;
8794
};
8895

8996
/// A constexpr function to construct an infos array from X-macros.
@@ -121,6 +128,8 @@ static constexpr std::array<Info, N> MakeInfos(std::array<Info, N> Infos) {
121128
struct InfosShard {
122129
const llvm::StringTable *Strings;
123130
llvm::ArrayRef<Info> Infos;
131+
132+
llvm::StringLiteral NamePrefix = "";
124133
};
125134

126135
// A detail macro used below to emit a string literal that, after string literal
@@ -236,9 +245,10 @@ class Context {
236245

237246
/// Return the identifier name for the specified builtin,
238247
/// e.g. "__builtin_abs".
239-
llvm::StringRef getName(unsigned ID) const;
248+
std::string getName(unsigned ID) const;
240249

241-
/// Return a quoted name for the specified builtin for use in diagnostics.
250+
/// Return the identifier name for the specified builtin inside single quotes
251+
/// for a diagnostic, e.g. "'__builtin_abs'".
242252
std::string getQuotedName(unsigned ID) const;
243253

244254
/// Get the type descriptor string for the specified builtin.

clang/lib/Basic/Builtins.cpp

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,22 @@ Builtin::Context::getShardAndInfo(unsigned ID) const {
7171
llvm_unreachable("Invalid target builtin shard structure!");
7272
}
7373

74+
std::string Builtin::Info::getName(const Builtin::InfosShard &Shard) const {
75+
return (Twine(Shard.NamePrefix) + (*Shard.Strings)[Offsets.Name]).str();
76+
}
77+
7478
/// Return the identifier name for the specified builtin,
7579
/// e.g. "__builtin_abs".
76-
llvm::StringRef Builtin::Context::getName(unsigned ID) const {
80+
std::string Builtin::Context::getName(unsigned ID) const {
7781
const auto &[Shard, I] = getShardAndInfo(ID);
78-
return (*Shard.Strings)[I.Offsets.Name];
82+
return I.getName(Shard);
83+
}
84+
85+
std::string Builtin::Context::getQuotedName(unsigned ID) const {
86+
const auto &[Shard, I] = getShardAndInfo(ID);
87+
return (Twine("'") + Shard.NamePrefix + (*Shard.Strings)[I.Offsets.Name] +
88+
"'")
89+
.str();
7990
}
8091

8192
const char *Builtin::Context::getTypeString(unsigned ID) const {
@@ -112,12 +123,14 @@ void Builtin::Context::InitializeTarget(const TargetInfo &Target,
112123
bool Builtin::Context::isBuiltinFunc(llvm::StringRef FuncName) {
113124
bool InStdNamespace = FuncName.consume_front("std-");
114125
for (const auto &Shard : {InfosShard{&BuiltinStrings, BuiltinInfos}})
115-
for (const auto &I : Shard.Infos)
116-
if (FuncName == (*Shard.Strings)[I.Offsets.Name] &&
117-
(bool)strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'z') ==
118-
InStdNamespace)
119-
return strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'f') !=
120-
nullptr;
126+
if (llvm::StringRef FuncNameSuffix = FuncName;
127+
FuncNameSuffix.consume_front(Shard.NamePrefix))
128+
for (const auto &I : Shard.Infos)
129+
if (FuncNameSuffix == (*Shard.Strings)[I.Offsets.Name] &&
130+
(bool)strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'z') ==
131+
InStdNamespace)
132+
return strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'f') !=
133+
nullptr;
121134

122135
return false;
123136
}
@@ -193,7 +206,7 @@ void Builtin::Context::initializeBuiltins(IdentifierTable &Table,
193206
for (const auto &I : Shard.Infos) {
194207
// If this is a real builtin (ID != 0) and is supported, add it.
195208
if (ID != 0 && builtinIsSupported(*Shard.Strings, I, LangOpts))
196-
Table.get((*Shard.Strings)[I.Offsets.Name]).setBuiltinID(ID);
209+
Table.get(I.getName(Shard)).setBuiltinID(ID);
197210
++ID;
198211
}
199212
assert(ID == FirstTSBuiltin && "Should have added all non-target IDs!");
@@ -202,14 +215,14 @@ void Builtin::Context::initializeBuiltins(IdentifierTable &Table,
202215
for (const auto &Shard : TargetShards)
203216
for (const auto &I : Shard.Infos) {
204217
if (builtinIsSupported(*Shard.Strings, I, LangOpts))
205-
Table.get((*Shard.Strings)[I.Offsets.Name]).setBuiltinID(ID);
218+
Table.get(I.getName(Shard)).setBuiltinID(ID);
206219
++ID;
207220
}
208221

209222
// Step #3: Register target-specific builtins for AuxTarget.
210223
for (const auto &Shard : AuxTargetShards)
211224
for (const auto &I : Shard.Infos) {
212-
Table.get((*Shard.Strings)[I.Offsets.Name]).setBuiltinID(ID);
225+
Table.get(I.getName(Shard)).setBuiltinID(ID);
213226
++ID;
214227
}
215228
}
@@ -228,10 +241,6 @@ void Builtin::Context::initializeBuiltins(IdentifierTable &Table,
228241
}
229242
}
230243

231-
std::string Builtin::Context::getQuotedName(unsigned ID) const {
232-
return (llvm::Twine("'") + getName(ID) + "'").str();
233-
}
234-
235244
unsigned Builtin::Context::getRequiredVectorWidth(unsigned ID) const {
236245
const char *WidthPos = ::strchr(getAttributesString(ID), 'V');
237246
if (!WidthPos)

clang/lib/Basic/Targets/AArch64.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -770,11 +770,12 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
770770
llvm::SmallVector<Builtin::InfosShard>
771771
AArch64TargetInfo::getTargetBuiltins() const {
772772
return {
773-
{&NEON::BuiltinStrings, NEON::BuiltinInfos},
774-
{&NEON::FP16::BuiltinStrings, NEON::FP16::BuiltinInfos},
775-
{&SVE::BuiltinStrings, SVE::BuiltinInfos},
773+
{&NEON::BuiltinStrings, NEON::BuiltinInfos, "__builtin_neon_"},
774+
{&NEON::FP16::BuiltinStrings, NEON::FP16::BuiltinInfos,
775+
"__builtin_neon_"},
776+
{&SVE::BuiltinStrings, SVE::BuiltinInfos, "__builtin_sve_"},
776777
{&BuiltinSVENeonBridgeStrings, BuiltinSVENeonBridgeInfos},
777-
{&SME::BuiltinStrings, SME::BuiltinInfos},
778+
{&SME::BuiltinStrings, SME::BuiltinInfos, "__builtin_sme_"},
778779
{&BuiltinAArch64Strings, BuiltinAArch64Infos},
779780
};
780781
}

clang/lib/Basic/Targets/ARM.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1126,8 +1126,9 @@ static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumARMBuiltins>({
11261126
llvm::SmallVector<Builtin::InfosShard>
11271127
ARMTargetInfo::getTargetBuiltins() const {
11281128
return {
1129-
{&NEON::BuiltinStrings, NEON::BuiltinInfos},
1130-
{&NEON::FP16::BuiltinStrings, NEON::FP16::BuiltinInfos},
1129+
{&NEON::BuiltinStrings, NEON::BuiltinInfos, "__builtin_neon_"},
1130+
{&NEON::FP16::BuiltinStrings, NEON::FP16::BuiltinInfos,
1131+
"__builtin_neon_"},
11311132
{&BuiltinStrings, BuiltinInfos},
11321133
};
11331134
}

clang/lib/Basic/Targets/RISCV.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,6 @@ namespace RVV {
255255
#define GET_RISCVV_BUILTIN_STR_TABLE
256256
#include "clang/Basic/riscv_vector_builtins.inc"
257257
#undef GET_RISCVV_BUILTIN_STR_TABLE
258-
static_assert(BuiltinStrings.size() < 100'000);
259258

260259
static constexpr std::array<Builtin::Info, NumRVVBuiltins> BuiltinInfos = {
261260
#define GET_RISCVV_BUILTIN_INFOS
@@ -291,8 +290,8 @@ static_assert(std::size(BuiltinInfos) == NumRISCVBuiltins);
291290
llvm::SmallVector<Builtin::InfosShard>
292291
RISCVTargetInfo::getTargetBuiltins() const {
293292
return {
294-
{&RVV::BuiltinStrings, RVV::BuiltinInfos},
295-
{&RVVSiFive::BuiltinStrings, RVVSiFive::BuiltinInfos},
293+
{&RVV::BuiltinStrings, RVV::BuiltinInfos, "__builtin_rvv_"},
294+
{&RVVSiFive::BuiltinStrings, RVVSiFive::BuiltinInfos, "__builtin_rvv_"},
296295
{&BuiltinStrings, BuiltinInfos},
297296
};
298297
}

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -263,8 +263,10 @@ llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
263263
unsigned BuiltinID) {
264264
assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
265265

266-
// Get the name, skip over the __builtin_ prefix (if necessary).
267-
StringRef Name;
266+
// Get the name, skip over the __builtin_ prefix (if necessary). We may have
267+
// to build this up so provide a small stack buffer to handle the vast
268+
// majority of names.
269+
llvm::SmallString<64> Name;
268270
GlobalDecl D(FD);
269271

270272
// TODO: This list should be expanded or refactored after all GCC-compatible
@@ -6566,7 +6568,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
65666568
LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
65676569

65686570
// See if we have a target specific intrinsic.
6569-
StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6571+
std::string Name = getContext().BuiltinInfo.getName(BuiltinID);
65706572
Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
65716573
StringRef Prefix =
65726574
llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
@@ -21213,7 +21215,7 @@ static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
2121321215
auto &C = CGF.CGM.getContext();
2121421216
if (!(C.getLangOpts().NativeHalfType ||
2121521217
!C.getTargetInfo().useFP16ConversionIntrinsics())) {
21216-
CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
21218+
CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getQuotedName(BuiltinID) +
2121721219
" requires native half type support.");
2121821220
return nullptr;
2121921221
}

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3983,7 +3983,8 @@ namespace {
39833983
unsigned BuiltinID = FD->getBuiltinID();
39843984
if (!BuiltinID || !BI.isLibFunction(BuiltinID))
39853985
return false;
3986-
StringRef BuiltinName = BI.getName(BuiltinID);
3986+
std::string BuiltinNameStr = BI.getName(BuiltinID);
3987+
StringRef BuiltinName = BuiltinNameStr;
39873988
if (BuiltinName.starts_with("__builtin_") &&
39883989
Name == BuiltinName.slice(strlen("__builtin_"), StringRef::npos)) {
39893990
return true;

clang/lib/Sema/SemaChecking.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1235,7 +1235,9 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
12351235
bool IsChkVariant = false;
12361236

12371237
auto GetFunctionName = [&]() {
1238-
StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID);
1238+
std::string FunctionNameStr =
1239+
getASTContext().BuiltinInfo.getName(BuiltinID);
1240+
llvm::StringRef FunctionName = FunctionNameStr;
12391241
// Skim off the details of whichever builtin was called to produce a better
12401242
// diagnostic, as it's unlikely that the user wrote the __builtin
12411243
// explicitly.
@@ -1245,7 +1247,7 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
12451247
} else {
12461248
FunctionName.consume_front("__builtin_");
12471249
}
1248-
return FunctionName;
1250+
return FunctionName.str();
12491251
};
12501252

12511253
switch (BuiltinID) {
@@ -1289,7 +1291,7 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
12891291
unsigned SourceSize) {
12901292
DiagID = diag::warn_fortify_scanf_overflow;
12911293
unsigned Index = ArgIndex + DataIndex;
1292-
StringRef FunctionName = GetFunctionName();
1294+
std::string FunctionName = GetFunctionName();
12931295
DiagRuntimeBehavior(TheCall->getArg(Index)->getBeginLoc(), TheCall,
12941296
PDiag(DiagID) << FunctionName << (Index + 1)
12951297
<< DestSize << SourceSize);
@@ -1438,7 +1440,7 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
14381440
llvm::APSInt::compareValues(*SourceSize, *DestinationSize) <= 0)
14391441
return;
14401442

1441-
StringRef FunctionName = GetFunctionName();
1443+
std::string FunctionName = GetFunctionName();
14421444

14431445
SmallString<16> DestinationStr;
14441446
SmallString<16> SourceStr;
@@ -4545,7 +4547,7 @@ ExprResult Sema::BuiltinAtomicOverloaded(ExprResult TheCallResult) {
45454547
// Get the decl for the concrete builtin from this, we can tell what the
45464548
// concrete integer type we should convert to is.
45474549
unsigned NewBuiltinID = BuiltinIndices[BuiltinIndex][SizeIndex];
4548-
StringRef NewBuiltinName = Context.BuiltinInfo.getName(NewBuiltinID);
4550+
std::string NewBuiltinName = Context.BuiltinInfo.getName(NewBuiltinID);
45494551
FunctionDecl *NewBuiltinDecl;
45504552
if (NewBuiltinID == BuiltinID)
45514553
NewBuiltinDecl = FDecl;
@@ -8334,7 +8336,7 @@ static void emitReplacement(Sema &S, SourceLocation Loc, SourceRange Range,
83348336
unsigned AbsKind, QualType ArgType) {
83358337
bool EmitHeaderHint = true;
83368338
const char *HeaderName = nullptr;
8337-
StringRef FunctionName;
8339+
std::string FunctionName;
83388340
if (S.getLangOpts().CPlusPlus && !ArgType->isAnyComplexType()) {
83398341
FunctionName = "std::abs";
83408342
if (ArgType->isIntegralOrEnumerationType()) {
@@ -8483,7 +8485,7 @@ void Sema::CheckAbsoluteValueFunction(const CallExpr *Call,
84838485
// Unsigned types cannot be negative. Suggest removing the absolute value
84848486
// function call.
84858487
if (ArgType->isUnsignedIntegerType()) {
8486-
StringRef FunctionName =
8488+
std::string FunctionName =
84878489
IsStdAbs ? "std::abs" : Context.BuiltinInfo.getName(AbsKind);
84888490
Diag(Call->getExprLoc(), diag::warn_unsigned_abs) << ArgType << ParamType;
84898491
Diag(Call->getExprLoc(), diag::note_remove_abs)

clang/lib/Sema/SemaExpr.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6687,7 +6687,7 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
66876687

66886688
Expr *Sema::BuildBuiltinCallExpr(SourceLocation Loc, Builtin::ID Id,
66896689
MultiExprArg CallArgs) {
6690-
StringRef Name = Context.BuiltinInfo.getName(Id);
6690+
std::string Name = Context.BuiltinInfo.getName(Id);
66916691
LookupResult R(*this, &Context.Idents.get(Name), Loc,
66926692
Sema::LookupOrdinaryName);
66936693
LookupName(R, TUScope, /*AllowBuiltinCreation=*/true);

clang/lib/StaticAnalyzer/Core/CheckerContext.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD,
5555
if (BId != 0) {
5656
if (Name.empty())
5757
return true;
58-
StringRef BName = FD->getASTContext().BuiltinInfo.getName(BId);
58+
std::string BName = FD->getASTContext().BuiltinInfo.getName(BId);
5959
size_t start = BName.find(Name);
6060
if (start != StringRef::npos) {
6161
// Accept exact match.

0 commit comments

Comments
 (0)