-
Notifications
You must be signed in to change notification settings - Fork 15.4k
RuntimeLibcalls: Fix building hash table with duplicate entries #153801
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
RuntimeLibcalls: Fix building hash table with duplicate entries #153801
Conversation
|
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-llvm-binary-utilities Author: Matt Arsenault (arsenm) Changes: We were sizing the table appropriately for the number of LibcallImpls, but many of those have identical names, which was pushing up the collision count unnecessarily. This ends up decreasing the table size slightly, and makes it a bit faster. BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and BM_LookupRuntimeLibcallByNameSampleData by ~5%. As a secondary change, align the table size up to the next power of 2. This makes the table larger than before, but improves the sample data benchmark by an additional 5%. Full diff: https://github.com/llvm/llvm-project/pull/153801.diff 1 Files Affected:
diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
index 1c5f38d0c24b8..05f2512e24a50 100644
--- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
@@ -289,7 +289,6 @@ class RuntimeLibcallEmitter {
/// Helper struct for the name hash table.
struct LookupEntry {
- StringRef FuncName;
uint64_t Hash = 0;
unsigned TableValue = 0;
};
@@ -339,14 +338,17 @@ static void emitHashFunction(raw_ostream &OS) {
/// Return the table size, maximum number of collisions for the set of hashes
static std::pair<int, int>
computePerfectHashParameters(ArrayRef<uint64_t> Hashes) {
- const int SizeOverhead = 10;
- const int NumHashes = Hashes.size();
+ // Chosen based on experimentation with llvm/benchmarks/RuntimeLibcalls.cpp
+ const int SizeOverhead = 4;
// Index derived from hash -> number of collisions.
DenseMap<uint64_t, int> Table;
+ unsigned NumHashes = Hashes.size();
+
for (int MaxCollisions = 1;; ++MaxCollisions) {
- for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) {
+ for (unsigned N = NextPowerOf2(NumHashes - 1); N < SizeOverhead * NumHashes;
+ N <<= 1) {
Table.clear();
bool NeedResize = false;
@@ -367,22 +369,12 @@ computePerfectHashParameters(ArrayRef<uint64_t> Hashes) {
static std::vector<LookupEntry>
constructPerfectHashTable(ArrayRef<RuntimeLibcallImpl> Keywords,
- ArrayRef<uint64_t> Hashes, int Size, int Collisions,
- StringToOffsetTable &OffsetTable) {
- DenseSet<StringRef> Seen;
+ ArrayRef<uint64_t> Hashes,
+ ArrayRef<unsigned> TableValues, int Size,
+ int Collisions, StringToOffsetTable &OffsetTable) {
std::vector<LookupEntry> Lookup(Size * Collisions);
- for (const RuntimeLibcallImpl &LibCallImpl : Keywords) {
- StringRef ImplName = LibCallImpl.getLibcallFuncName();
-
- // We do not want to add repeated entries for cases with the same name, only
- // an entry for the first, with the name collision enum values immediately
- // following.
- if (!Seen.insert(ImplName).second)
- continue;
-
- uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1];
-
+ for (auto [HashValue, TableValue] : zip(Hashes, TableValues)) {
uint64_t Idx = (HashValue % static_cast<uint64_t>(Size)) *
static_cast<uint64_t>(Collisions);
@@ -390,8 +382,7 @@ constructPerfectHashTable(ArrayRef<RuntimeLibcallImpl> Keywords,
for (int J = 0; J < Collisions; ++J) {
LookupEntry &Entry = Lookup[Idx + J];
if (Entry.TableValue == 0) {
- Entry.FuncName = ImplName;
- Entry.TableValue = LibCallImpl.getEnumVal();
+ Entry.TableValue = TableValue;
Entry.Hash = HashValue;
Found = true;
break;
@@ -399,7 +390,7 @@ constructPerfectHashTable(ArrayRef<RuntimeLibcallImpl> Keywords,
}
if (!Found)
- reportFatalInternalError("failure to hash " + ImplName);
+ reportFatalInternalError("failure to hash");
}
return Lookup;
@@ -409,15 +400,25 @@ constructPerfectHashTable(ArrayRef<RuntimeLibcallImpl> Keywords,
void RuntimeLibcallEmitter::emitNameMatchHashTable(
raw_ostream &OS, StringToOffsetTable &OffsetTable) const {
std::vector<uint64_t> Hashes(RuntimeLibcallImplDefList.size());
+ std::vector<unsigned> TableValues(RuntimeLibcallImplDefList.size());
+ DenseSet<StringRef> SeenFuncNames;
size_t MaxFuncNameSize = 0;
size_t Index = 0;
+
for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
StringRef ImplName = LibCallImpl.getLibcallFuncName();
- MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
- Hashes[Index++] = hash(ImplName);
+ if (SeenFuncNames.insert(ImplName).second) {
+ MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
+ TableValues[Index] = LibCallImpl.getEnumVal();
+ Hashes[Index++] = hash(ImplName);
+ }
}
+ // Trim excess elements from non-unique entries.
+ Hashes.resize(SeenFuncNames.size());
+ TableValues.resize(SeenFuncNames.size());
+
LLVM_DEBUG({
for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
StringRef ImplName = LibCallImpl.getLibcallFuncName();
@@ -447,8 +448,9 @@ void RuntimeLibcallEmitter::emitNameMatchHashTable(
"#endif\n";
auto [Size, Collisions] = computePerfectHashParameters(Hashes);
- std::vector<LookupEntry> Lookup = constructPerfectHashTable(
- RuntimeLibcallImplDefList, Hashes, Size, Collisions, OffsetTable);
+ std::vector<LookupEntry> Lookup =
+ constructPerfectHashTable(RuntimeLibcallImplDefList, Hashes, TableValues,
+ Size, Collisions, OffsetTable);
LLVM_DEBUG(dbgs() << "Runtime libcall perfect hashing parameters: Size = "
<< Size << ", maximum collisions = " << Collisions << '\n');
@@ -464,10 +466,13 @@ void RuntimeLibcallEmitter::emitNameMatchHashTable(
OS << " static constexpr uint16_t HashTableNameToEnum[" << Lookup.size()
<< "] = {\n";
- for (auto [FuncName, Hash, TableVal] : Lookup) {
+ for (auto [Hash, TableVal] : Lookup) {
OS << " " << TableVal << ',';
- if (TableVal != 0)
+ if (TableVal != 0) {
+ StringRef FuncName =
+ RuntimeLibcallImplDefList[TableVal].getLibcallFuncName();
OS << " // " << format_hex(Hash, 16) << ", " << FuncName;
+ }
OS << '\n';
}
|
|
@llvm/pr-subscribers-tablegen Author: Matt Arsenault (arsenm) Changes: We were sizing the table appropriately for the number of LibcallImpls, but many of those have identical names, which was pushing up the collision count unnecessarily. This ends up decreasing the table size slightly, and makes it a bit faster. BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and BM_LookupRuntimeLibcallByNameSampleData by ~5%. As a secondary change, align the table size up to the next power of 2. This makes the table larger than before, but improves the sample data benchmark by an additional 5%. Full diff: https://github.com/llvm/llvm-project/pull/153801.diff 1 Files Affected:
diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
index 1c5f38d0c24b8..05f2512e24a50 100644
--- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
@@ -289,7 +289,6 @@ class RuntimeLibcallEmitter {
/// Helper struct for the name hash table.
struct LookupEntry {
- StringRef FuncName;
uint64_t Hash = 0;
unsigned TableValue = 0;
};
@@ -339,14 +338,17 @@ static void emitHashFunction(raw_ostream &OS) {
/// Return the table size, maximum number of collisions for the set of hashes
static std::pair<int, int>
computePerfectHashParameters(ArrayRef<uint64_t> Hashes) {
- const int SizeOverhead = 10;
- const int NumHashes = Hashes.size();
+ // Chosen based on experimentation with llvm/benchmarks/RuntimeLibcalls.cpp
+ const int SizeOverhead = 4;
// Index derived from hash -> number of collisions.
DenseMap<uint64_t, int> Table;
+ unsigned NumHashes = Hashes.size();
+
for (int MaxCollisions = 1;; ++MaxCollisions) {
- for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) {
+ for (unsigned N = NextPowerOf2(NumHashes - 1); N < SizeOverhead * NumHashes;
+ N <<= 1) {
Table.clear();
bool NeedResize = false;
@@ -367,22 +369,12 @@ computePerfectHashParameters(ArrayRef<uint64_t> Hashes) {
static std::vector<LookupEntry>
constructPerfectHashTable(ArrayRef<RuntimeLibcallImpl> Keywords,
- ArrayRef<uint64_t> Hashes, int Size, int Collisions,
- StringToOffsetTable &OffsetTable) {
- DenseSet<StringRef> Seen;
+ ArrayRef<uint64_t> Hashes,
+ ArrayRef<unsigned> TableValues, int Size,
+ int Collisions, StringToOffsetTable &OffsetTable) {
std::vector<LookupEntry> Lookup(Size * Collisions);
- for (const RuntimeLibcallImpl &LibCallImpl : Keywords) {
- StringRef ImplName = LibCallImpl.getLibcallFuncName();
-
- // We do not want to add repeated entries for cases with the same name, only
- // an entry for the first, with the name collision enum values immediately
- // following.
- if (!Seen.insert(ImplName).second)
- continue;
-
- uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1];
-
+ for (auto [HashValue, TableValue] : zip(Hashes, TableValues)) {
uint64_t Idx = (HashValue % static_cast<uint64_t>(Size)) *
static_cast<uint64_t>(Collisions);
@@ -390,8 +382,7 @@ constructPerfectHashTable(ArrayRef<RuntimeLibcallImpl> Keywords,
for (int J = 0; J < Collisions; ++J) {
LookupEntry &Entry = Lookup[Idx + J];
if (Entry.TableValue == 0) {
- Entry.FuncName = ImplName;
- Entry.TableValue = LibCallImpl.getEnumVal();
+ Entry.TableValue = TableValue;
Entry.Hash = HashValue;
Found = true;
break;
@@ -399,7 +390,7 @@ constructPerfectHashTable(ArrayRef<RuntimeLibcallImpl> Keywords,
}
if (!Found)
- reportFatalInternalError("failure to hash " + ImplName);
+ reportFatalInternalError("failure to hash");
}
return Lookup;
@@ -409,15 +400,25 @@ constructPerfectHashTable(ArrayRef<RuntimeLibcallImpl> Keywords,
void RuntimeLibcallEmitter::emitNameMatchHashTable(
raw_ostream &OS, StringToOffsetTable &OffsetTable) const {
std::vector<uint64_t> Hashes(RuntimeLibcallImplDefList.size());
+ std::vector<unsigned> TableValues(RuntimeLibcallImplDefList.size());
+ DenseSet<StringRef> SeenFuncNames;
size_t MaxFuncNameSize = 0;
size_t Index = 0;
+
for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
StringRef ImplName = LibCallImpl.getLibcallFuncName();
- MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
- Hashes[Index++] = hash(ImplName);
+ if (SeenFuncNames.insert(ImplName).second) {
+ MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
+ TableValues[Index] = LibCallImpl.getEnumVal();
+ Hashes[Index++] = hash(ImplName);
+ }
}
+ // Trim excess elements from non-unique entries.
+ Hashes.resize(SeenFuncNames.size());
+ TableValues.resize(SeenFuncNames.size());
+
LLVM_DEBUG({
for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
StringRef ImplName = LibCallImpl.getLibcallFuncName();
@@ -447,8 +448,9 @@ void RuntimeLibcallEmitter::emitNameMatchHashTable(
"#endif\n";
auto [Size, Collisions] = computePerfectHashParameters(Hashes);
- std::vector<LookupEntry> Lookup = constructPerfectHashTable(
- RuntimeLibcallImplDefList, Hashes, Size, Collisions, OffsetTable);
+ std::vector<LookupEntry> Lookup =
+ constructPerfectHashTable(RuntimeLibcallImplDefList, Hashes, TableValues,
+ Size, Collisions, OffsetTable);
LLVM_DEBUG(dbgs() << "Runtime libcall perfect hashing parameters: Size = "
<< Size << ", maximum collisions = " << Collisions << '\n');
@@ -464,10 +466,13 @@ void RuntimeLibcallEmitter::emitNameMatchHashTable(
OS << " static constexpr uint16_t HashTableNameToEnum[" << Lookup.size()
<< "] = {\n";
- for (auto [FuncName, Hash, TableVal] : Lookup) {
+ for (auto [Hash, TableVal] : Lookup) {
OS << " " << TableVal << ',';
- if (TableVal != 0)
+ if (TableVal != 0) {
+ StringRef FuncName =
+ RuntimeLibcallImplDefList[TableVal].getLibcallFuncName();
OS << " // " << format_hex(Hash, 16) << ", " << FuncName;
+ }
OS << '\n';
}
|
b6af40f to
f0c374c
Compare
f445a16 to
6e2b170
Compare
We were sizing the table appropriately for the number of LibcallImpls, but many of those have identical names which were pushing up the collision count unnecessarily. This ends up decreasing the table size slightly, and makes it a bit faster. BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and BM_LookupRuntimeLibcallByNameSampleData by ~5%. As a secondary change, align the table size up to the next power of 2. This makes the table larger than before, but improves the sample data benchmark by an additional 5%.
6e2b170 to
aaec1db
Compare
|
ping |
efriedma-quic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using a power-of-two hashtable size means you're really relying on a strong hash function; allowing non-power-of-two sizes gives a lot more chances to cover up any imperfection. We might want to consider adding a diagnostic if MaxCollisions gets excessively high.
Otherwise LGTM
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/116/builds/17418 Here is the relevant piece of the build log for reference. |

We were sizing the table appropriately for the number of LibcallImpls,
but many of those have identical names which were pushing up the
collision count unnecessarily. This ends up decreasing the table size
slightly, and makes it a bit faster.
BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and
BM_LookupRuntimeLibcallByNameSampleData by ~5%.
As a secondary change, align the table size up to the next
power of 2. This makes the table larger than before, but improves
the sample data benchmark by an additional 5%.