Skip to content

Commit 674d8a9

Browse files
committed
[clangd] Use xxhash instead of SHA1 for background index file digests.
Summary: Currently SHA1 is about 10% of our CPU, this patch reduces it to ~1%. xxhash is a well-defined (stable) non-cryptographic hash optimized for fast checksums (like crc32). Collisions shouldn't be a problem, despite the reduced length: - for actual file content (used to invalidate bg index shards), there are only two versions that can collide (new shard and old shard). - for file paths in bg index shard filenames, we would need 2^32 files with the same filename to expect a collision. Imperfect hashing may reduce this a bit but it's well beyond what's plausible. This will invalidate shards on disk (as usual; I bumped the version), but this time the filenames are changing so the old files will stick around :-( So this is more expensive than the usual bump, but would be good to land before the v9 branch when everyone will start using bg index. Reviewers: kadircet Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64306 llvm-svn: 365311
1 parent 556ec99 commit 674d8a9

File tree

7 files changed

+11
-17
lines changed

7 files changed

+11
-17
lines changed

clang-tools-extra/clangd/SourceCode.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "llvm/Support/Error.h"
2626
#include "llvm/Support/ErrorHandling.h"
2727
#include "llvm/Support/Path.h"
28+
#include "llvm/Support/xxhash.h"
2829
#include <algorithm>
2930

3031
namespace clang {
@@ -376,7 +377,13 @@ bool isRangeConsecutive(const Range &Left, const Range &Right) {
376377
}
377378

378379
FileDigest digest(llvm::StringRef Content) {
379-
return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
380+
uint64_t Hash{llvm::xxHash64(Content)};
381+
FileDigest Result;
382+
for (unsigned I = 0; I < Result.size(); ++I) {
383+
Result[I] = uint8_t(Hash);
384+
Hash >>= 8;
385+
}
386+
return Result;
380387
}
381388

382389
llvm::Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {

clang-tools-extra/clangd/SourceCode.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
#include "clang/Tooling/Core/Replacement.h"
2323
#include "llvm/ADT/StringSet.h"
2424
#include "llvm/ADT/StringRef.h"
25-
#include "llvm/Support/SHA1.h"
2625

2726
namespace clang {
2827
class SourceManager;
@@ -32,7 +31,7 @@ namespace clangd {
3231
// We tend to generate digests for source codes in a lot of different places.
3332
// This represents the type for those digests to prevent us hard coding details
3433
// of hashing function at every place that needs to store this information.
35-
using FileDigest = decltype(llvm::SHA1::hash({}));
34+
using FileDigest = std::array<uint8_t, 8>;
3635
FileDigest digest(StringRef Content);
3736
Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID);
3837

clang-tools-extra/clangd/index/Background.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
#include "llvm/ADT/StringRef.h"
3333
#include "llvm/ADT/StringSet.h"
3434
#include "llvm/Support/Error.h"
35-
#include "llvm/Support/SHA1.h"
3635
#include "llvm/Support/Threading.h"
3736

3837
#include <atomic>

clang-tools-extra/clangd/index/Background.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "index/Serialization.h"
2020
#include "clang/Tooling/CompilationDatabase.h"
2121
#include "llvm/ADT/StringMap.h"
22-
#include "llvm/Support/SHA1.h"
2322
#include "llvm/Support/Threading.h"
2423
#include <atomic>
2524
#include <condition_variable>

clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,11 @@
1313
#include "llvm/Support/FileSystem.h"
1414
#include "llvm/Support/MemoryBuffer.h"
1515
#include "llvm/Support/Path.h"
16-
#include "llvm/Support/SHA1.h"
1716

1817
namespace clang {
1918
namespace clangd {
2019
namespace {
2120

22-
using FileDigest = decltype(llvm::SHA1::hash({}));
23-
24-
static FileDigest digest(StringRef Content) {
25-
return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
26-
}
27-
2821
std::string getShardPathFromFilePath(llvm::StringRef ShardRoot,
2922
llvm::StringRef FilePath) {
3023
llvm::SmallString<128> ShardRootSS(ShardRoot);

clang-tools-extra/clangd/index/Serialization.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) {
444444
// The current versioning scheme is simple - non-current versions are rejected.
445445
// If you make a breaking change, bump this version number to invalidate stored
446446
// data. Later we may want to support some backward compatibility.
447-
constexpr static uint32_t Version = 11;
447+
constexpr static uint32_t Version = 12;
448448

449449
llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) {
450450
auto RIFF = riff::readFile(Data);

clang-tools-extra/clangd/unittests/SerializationTests.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
#include "index/Index.h"
1111
#include "index/Serialization.h"
1212
#include "clang/Tooling/CompilationDatabase.h"
13-
#include "llvm/Support/SHA1.h"
1413
#include "llvm/Support/ScopedPrinter.h"
1514
#include "gmock/gmock.h"
1615
#include "gtest/gtest.h"
@@ -208,9 +207,7 @@ TEST(SerializationTest, SrcsTest) {
208207

209208
std::string TestContent("TestContent");
210209
IncludeGraphNode IGN;
211-
IGN.Digest =
212-
llvm::SHA1::hash({reinterpret_cast<const uint8_t *>(TestContent.data()),
213-
TestContent.size()});
210+
IGN.Digest = digest(TestContent);
214211
IGN.DirectIncludes = {"inc1", "inc2"};
215212
IGN.URI = "URI";
216213
IGN.Flags |= IncludeGraphNode::SourceFlag::IsTU;

0 commit comments

Comments
 (0)