Skip to content

Commit 874fe2b

Browse files
committed
Swift: introduce an in-memory file hash cache
File hashing is now done internally in `SwiftFileInterception` (and exported as a `getHashOfRealFile` function for future use in linkage awareness), and uses a per-process in-memory cache. The persistent caching of paths is removed, so the solution is now robust against input file changes during the build. For the same reason, the hash-to-artifact mapping now has its symlinks reversed: the artifacts themselves are stored using the hash as the filename, and the original paths of the artifacts are recreated in the scratch dir as symlinks, mostly for debugging purposes (to understand which artifact each hash corresponds to, and to follow what was built by the extractor).
1 parent 7384122 commit 874fe2b

File tree

11 files changed

+75
-75
lines changed

11 files changed

+75
-75
lines changed

swift/extractor/infra/file/BUILD.bazel

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ swift_cc_library(
55
srcs = glob(["*.cpp"]),
66
hdrs = glob(["*.h"]) + [":path_hash_workaround"],
77
visibility = ["//swift:__subpackages__"],
8-
deps = ["@picosha2"],
98
)
109

1110
genrule(

swift/extractor/infra/file/FileHash.cpp

Lines changed: 0 additions & 32 deletions
This file was deleted.

swift/extractor/infra/file/FileHash.h

Lines changed: 0 additions & 11 deletions
This file was deleted.

swift/extractor/infra/file/Path.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ static bool shouldCanonicalize() {
1616
return true;
1717
}
1818

19-
std::filesystem::path resolvePath(std::string_view path) {
19+
std::filesystem::path resolvePath(const std::filesystem::path& path) {
2020
std::error_code ec;
2121
std::filesystem::path ret = {};
2222
static const auto canonicalize = shouldCanonicalize();
@@ -26,8 +26,8 @@ std::filesystem::path resolvePath(std::string_view path) {
2626
ret = std::filesystem::absolute(path, ec);
2727
}
2828
if (ec) {
29-
std::cerr << "Cannot get " << (canonicalize ? "canonical" : "absolute")
30-
<< " path: " << std::quoted(path) << ": " << ec.message() << "\n";
29+
std::cerr << "Cannot get " << (canonicalize ? "canonical" : "absolute") << " path: " << path
30+
<< ": " << ec.message() << "\n";
3131
return path;
3232
}
3333
return ret;

swift/extractor/infra/file/Path.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,13 @@
33
#include <filesystem>
44

55
namespace codeql {
6-
std::filesystem::path resolvePath(std::string_view path);
6+
std::filesystem::path resolvePath(const std::filesystem::path& path);
7+
8+
inline std::filesystem::path resolvePath(std::string_view path) {
9+
return resolvePath(std::filesystem::path{path});
10+
}
11+
12+
inline std::filesystem::path resolvePath(const std::string& path) {
13+
return resolvePath(std::filesystem::path{path});
714
}
15+
} // namespace codeql

swift/extractor/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ int main(int argc, char** argv) {
181181

182182
const auto configuration = configure(argc, argv);
183183

184-
auto openInterception = codeql::setupFileInterception(configuration.getTempArtifactDir());
184+
auto openInterception = codeql::setupFileInterception(configuration);
185185

186186
auto invocationTrapFile = invocationTargetFile(configuration);
187187
codeql::TrapDomain invocationDomain(invocationTrapFile);

swift/extractor/remapping/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ swift_cc_library(
66
hdrs = glob(["*.h"]),
77
visibility = ["//swift:__subpackages__"],
88
deps = [
9+
"//swift/extractor/config",
910
"//swift/extractor/infra/file",
1011
"//swift/third_party/swift-llvm-support",
12+
"@picosha2",
1113
],
1214
)

swift/extractor/remapping/SwiftFileInterception.cpp

Lines changed: 53 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,16 @@
44
#include <filesystem>
55

66
#include <dlfcn.h>
7+
#include <unistd.h>
78
#include <mutex>
89
#include <optional>
910
#include <cassert>
11+
#include <iostream>
1012

11-
#include "swift/extractor/infra/file/FileHash.h"
12-
#include "swift/extractor/infra/file/FileHash.h"
13+
#include <picosha2.h>
14+
15+
#include "swift/extractor/infra/file/PathHash.h"
16+
#include "swift/extractor/infra/file/Path.h"
1317

1418
#ifdef __APPLE__
1519
// path is hardcoded as otherwise redirection could break when setting DYLD_FALLBACK_LIBRARY_PATH
@@ -64,6 +68,28 @@ bool mayBeRedirected(const char* path, int flags = O_RDONLY) {
6468
endsWith(path, ".swiftmodule"));
6569
}
6670

71+
std::optional<std::string> hashFile(const fs::path& path) {
72+
auto fd = original::open(path.c_str(), O_RDONLY | O_CLOEXEC);
73+
if (fd < 0) {
74+
auto ec = std::make_error_code(static_cast<std::errc>(errno));
75+
std::cerr << "unable to open " << path << " for reading (" << ec.message() << ")\n";
76+
return std::nullopt;
77+
}
78+
auto hasher = picosha2::hash256_one_by_one();
79+
constexpr size_t bufferSize = 16 * 1024;
80+
char buffer[bufferSize];
81+
ssize_t bytesRead = 0;
82+
while ((bytesRead = ::read(fd, buffer, bufferSize)) > 0) {
83+
hasher.process(buffer, buffer + bytesRead);
84+
}
85+
::close(fd);
86+
if (bytesRead < 0) {
87+
return std::nullopt;
88+
}
89+
hasher.finish();
90+
return get_hash_hex_string(hasher);
91+
}
92+
6793
} // namespace
6894

6995
namespace codeql {
@@ -72,22 +98,15 @@ class FileInterceptor {
7298
public:
7399
FileInterceptor(fs::path&& workingDir) : workingDir{std::move(workingDir)} {
74100
fs::create_directories(hashesPath());
75-
fs::create_directories(storePath());
76101
}
77102

78103
int open(const char* path, int flags, mode_t mode = 0) const {
79104
fs::path fsPath{path};
80105
assert((flags & O_ACCMODE) == O_RDONLY);
106+
// try to use the hash map first
81107
errno = 0;
82-
// first, try the same path underneath the artifact store
83-
if (auto ret = original::open(redirectedPath(path).c_str(), flags);
84-
ret >= 0 || errno != ENOENT) {
85-
return ret;
86-
}
87-
errno = 0;
88-
// then try to use the hash map
89108
if (auto hashed = hashPath(path)) {
90-
if (auto ret = original::open(hashed->c_str(), flags); ret >= 0 || errno != ENOENT) {
109+
if (auto ret = original::open(hashed->c_str(), flags); errno != ENOENT) {
91110
return ret;
92111
}
93112
}
@@ -96,17 +115,18 @@ class FileInterceptor {
96115

97116
fs::path redirect(const fs::path& target) const {
98117
assert(mayBeRedirected(target.c_str()));
99-
auto ret = redirectedPath(target);
100-
fs::create_directories(ret.parent_path());
118+
auto redirected = redirectedPath(target);
119+
fs::create_directories(redirected.parent_path());
101120
if (auto hashed = hashPath(target)) {
102121
std::error_code ec;
103-
fs::create_symlink(ret, *hashed, ec);
122+
fs::create_symlink(*hashed, redirected, ec);
104123
if (ec) {
105-
std::cerr << "Cannot remap file " << ret << " -> " << *hashed << ": " << ec.message()
124+
std::cerr << "Cannot remap file " << *hashed << " -> " << redirected << ": " << ec.message()
106125
<< "\n";
107126
}
127+
return *hashed;
108128
}
109-
return ret;
129+
return redirected;
110130
}
111131

112132
private:
@@ -119,17 +139,27 @@ class FileInterceptor {
119139
}
120140

121141
std::optional<fs::path> hashPath(const fs::path& target) const {
122-
if (auto fd = original::open(target.c_str(), O_RDONLY | O_CLOEXEC); fd >= 0) {
123-
return hashesPath() / hashFile(fd);
142+
if (auto hashed = getHashOfRealFile(target)) {
143+
return hashesPath() / *hashed;
124144
}
125145
return std::nullopt;
126146
}
127147

128148
fs::path workingDir;
129149
};
130150

131-
int openReal(const fs::path& path) {
132-
return original::open(path.c_str(), O_RDONLY | O_CLOEXEC);
151+
std::optional<std::string> getHashOfRealFile(const fs::path& path) {
152+
static std::unordered_map<fs::path, std::string> cache;
153+
auto resolved = resolvePath(path);
154+
if (auto found = cache.find(resolved); found != cache.end()) {
155+
return found->second;
156+
}
157+
158+
if (auto hashed = hashFile(resolved)) {
159+
cache.emplace(resolved, *hashed);
160+
return hashed;
161+
}
162+
return std::nullopt;
133163
}
134164

135165
fs::path redirect(const fs::path& target) {
@@ -140,8 +170,9 @@ fs::path redirect(const fs::path& target) {
140170
}
141171
}
142172

143-
std::shared_ptr<FileInterceptor> setupFileInterception(fs::path workginDir) {
144-
auto ret = std::make_shared<FileInterceptor>(std::move(workginDir));
173+
std::shared_ptr<FileInterceptor> setupFileInterception(
174+
const SwiftExtractorConfiguration& configuration) {
175+
auto ret = std::make_shared<FileInterceptor>(configuration.getTempArtifactDir());
145176
fileInterceptorInstance() = ret;
146177
return ret;
147178
}

swift/extractor/remapping/SwiftFileInterception.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,16 @@
55
#include <filesystem>
66
#include <memory>
77

8-
#include "swift/extractor/infra/file/PathHash.h"
8+
#include "swift/extractor/config/SwiftExtractorConfiguration.h"
99

1010
namespace codeql {
1111

12-
int openReal(const std::filesystem::path& path);
12+
std::optional<std::string> getHashOfRealFile(const std::filesystem::path& path);
1313

1414
class FileInterceptor;
1515

16-
std::shared_ptr<FileInterceptor> setupFileInterception(std::filesystem::path workingDir);
16+
std::shared_ptr<FileInterceptor> setupFileInterception(
17+
const SwiftExtractorConfiguration& configuration);
1718

1819
std::filesystem::path redirect(const std::filesystem::path& target);
1920
} // namespace codeql

swift/integration-tests/posix-only/frontend-invocations/build.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ else
88
FRONTEND="swift-frontend"
99
fi
1010

11+
rm -rf *.swiftmodule *.o
12+
1113
$FRONTEND -frontend -c A.swift $SDK
1214
$FRONTEND -frontend -c B.swift -o B.o $SDK
1315
$FRONTEND -frontend -c -primary-file C.swift $SDK

0 commit comments

Comments
 (0)