Skip to content

Commit 521e623

Browse files
committed
Swift: use std::filesystem and picoSHA2
This replaces usages of `llvm::sys::fs` and manual string manipulation with `std::filesystem`, and replaces `std::string` with `std::filesystem::path` where it made sense. Moreover, the MD5 hashing used in macOS file remapping was replaced by SHA256 hashing, using PicoSHA2 (https://github.com/okdshin/PicoSHA2), a small header-only SHA256 C++ library with an MIT license. File contents hashing was relocated to the newly created `file` library for later planned reuse.
1 parent e422a4e commit 521e623

23 files changed

+254
-178
lines changed

misc/bazel/workspace.bzl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,14 @@ def codeql_workspace(repository_name = "codeql"):
3434
sha256 = "9f2cdee6dcc2039d4c47d25ab5141fe0678ce6ed27ef482cab17fe9fa38a30ce",
3535
)
3636

37+
http_archive(
38+
name = "picosha2",
39+
url = "https://github.com/okdshin/PicoSHA2/archive/27fcf6979298949e8a462e16d09a0351c18fcaf2.zip",
40+
strip_prefix = "PicoSHA2-27fcf6979298949e8a462e16d09a0351c18fcaf2",
41+
build_file = "@%s//swift/third_party/picosha2:BUILD.picosha2.bazel" % repository_name,
42+
sha256 = "d6647ca45a8b7bdaf027ecb68d041b22a899a0218b7206dee755c558a2725abb",
43+
)
44+
3745
maybe(
3846
repo_rule = http_archive,
3947
name = "rules_pkg",

swift/extractor/SwiftExtractor.cpp

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,39 +15,42 @@
1515

1616
using namespace codeql;
1717
using namespace std::string_literals;
18+
namespace fs = std::filesystem;
1819

19-
static void archiveFile(const SwiftExtractorConfiguration& config, swift::SourceFile& file) {
20-
if (std::error_code ec = llvm::sys::fs::create_directories(config.trapDir)) {
21-
std::cerr << "Cannot create TRAP directory: " << ec.message() << "\n";
22-
return;
23-
}
20+
static fs::path toPath(llvm::StringRef s) {
21+
return {static_cast<std::string_view>(s)};
22+
}
2423

25-
if (std::error_code ec = llvm::sys::fs::create_directories(config.sourceArchiveDir)) {
26-
std::cerr << "Cannot create source archive directory: " << ec.message() << "\n";
27-
return;
24+
static void ensureDirectory(const char* label, const fs::path& dir) {
25+
std::error_code ec;
26+
fs::create_directories(dir, ec);
27+
if (ec) {
28+
std::cerr << "Cannot create " << label << " directory: " << ec.message() << "\n";
29+
std::abort();
2830
}
31+
}
2932

30-
llvm::SmallString<PATH_MAX> srcFilePath(file.getFilename());
31-
llvm::sys::fs::make_absolute(srcFilePath);
33+
static void archiveFile(const SwiftExtractorConfiguration& config, swift::SourceFile& file) {
34+
ensureDirectory("TRAP", config.trapDir);
35+
ensureDirectory("source archive", config.sourceArchiveDir);
3236

33-
llvm::SmallString<PATH_MAX> dstFilePath(config.sourceArchiveDir);
34-
llvm::sys::path::append(dstFilePath, srcFilePath);
37+
fs::path srcFilePath = fs::absolute(toPath(file.getFilename()));
38+
auto dstFilePath = config.sourceArchiveDir;
39+
dstFilePath += srcFilePath;
3540

36-
llvm::StringRef parent = llvm::sys::path::parent_path(dstFilePath);
37-
if (std::error_code ec = llvm::sys::fs::create_directories(parent)) {
38-
std::cerr << "Cannot create source archive destination directory '" << parent.str()
39-
<< "': " << ec.message() << "\n";
40-
return;
41-
}
41+
ensureDirectory("source archive destination", dstFilePath.parent_path());
42+
43+
std::error_code ec;
44+
fs::copy(srcFilePath, dstFilePath, ec);
4245

43-
if (std::error_code ec = llvm::sys::fs::copy_file(srcFilePath, dstFilePath)) {
44-
std::cerr << "Cannot archive source file '" << srcFilePath.str().str() << "' -> '"
45-
<< dstFilePath.str().str() << "': " << ec.message() << "\n";
46+
if (ec) {
47+
std::cerr << "Cannot archive source file " << srcFilePath << " -> " << dstFilePath
48+
<< "': " << ec.message() << "\n";
4649
return;
4750
}
4851
}
4952

50-
static std::string getFilename(swift::ModuleDecl& module, swift::SourceFile* primaryFile) {
53+
static fs::path getFilename(swift::ModuleDecl& module, swift::SourceFile* primaryFile) {
5154
if (primaryFile) {
5255
return primaryFile->getFilename().str();
5356
}
@@ -57,7 +60,8 @@ static std::string getFilename(swift::ModuleDecl& module, swift::SourceFile* pri
5760
// In this case we want to differentiate them
5861
// Moreover, pcm files may come from caches located in different directories, but are
5962
// unambiguously identified by the base file name, so we can discard the absolute directory
60-
std::string filename = "/pcms/"s + llvm::sys::path::filename(module.getModuleFilename()).str();
63+
fs::path filename = "/pcms";
64+
filename /= toPath(module.getModuleFilename()).filename();
6165
filename += "-";
6266
filename += module.getName().str();
6367
return filename;
@@ -66,7 +70,7 @@ static std::string getFilename(swift::ModuleDecl& module, swift::SourceFile* pri
6670
// The Builtin module has an empty filename, let's fix that
6771
return "/__Builtin__";
6872
}
69-
auto filename = module.getModuleFilename().str();
73+
auto filename = toPath(module.getModuleFilename());
7074
// there is a special case of a module without an actual filename reporting `<imports>`: in this
7175
// case we want to avoid the `<>` characters, in case a dirty DB is imported on Windows
7276
if (filename == "<imports>") {

swift/extractor/SwiftExtractorConfiguration.h

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,19 @@
22

33
#include <string>
44
#include <vector>
5+
#include <filesystem>
56

67
#include "swift/extractor/infra/file/TargetFile.h"
78

89
namespace codeql {
910
struct SwiftExtractorConfiguration {
1011
// The location for storing TRAP files to be imported by CodeQL engine.
11-
std::string trapDir;
12+
std::filesystem::path trapDir;
1213
// The location for storing extracted source files.
13-
std::string sourceArchiveDir;
14+
std::filesystem::path sourceArchiveDir;
1415
// A temporary directory that exists during database creation, but is deleted once the DB is
1516
// finalized.
16-
std::string scratchDir;
17+
std::filesystem::path scratchDir;
1718

1819
// The original arguments passed to the extractor. Used for debugging.
1920
std::vector<std::string> frontendOptions;
@@ -22,18 +23,20 @@ struct SwiftExtractorConfiguration {
2223

2324
// A temporary directory that contains TRAP files before they are moved into their final
2425
// destination.
25-
std::string getTempTrapDir() const { return scratchDir + "/swift-trap-temp"; }
26+
std::filesystem::path getTempTrapDir() const { return scratchDir / "swift-trap-temp"; }
2627

2728
// VFS (virtual file system) support.
2829
// A temporary directory that contains VFS files used during extraction.
29-
std::string getVFSDir() const { return scratchDir + "/swift-vfs"; }
30+
std::filesystem::path getVFSDir() const { return scratchDir / "swift-vfs"; }
3031

3132
// A temporary directory that contains temp VFS files before they are moved into VFSDir.
32-
std::string getTempVFSDir() const { return scratchDir + "/swift-vfs-temp"; }
33+
std::filesystem::path getTempVFSDir() const { return scratchDir / "swift-vfs-temp"; }
3334

3435
// A temporary directory that contains build artifacts generated by the extractor during the
3536
// overall extraction process.
36-
std::string getTempArtifactDir() const { return scratchDir + "/swift-extraction-artifacts"; }
37+
std::filesystem::path getTempArtifactDir() const {
38+
return scratchDir / "swift-extraction-artifacts";
39+
}
3740
};
3841

3942
} // namespace codeql

swift/extractor/TargetTrapFile.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
#include <iomanip>
33
namespace codeql {
44
std::optional<TargetFile> createTargetTrapFile(const SwiftExtractorConfiguration& configuration,
5-
std::string_view target) {
6-
std::string trap{target};
5+
const std::filesystem::path& target) {
6+
auto trap = target;
77
trap += ".trap";
88
auto ret = TargetFile::create(trap, configuration.trapDir, configuration.getTempTrapDir());
99
if (ret) {

swift/extractor/TargetTrapFile.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@
66
namespace codeql {
77

88
std::optional<TargetFile> createTargetTrapFile(const SwiftExtractorConfiguration& configuration,
9-
std::string_view target);
9+
const std::filesystem::path& target);
1010

1111
} // namespace codeql

swift/extractor/infra/FilePath.h

Lines changed: 0 additions & 24 deletions
This file was deleted.

swift/extractor/infra/SwiftDispatcher.h

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
#pragma once
22

3+
#include <filesystem>
4+
35
#include <swift/AST/SourceFile.h>
46
#include <swift/Basic/SourceManager.h>
5-
#include <llvm/Support/FileSystem.h>
67
#include <swift/Parse/Token.h>
78

89
#include "swift/extractor/trap/TrapLabelStore.h"
910
#include "swift/extractor/trap/TrapDomain.h"
1011
#include "swift/extractor/infra/SwiftTagTraits.h"
1112
#include "swift/extractor/trap/generated/TrapClasses.h"
12-
#include "swift/extractor/infra/FilePath.h"
13+
#include "swift/extractor/infra/file/PathHash.h"
1314

1415
namespace codeql {
1516

@@ -29,7 +30,7 @@ class SwiftDispatcher {
2930
const swift::Pattern*,
3031
const swift::TypeRepr*,
3132
const swift::TypeBase*,
32-
FilePath>;
33+
std::filesystem::path>;
3334

3435
template <typename E>
3536
static constexpr bool IsStorable = std::is_constructible_v<Store::Handle, const E&>;
@@ -306,16 +307,17 @@ class SwiftDispatcher {
306307
return false;
307308
}
308309

309-
static FilePath getFilePath(llvm::StringRef path) {
310+
static std::filesystem::path getFilePath(std::string_view path) {
310311
// TODO: this needs more testing
311312
// TODO: check canonicalization of names on case-insensitive filesystems
312313
// TODO: make symlink resolution conditional on CODEQL_PRESERVE_SYMLINKS=true
313-
llvm::SmallString<PATH_MAX> realPath;
314-
if (std::error_code ec = llvm::sys::fs::real_path(path, realPath)) {
315-
std::cerr << "Cannot get real path: '" << path.str() << "': " << ec.message() << "\n";
314+
std::error_code ec;
315+
auto ret = std::filesystem::canonical(path, ec);
316+
if (ec) {
317+
std::cerr << "Cannot get real path: " << std::quoted(path) << ": " << ec.message() << "\n";
316318
return {};
317319
}
318-
return realPath.str().str();
320+
return ret;
319321
}
320322

321323
// TODO: for const correctness these should consistently be `const` (and maybe const references
@@ -331,9 +333,9 @@ class SwiftDispatcher {
331333
virtual void visit(swift::TypeRepr* typeRepr, swift::Type type) = 0;
332334
virtual void visit(swift::TypeBase* type) = 0;
333335

334-
void visit(const FilePath& file) {
335-
auto entry = createEntry(file, file.path);
336-
entry.name = file.path;
336+
void visit(const std::filesystem::path& file) {
337+
auto entry = createEntry(file, file.string());
338+
entry.name = file.string();
337339
emit(entry);
338340
}
339341

swift/extractor/infra/SwiftTagTraits.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
// This file implements the mapping needed by the API defined in the TrapTagTraits.h, so that
44
// TrapTagOf/TrapLabelOf provide the tags/labels for specific swift entity types.
5+
#include <filesystem>
56
#include <swift/AST/ASTVisitor.h>
67
#include "swift/extractor/trap/TrapTagTraits.h"
78
#include "swift/extractor/trap/generated/TrapTags.h"
8-
#include "swift/extractor/infra/FilePath.h"
99

1010
namespace codeql {
1111

@@ -68,7 +68,7 @@ MAP_TYPE_TO_TAG(swift::TypeBase, TypeTag);
6868
OVERRIDE_TAG(FuncDecl, ConcreteFuncDeclTag);
6969
OVERRIDE_TAG(VarDecl, ConcreteVarDeclTag);
7070

71-
MAP_TYPE_TO_TAG(FilePath, DbFileTag);
71+
MAP_TYPE_TO_TAG(std::filesystem::path, DbFileTag);
7272

7373
#undef MAP_TAG
7474
#undef MAP_SUBTAG

swift/extractor/infra/file/BUILD.bazel

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@ swift_cc_library(
55
srcs = glob(["*.cpp"]),
66
hdrs = glob(["*.h"]),
77
visibility = ["//swift:__subpackages__"],
8-
deps = ["//swift/third_party/swift-llvm-support"],
8+
deps = ["@picosha2"],
99
)
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#include "swift/extractor/infra/file/FileHash.h"
2+
#include <iostream>
3+
#include <fstream>
4+
#include <unistd.h>
5+
#include <fcntl.h>
6+
#include <picosha2.h>
7+
8+
namespace codeql {
9+
std::string hashFile(const std::filesystem::path& file) {
10+
// using `open` instead of `std::ifstream` to reuse `hashFile(int)` below
11+
if (auto fd = ::open(file.c_str(), O_RDONLY); fd >= 0) {
12+
return hashFile(fd);
13+
}
14+
return "";
15+
}
16+
17+
std::string hashFile(int fd) {
18+
auto hasher = picosha2::hash256_one_by_one();
19+
constexpr size_t bufferSize = 16 * 1024;
20+
char buffer[bufferSize];
21+
ssize_t bytesRead = 0;
22+
while ((bytesRead = ::read(fd, buffer, bufferSize)) > 0) {
23+
hasher.process(buffer, buffer + bytesRead);
24+
}
25+
::close(fd);
26+
if (bytesRead < 0) {
27+
return "";
28+
}
29+
hasher.finish();
30+
return get_hash_hex_string(hasher);
31+
}
32+
} // namespace codeql

0 commit comments

Comments
 (0)