Skip to content

Commit c4661b1

Browse files
refactor: Introduce separate CommandObject type (#378)
1 parent 4d53c18 commit c4661b1

16 files changed

+141
-144
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ See the [Usage](#usage) section for step-by-step instructions.
8888

8989
## System Requirements
9090

91-
1. About 2MB of temporary space per compilation database entry.
91+
1. About 2MB of temporary space for every translation unit (TU) in the compilation database.
9292
```bash
9393
echo "$(perl -e "print $(jq 'length' build/compile_commands.json) / 512.0") GB"
9494
```

indexer/CompilationDatabase.cc

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,21 @@ ToolchainPathsResult static determineToolchainPaths(
157157

158158
namespace scip_clang {
159159
namespace compdb {
160+
161+
/// Serializes \p cmd as a JSON object with exactly three keys:
/// "directory" (from workingDirectory), "file" (from filePath) and
/// "arguments" (from arguments). These are the same keys that the
/// matching fromJSON overload reads back.
llvm::json::Value toJSON(const CommandObject &cmd) {
162+
return llvm::json::Object{{"directory", cmd.workingDirectory},
163+
{"file", cmd.filePath},
164+
{"arguments", cmd.arguments}};
165+
}
166+
167+
/// Populates \p cmd from \p jsonValue, reading the "directory", "file" and
/// "arguments" keys into workingDirectory, filePath and arguments
/// respectively.
///
/// \param path is handed to the llvm::json::ObjectMapper together with
/// \p jsonValue.
/// \returns true only if the mapper itself is valid and all three map()
/// calls succeed. The checks are chained with &&, so evaluation stops at
/// the first failure and later fields of \p cmd are left untouched.
bool fromJSON(const llvm::json::Value &jsonValue, CommandObject &cmd,
168+
llvm::json::Path path) {
169+
llvm::json::ObjectMapper mapper(jsonValue, path);
170+
return mapper && mapper.map("directory", cmd.workingDirectory)
171+
&& mapper.map("file", cmd.filePath)
172+
&& mapper.map("arguments", cmd.arguments);
173+
}
174+
160175
namespace {
161176

162177
// Handler to validate a compilation database in a streaming fashion.
@@ -287,7 +302,7 @@ class ValidateHandler
287302
if (this->options.checkDirectoryPathsAreAbsolute
288303
&& this->lastKey == Key::Directory) {
289304
auto dirPath = std::string_view(str, length);
290-
// NOTE(ref: directory-field-is-absolute): While the JSON compilation
305+
// NOTE(def: directory-field-is-absolute): While the JSON compilation
291306
// database schema
292307
// (https://clang.llvm.org/docs/JSONCompilationDatabase.html) does not
293308
// specify if the "directory" key should be an absolute path or not, if
@@ -456,21 +471,20 @@ bool CommandObjectHandler::String(const char *str, rapidjson::SizeType length,
456471
ENFORCE(false, "unexpected input");
457472
return false;
458473
case Key::Directory:
459-
this->wipCommand.Directory = std::string(str, length);
474+
this->wipCommand.workingDirectory = std::string(str, length);
460475
break;
461476
case Key::File:
462-
this->wipCommand.Filename = std::string(str, length);
477+
this->wipCommand.filePath = std::string(str, length);
463478
break;
464479
case Key::Command:
465-
this->wipCommand.CommandLine = scip_clang::unescapeCommandLine(
480+
this->wipCommand.arguments = scip_clang::unescapeCommandLine(
466481
clang::tooling::JSONCommandLineSyntax::AutoDetect,
467482
std::string_view(str, length));
468483
break;
469484
case Key::Arguments: // Validator makes sure we have an array outside.
470-
this->wipCommand.CommandLine.emplace_back(str, length);
485+
this->wipCommand.arguments.emplace_back(str, length);
471486
break;
472-
case Key::Output:
473-
this->wipCommand.Output = std::string(str, length);
487+
case Key::Output: // Do nothing
474488
break;
475489
}
476490
return true;
@@ -506,11 +520,10 @@ bool CommandObjectHandler::reachedLimit() const {
506520
return this->commands.size() == this->parseLimit;
507521
}
508522

509-
CompilationDatabaseFile
510-
CompilationDatabaseFile::open(const StdPath &path,
511-
ValidationOptions validationOptions,
512-
std::error_code &fileSizeError) {
513-
CompilationDatabaseFile compdbFile{};
523+
compdb::File compdb::File::open(const StdPath &path,
524+
ValidationOptions validationOptions,
525+
std::error_code &fileSizeError) {
526+
compdb::File compdbFile{};
514527
compdbFile.file = std::fopen(path.c_str(), "rb");
515528
if (!compdbFile.file) {
516529
return compdbFile;
@@ -525,11 +538,11 @@ CompilationDatabaseFile::open(const StdPath &path,
525538
return compdbFile;
526539
}
527540

528-
CompilationDatabaseFile CompilationDatabaseFile::openAndExitOnErrors(
529-
const StdPath &path, ValidationOptions validationOptions) {
541+
compdb::File
542+
compdb::File::openAndExitOnErrors(const StdPath &path,
543+
ValidationOptions validationOptions) {
530544
std::error_code fileSizeError;
531-
auto compdbFile =
532-
CompilationDatabaseFile::open(path, validationOptions, fileSizeError);
545+
auto compdbFile = compdb::File::open(path, validationOptions, fileSizeError);
533546
if (!compdbFile.file) {
534547
spdlog::error("failed to open '{}': {}", path.string(),
535548
std::strerror(errno));
@@ -548,8 +561,8 @@ CompilationDatabaseFile CompilationDatabaseFile::openAndExitOnErrors(
548561
return compdbFile;
549562
}
550563

551-
void ResumableParser::initialize(CompilationDatabaseFile compdb,
552-
size_t refillCount, bool inferResourceDir) {
564+
void ResumableParser::initialize(compdb::File compdb, size_t refillCount,
565+
bool inferResourceDir) {
553566
auto averageJobSize = compdb.sizeInBytes() / compdb.commandCount();
554567
// Some customers have averageJobSize = 150KiB.
555568
// If numWorkers == 300 (very high core count machine),
@@ -567,8 +580,8 @@ void ResumableParser::initialize(CompilationDatabaseFile compdb,
567580
this->inferResourceDir = inferResourceDir;
568581
}
569582

570-
void ResumableParser::parseMore(
571-
std::vector<clang::tooling::CompileCommand> &out, bool checkFilesExist) {
583+
void ResumableParser::parseMore(std::vector<compdb::CommandObject> &out,
584+
bool checkFilesExist) {
572585
if (this->reader.IterativeParseComplete()) {
573586
if (this->reader.HasParseError()) {
574587
spdlog::error(
@@ -615,7 +628,8 @@ void ResumableParser::parseMore(
615628
}
616629
std::string pathBuffer;
617630
for (auto &cmd : this->handler->commands) {
618-
if (checkFilesExist && !doesFileExist(cmd.Filename, cmd.Directory)) {
631+
if (checkFilesExist
632+
&& !doesFileExist(cmd.filePath, cmd.workingDirectory)) {
619633
continue;
620634
}
621635
out.emplace_back(std::move(cmd));
@@ -625,10 +639,10 @@ void ResumableParser::parseMore(
625639

626640
if (this->inferResourceDir) {
627641
for (auto &cmd : out) {
628-
if (cmd.CommandLine.empty()) {
642+
if (cmd.arguments.empty()) {
629643
continue;
630644
}
631-
this->tryInferResourceDir(cmd.Directory, cmd.CommandLine);
645+
this->tryInferResourceDir(cmd.workingDirectory, cmd.arguments);
632646
}
633647
}
634648
}

indexer/CompilationDatabase.h

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,26 +15,28 @@
1515
#include "spdlog/fmt/fmt.h"
1616
#include "spdlog/spdlog.h"
1717

18-
#include "clang/Tooling/CompilationDatabase.h"
19-
18+
#include "indexer/Derive.h"
2019
#include "indexer/FileSystem.h"
2120

21+
namespace clang::tooling {
22+
struct CompileCommand;
23+
} // namespace clang::tooling
24+
2225
namespace scip_clang {
2326
namespace compdb {
2427

2528
struct ValidationOptions {
2629
bool checkDirectoryPathsAreAbsolute;
2730
};
2831

29-
class CompilationDatabaseFile {
32+
class File {
3033
size_t _sizeInBytes;
3134
size_t _commandCount;
3235

3336
public:
3437
FILE *file;
3538

36-
static CompilationDatabaseFile openAndExitOnErrors(const StdPath &,
37-
ValidationOptions);
39+
static File openAndExitOnErrors(const StdPath &, ValidationOptions);
3840

3941
size_t sizeInBytes() const {
4042
return this->_sizeInBytes;
@@ -44,8 +46,8 @@ class CompilationDatabaseFile {
4446
}
4547

4648
private:
47-
static CompilationDatabaseFile open(const StdPath &, ValidationOptions,
48-
std::error_code &fileSizeError);
49+
static File open(const StdPath &, ValidationOptions,
50+
std::error_code &fileSizeError);
4951
};
5052

5153
// Key to identify fields in a command object
@@ -58,16 +60,30 @@ enum class Key : uint32_t {
5860
Output = 1 << 5,
5961
};
6062

63+
/// A single entry of a JSON compilation database.
///
/// The 'command object' terminology is taken from the official Clang docs.
64+
/// https://clang.llvm.org/docs/JSONCompilationDatabase.html
65+
struct CommandObject {
66+
/// Value of the "directory" key.
///
/// Strictly speaking, this should be an absolute directory in an actual
67+
/// compilation database (see NOTE(ref: directory-field-is-absolute)),
68+
/// but we use a std::string instead as it may be a relative path for
69+
/// test cases.
70+
std::string workingDirectory;
71+
/// Value of the "file" key. May be relative or absolute.
72+
std::string filePath;
73+
/// The command line, one element per argument. Serialized under the
/// "arguments" key.
std::vector<std::string> arguments;
74+
};
75+
SERIALIZABLE(CommandObject)
76+
6177
// Handler for extracting command objects from compilation database.
6278
class CommandObjectHandler
6379
: public rapidjson::BaseReaderHandler<rapidjson::UTF8<>,
6480
CommandObjectHandler> {
6581
compdb::Key previousKey;
66-
clang::tooling::CompileCommand wipCommand;
82+
compdb::CommandObject wipCommand;
6783
size_t parseLimit;
6884

6985
public:
70-
std::vector<clang::tooling::CompileCommand> commands;
86+
std::vector<compdb::CommandObject> commands;
7187

7288
CommandObjectHandler(size_t parseLimit)
7389
: previousKey(Key::Unset), wipCommand(), parseLimit(parseLimit),
@@ -97,7 +113,7 @@ class ResumableParser {
97113
absl::flat_hash_set<std::string> emittedErrors;
98114

99115
/// Mapping from compiler/wrapper path to extra information needed
100-
/// to tweak the compilation database entry before invoking the driver.
116+
/// to tweak the command object before invoking the driver.
101117
///
102118
/// For example, Bazel uses a compiler wrapper, but scip-clang needs
103119
/// to use the full path to the compiler driver when running semantic
@@ -113,13 +129,12 @@ class ResumableParser {
113129
/// If \param inferResourceDir is set, then the parser will automatically
114130
/// add extra '-resource-dir' '<path>' arguments to the parsed
115131
/// CommandObjects' arguments field.
116-
void initialize(CompilationDatabaseFile compdb, size_t refillCount,
132+
void initialize(compdb::File compdb, size_t refillCount,
117133
bool inferResourceDir);
118134

119135
// Parses at most refillCount elements (passed during initialization)
120136
// from the compilation database passed during initialization.
121-
void parseMore(std::vector<clang::tooling::CompileCommand> &out,
122-
bool checkFilesExist = true);
137+
void parseMore(std::vector<CommandObject> &out, bool checkFilesExist = true);
123138

124139
private:
125140
void tryInferResourceDir(const std::string &directoryPath,

indexer/Driver.cc

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "spdlog/spdlog.h"
3636

3737
#include "llvm/ADT/StringMap.h"
38+
#include "llvm/Support/Path.h"
3839
#include "llvm/Support/StringSaver.h"
3940

4041
#include "proto/fwd_decls.pb.h"
@@ -467,7 +468,7 @@ class Scheduler final {
467468
uint32_t nextTaskId = 0;
468469
/// Monotonically growing map of all jobs that have been created so far.
469470
/// This number will generally be unrelated to \c compdbCommandCount
470-
/// because a single compilation database entry will typically lead
471+
/// because a single command object will typically lead
471472
/// to creation of multiple jobs.
472473
///
473474
/// In principle, after a job is completed, we could start removing
@@ -538,7 +539,7 @@ class Scheduler final {
538539
switch (job.kind) {
539540
case IndexJob::Kind::SemanticAnalysis:
540541
return fmt::format("running semantic analysis for '{}'",
541-
job.semanticAnalysis.command.Filename);
542+
job.semanticAnalysis.command.filePath);
542543
case IndexJob::Kind::EmitIndex:
543544
auto &fileInfos = job.emitIndex.filesToBeIndexed;
544545
auto fileInfoIt = absl::c_find_if(
@@ -920,7 +921,7 @@ class Driver {
920921
ENFORCE(job.kind == IndexJob::Kind::SemanticAnalysis);
921922
perJobStats.emplace_back(
922923
jobId.taskId(),
923-
StatsEntry{job.semanticAnalysis.command.Filename, std::move(stats)});
924+
StatsEntry{job.semanticAnalysis.command.filePath, std::move(stats)});
924925
}
925926
absl::c_sort(perJobStats, [](const auto &p1, const auto &p2) -> bool {
926927
return p1.first < p2.first;
@@ -1142,7 +1143,7 @@ class Driver {
11421143
}
11431144

11441145
size_t refillJobs() {
1145-
std::vector<clang::tooling::CompileCommand> commands{};
1146+
std::vector<compdb::CommandObject> commands{};
11461147
this->compdbParser.parseMore(commands);
11471148
for (auto &command : commands) {
11481149
this->scheduler.queueNewTask(
@@ -1184,7 +1185,7 @@ class Driver {
11841185
FileGuard openCompilationDatabase() {
11851186
std::error_code error;
11861187
StdPath compdbStdPath{this->compdbPath().asStringRef()};
1187-
auto compdbFile = compdb::CompilationDatabaseFile::openAndExitOnErrors(
1188+
auto compdbFile = compdb::File::openAndExitOnErrors(
11881189
compdbStdPath,
11891190
compdb::ValidationOptions{.checkDirectoryPathsAreAbsolute =
11901191
!this->options.isTesting});

indexer/IpcMessages.cc

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,26 +15,6 @@
1515
#include "indexer/Derive.h"
1616
#include "indexer/IpcMessages.h"
1717

18-
namespace clang::tooling {
19-
20-
llvm::json::Value toJSON(const clang::tooling::CompileCommand &cc) {
21-
return llvm::json::Object{{"directory", cc.Directory},
22-
{"file", cc.Filename},
23-
{"output", cc.Output},
24-
{"arguments", cc.CommandLine}};
25-
}
26-
27-
bool fromJSON(const llvm::json::Value &jsonValue,
28-
clang::tooling::CompileCommand &cc, llvm::json::Path path) {
29-
llvm::json::ObjectMapper mapper(jsonValue, path);
30-
return mapper && mapper.map("directory", cc.Directory)
31-
&& mapper.map("file", cc.Filename)
32-
&& mapper.mapOptional("output", cc.Output)
33-
&& mapper.map("arguments", cc.CommandLine);
34-
}
35-
36-
} // namespace clang::tooling
37-
3818
namespace scip_clang {
3919

4020
std::string driverToWorkerQueueName(std::string_view driverId,

indexer/IpcMessages.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
#include <string>
99
#include <string_view>
1010

11-
#include "clang/Tooling/CompilationDatabase.h"
1211
#include "llvm/Support/JSON.h"
1312

13+
#include "indexer/CompilationDatabase.h"
1414
#include "indexer/Derive.h"
1515
#include "indexer/Hash.h"
1616
#include "indexer/Path.h"
@@ -100,7 +100,7 @@ template <> struct fmt::formatter<scip_clang::JobId> {
100100
namespace scip_clang {
101101

102102
struct SemanticAnalysisJobDetails {
103-
clang::tooling::CompileCommand command;
103+
compdb::CommandObject command;
104104
};
105105
SERIALIZABLE(SemanticAnalysisJobDetails)
106106

indexer/LlvmCommandLineParsing.cc

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@
88
#include "indexer/Enforce.h"
99
#include "indexer/LlvmCommandLineParsing.h"
1010

11-
// ----------------------------------- ATTENTION
12-
// --------------------------------- The code in this file is vendored from
13-
// Clang's JSONCompilationDatabase.cpp because the parser is not exposed in any
14-
// header.
11+
// --------------------------- ATTENTION -------------------------------------
12+
// The code in this file is vendored from Clang's JSONCompilationDatabase.cpp
13+
// because the parser is not exposed in any header.
1514
//
1615
// We could potentially have instead reused the APIs TokenizeWindowsCommandLine
1716
// and TokenizeGNUCommandLine from llvm/Support/CommandLine.h, but it is unclear

0 commit comments

Comments
 (0)