Skip to content

Commit 65342f3

Browse files
OSS-Fuzz Teamcopybara-github
authored andcommitted
Switch to copying source files only after the indexing process
Indexer-PiperOrigin-RevId: 779213411
1 parent 4e8b168 commit 65342f3

File tree

5 files changed

+55
-52
lines changed

5 files changed

+55
-52
lines changed

infra/indexer/index/file_copier.cc

Lines changed: 29 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#include <filesystem> // NOLINT
1818
#include <string>
1919
#include <system_error> // NOLINT
20-
#include <utility>
2120
#include <vector>
2221

2322
#include "absl/container/flat_hash_set.h"
@@ -42,12 +41,10 @@ void PreparePath(std::string& path) {
4241

4342
FileCopier::FileCopier(absl::string_view base_path,
4443
absl::string_view index_path,
45-
const std::vector<std::string>& extra_paths,
46-
bool dry_run)
44+
const std::vector<std::string>& extra_paths)
4745
: base_path_(base_path),
4846
extra_paths_(extra_paths),
49-
index_path_(index_path),
50-
dry_run_(dry_run) {
47+
index_path_(index_path) {
5148
PreparePath(base_path_);
5249
for (std::string& extra_path : extra_paths_) {
5350
PreparePath(extra_path);
@@ -74,49 +71,49 @@ std::string FileCopier::ToIndexPath(absl::string_view path) const {
7471
return result;
7572
}
7673

77-
void FileCopier::CopyFileIfNecessary(absl::string_view index_path) {
74+
void FileCopier::RegisterIndexedFile(absl::string_view index_path) {
7875
if (index_path.empty() || index_path.starts_with('<')) {
7976
// Built-in header or a location lacking the filename.
8077
return;
8178
}
8279

83-
std::filesystem::path src_path;
84-
std::filesystem::path dst_path;
85-
src_path = std::filesystem::path(index_path);
86-
if (src_path.is_absolute()) {
87-
dst_path =
88-
std::filesystem::path(index_path_) / "absolute" / index_path.substr(1);
89-
} else {
90-
src_path = std::filesystem::path(base_path_) / index_path;
91-
dst_path = std::filesystem::path(index_path_) / "relative" / index_path;
80+
{
81+
absl::MutexLock lock(&mutex_);
82+
indexed_files_.emplace(index_path);
9283
}
84+
}
9385

94-
DLOG(INFO) << "From: " << src_path << "\n To: " << dst_path << "\n";
86+
void FileCopier::CopyIndexedFiles() {
87+
absl::MutexLock lock(&mutex_);
88+
89+
for (const std::string& indexed_path : indexed_files_) {
90+
std::filesystem::path src_path = indexed_path;
91+
std::filesystem::path dst_path;
92+
if (src_path.is_absolute()) {
93+
dst_path = std::filesystem::path(index_path_) / "absolute" /
94+
indexed_path.substr(1);
95+
} else {
96+
src_path = std::filesystem::path(base_path_) / indexed_path;
97+
dst_path = std::filesystem::path(index_path_) / "relative" / indexed_path;
98+
}
9599

96-
CHECK(std::filesystem::exists(src_path))
97-
<< "Source file does not exist: " << index_path;
100+
DLOG(INFO) << "\nFrom: " << src_path << "\n To: " << dst_path << "\n";
98101

99-
bool should_copy = false;
100-
{
101-
absl::MutexLock lock(&mutex_);
102-
should_copy = indexed_files_.insert(dst_path).second;
103-
}
102+
QCHECK(std::filesystem::exists(src_path))
103+
<< "Source file does not exist: " << src_path;
104104

105-
if (should_copy && !dry_run_) {
106105
std::error_code error_code;
107-
// We can race on creating the destination directory structure, so silently
108-
// ignore errors here.
106+
// The destination directory may already exist, but report other errors.
109107
(void)std::filesystem::create_directories(dst_path.parent_path(),
110108
error_code);
109+
QCHECK(!error_code) << "Failed to create directory: "
110+
<< dst_path.parent_path()
111+
<< " (error: " << error_code.message() << ")";
111112

112-
// We cannot race on creating the same destination file.
113-
QCHECK(std::filesystem::copy_file(
114-
src_path, dst_path, std::filesystem::copy_options::overwrite_existing,
115-
error_code))
116-
<< "Failed to copy file: " << src_path
113+
QCHECK(std::filesystem::copy_file(src_path, dst_path, error_code))
114+
<< "Failed to copy file " << src_path << " to " << dst_path
117115
<< " (error: " << error_code.message() << ")";
118116
}
119117
}
120-
121118
} // namespace indexer
122119
} // namespace oss_fuzz

infra/indexer/index/file_copier.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,25 +33,26 @@ namespace indexer {
3333
class FileCopier {
3434
public:
3535
FileCopier(absl::string_view base_path, absl::string_view index_path,
36-
const std::vector<std::string>& extra_paths, bool dry_run = false);
36+
const std::vector<std::string>& extra_paths);
3737
FileCopier(const FileCopier&) = delete;
3838

3939
// Rewrites a path into the representation it will have in the index
4040
// (relative if within the source tree and absolute otherwise).
4141
std::string ToIndexPath(absl::string_view path) const;
4242

4343
// `index_path` is expected to be produced by `ToIndexPath`.
44-
void CopyFileIfNecessary(absl::string_view index_path);
44+
void RegisterIndexedFile(absl::string_view index_path);
45+
46+
// Single-threaded.
47+
void CopyIndexedFiles();
4548

4649
private:
4750
std::string base_path_;
4851
std::vector<std::string> extra_paths_;
4952
const std::filesystem::path index_path_;
50-
bool dry_run_;
5153

5254
absl::Mutex mutex_;
53-
absl::flat_hash_set<std::filesystem::path> indexed_files_
54-
ABSL_GUARDED_BY(mutex_);
55+
absl::flat_hash_set<std::string> indexed_files_ ABSL_GUARDED_BY(mutex_);
5556
};
5657

5758
} // namespace indexer

infra/indexer/index/file_copier_unittest.cc

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ TEST(FileCopierTest, ToIndexPathOutside) {
6464
EXPECT_DEATH(file_copier.ToIndexPath("/a/b/c/d.cc"), "/a/b/c/d.cc");
6565
}
6666

67-
TEST(FileCopierTest, CopyFileIfNecessary) {
67+
TEST(FileCopierTest, FileCopying) {
6868
auto tmp_dir_path = std::filesystem::path(::testing::TempDir());
6969
auto base_path = tmp_dir_path / "src";
7070
auto index_path = tmp_dir_path / "idx";
@@ -85,18 +85,15 @@ TEST(FileCopierTest, CopyFileIfNecessary) {
8585
auto file_c_copy = index_path / "absolute" /
8686
sysroot_path.lexically_relative("/") / "y/z/c.cc";
8787

88-
file_copier.CopyFileIfNecessary(file_copier.ToIndexPath(file_a.string()));
89-
file_copier.CopyFileIfNecessary(file_copier.ToIndexPath(file_b.string()));
90-
file_copier.CopyFileIfNecessary(file_copier.ToIndexPath(file_c.string()));
88+
file_copier.RegisterIndexedFile(file_copier.ToIndexPath(file_a.string()));
89+
file_copier.RegisterIndexedFile(file_copier.ToIndexPath(file_b.string()));
90+
file_copier.RegisterIndexedFile(file_copier.ToIndexPath(file_c.string()));
91+
92+
file_copier.CopyIndexedFiles();
9193

9294
EXPECT_EQ(GetFileContents(file_a_copy).value_or(""), "a.cc");
9395
EXPECT_EQ(GetFileContents(file_b_copy).value_or(""), "b.cc");
9496
EXPECT_EQ(GetFileContents(file_c_copy).value_or(""), "c.cc");
95-
96-
// Delete `file_a` and check that it's not copied more than once.
97-
std::filesystem::remove(file_a_copy);
98-
file_copier.CopyFileIfNecessary(file_copier.ToIndexPath(file_a.string()));
99-
EXPECT_FALSE(std::filesystem::exists(file_a_copy));
10097
}
10198

10299
} // namespace indexer

infra/indexer/index/in_memory_index.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ LocationId InMemoryIndex::GetLocationId(const Location& location) {
280280
auto [iter, inserted] = locations_.insert({new_location, next_location_id_});
281281
if (inserted) {
282282
next_location_id_++;
283-
file_copier_.CopyFileIfNecessary(new_location.path());
283+
file_copier_.RegisterIndexedFile(new_location.path());
284284
}
285285

286286
return iter->second;

infra/indexer/main.cc

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <filesystem> // NOLINT
1818
#include <memory>
1919
#include <string>
20+
#include <system_error> // NOLINT
2021
#include <utility>
2122
#include <vector>
2223

@@ -37,7 +38,8 @@
3738

3839
ABSL_FLAG(std::string, source_dir, "", "Source directory");
3940
ABSL_FLAG(std::string, build_dir, "", "Build directory");
40-
ABSL_FLAG(std::string, index_dir, "", "Output index file directory");
41+
ABSL_FLAG(std::string, index_dir, "",
42+
"Output index file directory (should be empty if it exists)");
4143
ABSL_FLAG(std::vector<std::string>, extra_dirs, {"/"},
4244
"Additional source directory/-ies (comma-separated)");
4345
ABSL_FLAG(std::string, dry_run_regex, "",
@@ -84,8 +86,7 @@ int main(int argc, char** argv) {
8486
clang::tooling::Filter.setValue(dry_run_regex);
8587
}
8688

87-
oss_fuzz::indexer::FileCopier file_copier(source_dir, index_dir, extra_dirs,
88-
/*dry_run=*/!dry_run_regex.empty());
89+
oss_fuzz::indexer::FileCopier file_copier(source_dir, index_dir, extra_dirs);
8990

9091
std::unique_ptr<MergeQueue> merge_queue = MergeQueue::Create(
9192
absl::GetFlag(FLAGS_merge_queues), absl::GetFlag(FLAGS_merge_queue_size));
@@ -99,7 +100,9 @@ int main(int argc, char** argv) {
99100
<< llvm::toString(std::move(index_error));
100101
if (!absl::GetFlag(FLAGS_ignore_indexing_errors)) {
101102
merge_queue->Cancel();
102-
std::filesystem::remove_all(std::filesystem::path(index_dir));
103+
std::error_code ignored_error_code;
104+
std::filesystem::remove_all(std::filesystem::path(index_dir),
105+
ignored_error_code);
103106
return 1;
104107
}
105108
}
@@ -108,14 +111,19 @@ int main(int argc, char** argv) {
108111
auto index = merge_queue->TakeIndex();
109112
if (!index) {
110113
LOG(ERROR) << "Failed to create index";
111-
std::filesystem::remove_all(std::filesystem::path(index_dir));
114+
std::error_code ignored_error_code;
115+
std::filesystem::remove_all(std::filesystem::path(index_dir),
116+
ignored_error_code);
112117
return 1;
113118
}
114119

115120
if (!dry_run_regex.empty()) {
116121
return 0;
117122
}
118123

124+
LOG(INFO) << "copying files";
125+
file_copier.CopyIndexedFiles();
126+
119127
LOG(INFO) << "exporting index";
120128
auto flat_index = index->Export();
121129

0 commit comments

Comments
 (0)