Skip to content

Commit d4b4243

Browse files
authored
Avoid further I/O on the Reviewing index phase (#719)
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
1 parent 7f6182c commit d4b4243

File tree

4 files changed

+125
-58
lines changed

4 files changed

+125
-58
lines changed

src/build/build.cc

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ Build::Build(const std::filesystem::path &output_root)
2626
root_string{this->root.string()} {
2727
for (const auto &entry :
2828
std::filesystem::recursive_directory_iterator(this->root)) {
29-
this->entries[entry.path().native()].tracked = false;
29+
auto &map_entry{this->entries_[entry.path().native()]};
30+
map_entry.tracked = false;
31+
map_entry.is_directory = entry.is_directory();
3032
}
3133

3234
const auto deps_path{this->root / DEPENDENCIES_FILE};
@@ -65,7 +67,7 @@ Build::Build(const std::filesystem::path &output_root)
6567
switch (tag) {
6668
case 't':
6769
if (!current_key.empty()) {
68-
this->entries[current_key].dependencies = std::move(current_deps);
70+
this->entries_[current_key].dependencies = std::move(current_deps);
6971
current_deps = {};
7072
}
7173

@@ -97,7 +99,7 @@ Build::Build(const std::filesystem::path &output_root)
9799
std::string absolute_key{this->root_string};
98100
absolute_key += '/';
99101
absolute_key += path_part;
100-
this->entries[absolute_key].file_mark = mark_value;
102+
this->entries_[absolute_key].file_mark = mark_value;
101103
}
102104

103105
break;
@@ -110,19 +112,19 @@ Build::Build(const std::filesystem::path &output_root)
110112
}
111113

112114
if (!current_key.empty()) {
113-
this->entries[current_key].dependencies = std::move(current_deps);
115+
this->entries_[current_key].dependencies = std::move(current_deps);
114116
}
115117
this->has_previous_run = true;
116118
} catch (...) {
117-
this->entries.clear();
119+
this->entries_.clear();
118120
}
119121
}
120122

121123
auto Build::has_dependencies(const std::filesystem::path &path) const -> bool {
122124
assert(path.is_absolute());
123125
std::shared_lock lock{this->mutex};
124-
const auto match{this->entries.find(path.native())};
125-
return match != this->entries.end() && !match->second.dependencies.empty();
126+
const auto match{this->entries_.find(path.native())};
127+
return match != this->entries_.end() && !match->second.dependencies.empty();
126128
}
127129

128130
auto Build::finish() -> void {
@@ -132,7 +134,7 @@ auto Build::finish() -> void {
132134

133135
const auto root_prefix_size{this->root_string.size() + 1};
134136
std::shared_lock lock{this->mutex};
135-
for (const auto &entry : this->entries) {
137+
for (const auto &entry : this->entries_) {
136138
if (!entry.second.tracked) {
137139
continue;
138140
}
@@ -181,7 +183,7 @@ auto Build::finish() -> void {
181183

182184
// Remove untracked files inside the output directory
183185
std::shared_lock read_lock{this->mutex};
184-
for (const auto &entry : this->entries) {
186+
for (const auto &entry : this->entries_) {
185187
if (!entry.second.tracked &&
186188
entry.first.size() > this->root_string.size() &&
187189
entry.first.starts_with(this->root_string)) {
@@ -193,7 +195,7 @@ auto Build::finish() -> void {
193195
auto Build::refresh(const std::filesystem::path &path) -> void {
194196
const auto value{std::filesystem::file_time_type::clock::now()};
195197
std::unique_lock lock{this->mutex};
196-
this->entries[path.native()].file_mark = value;
198+
this->entries_[path.native()].file_mark = value;
197199
}
198200

199201
auto Build::mark(const std::filesystem::path &path)
@@ -202,8 +204,8 @@ auto Build::mark(const std::filesystem::path &path)
202204

203205
{
204206
std::shared_lock lock{this->mutex};
205-
const auto match{this->entries.find(path.native())};
206-
if (match != this->entries.end() && match->second.file_mark.has_value()) {
207+
const auto match{this->entries_.find(path.native())};
208+
if (match != this->entries_.end() && match->second.file_mark.has_value()) {
207209
return match->second.file_mark;
208210
}
209211
}
@@ -217,7 +219,7 @@ auto Build::mark(const std::filesystem::path &path)
217219
try {
218220
const auto value{std::filesystem::last_write_time(path)};
219221
std::unique_lock lock{this->mutex};
220-
this->entries[path.native()].file_mark = value;
222+
this->entries_[path.native()].file_mark = value;
221223
return value;
222224
} catch (const std::filesystem::filesystem_error &) {
223225
return std::nullopt;
@@ -227,8 +229,8 @@ auto Build::mark(const std::filesystem::path &path)
227229
auto Build::mark_locked(const std::filesystem::path &path) const
228230
-> std::optional<mark_type> {
229231
assert(path.is_absolute());
230-
const auto match{this->entries.find(path.native())};
231-
if (match != this->entries.end() && match->second.file_mark.has_value()) {
232+
const auto match{this->entries_.find(path.native())};
233+
if (match != this->entries_.end() && match->second.file_mark.has_value()) {
232234
return match->second.file_mark;
233235
}
234236

@@ -245,7 +247,7 @@ auto Build::track(const std::filesystem::path &path) -> void {
245247
assert(path.is_absolute());
246248
const auto &path_string{path.native()};
247249
std::unique_lock lock{this->mutex};
248-
this->entries[path_string].tracked = true;
250+
this->entries_[path_string].tracked = true;
249251
auto parent_key{path_string};
250252
while (true) {
251253
const auto slash{parent_key.rfind('/')};
@@ -254,19 +256,20 @@ auto Build::track(const std::filesystem::path &path) -> void {
254256
}
255257

256258
parent_key = parent_key.substr(0, slash);
257-
auto &parent_entry{this->entries[parent_key]};
259+
auto &parent_entry{this->entries_[parent_key]};
258260
if (parent_entry.tracked) {
259261
break;
260262
}
261263

262264
parent_entry.tracked = true;
265+
parent_entry.is_directory = true;
263266
}
264267
}
265268

266269
auto Build::is_untracked_file(const std::filesystem::path &path) const -> bool {
267270
std::shared_lock lock{this->mutex};
268-
const auto match{this->entries.find(path.native())};
269-
return match == this->entries.cend() || !match->second.tracked;
271+
const auto match{this->entries_.find(path.native())};
272+
return match == this->entries_.cend() || !match->second.tracked;
270273
}
271274

272275
auto Build::output_write_json(const std::filesystem::path &path,

src/build/include/sourcemeta/one/build.h

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ class SOURCEMETA_ONE_BUILD_EXPORT Build {
5959
-> bool {
6060
const auto &destination_string{destination.native()};
6161
std::shared_lock lock{this->mutex};
62-
const auto cached_match{this->entries.find(destination_string)};
63-
if (cached_match != this->entries.end()) {
62+
const auto cached_match{this->entries_.find(destination_string)};
63+
if (cached_match != this->entries_.end()) {
6464
const auto &entry{cached_match->second};
6565
if (entry.file_mark.has_value() && !entry.dependencies.empty()) {
6666
std::size_t static_index{0};
@@ -120,17 +120,28 @@ class SOURCEMETA_ONE_BUILD_EXPORT Build {
120120
this->track(destination);
121121
{
122122
std::unique_lock write_lock{this->mutex};
123-
this->entries[destination_string].dependencies =
123+
this->entries_[destination_string].dependencies =
124124
std::move(output_dependencies);
125125
}
126126

127127
return true;
128128
}
129129

130+
struct Entry {
131+
std::optional<mark_type> file_mark;
132+
Dependencies dependencies;
133+
bool tracked{false};
134+
bool is_directory{false};
135+
};
136+
130137
auto track(const std::filesystem::path &path) -> void;
131138
[[nodiscard]] auto is_untracked_file(const std::filesystem::path &path) const
132139
-> bool;
133140

141+
auto entries() const -> const std::unordered_map<std::string, Entry> & {
142+
return this->entries_;
143+
}
144+
134145
auto write_json_if_different(const std::filesystem::path &path,
135146
const sourcemeta::core::JSON &document) -> void;
136147

@@ -140,15 +151,9 @@ class SOURCEMETA_ONE_BUILD_EXPORT Build {
140151
auto output_write_json(const std::filesystem::path &path,
141152
const sourcemeta::core::JSON &document) -> void;
142153

143-
struct Entry {
144-
std::optional<mark_type> file_mark;
145-
Dependencies dependencies;
146-
bool tracked{false};
147-
};
148-
149154
std::filesystem::path root;
150155
std::string root_string;
151-
std::unordered_map<std::string, Entry> entries;
156+
std::unordered_map<std::string, Entry> entries_;
152157
mutable std::shared_mutex mutex;
153158
bool has_previous_run{false};
154159
};

src/index/explorer.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -370,8 +370,7 @@ struct GENERATE_EXPLORER_DIRECTORY_LIST {
370370
}
371371
}
372372
}
373-
// Pre-compute sort keys once per entry to avoid calling
374-
// try_parse_version O(N log N) times inside the comparator
373+
375374
struct SortKey {
376375
std::string_view type;
377376
std::optional<std::tuple<unsigned, unsigned, unsigned>> version;

src/index/index.cc

Lines changed: 87 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -445,47 +445,107 @@ static auto index_main(const std::string_view &program,
445445
directory_summaries;
446446
sourcemeta::one::Build::Dependencies dependencies;
447447
if (std::filesystem::exists(schemas_path)) {
448-
for (const auto &entry :
449-
std::filesystem::recursive_directory_iterator{schemas_path}) {
450-
if (output.is_untracked_file(entry.path())) {
448+
const auto &entries{output.entries()};
449+
const auto &schemas_path_string{schemas_path.native()};
450+
const auto schemas_prefix_size{schemas_path_string.size() + 1};
451+
452+
// Collect tracked entries under schemas_path into a sorted vector
453+
// so that dependency ordering is deterministic across runs
454+
struct SchemaEntry {
455+
const std::string *path;
456+
bool is_directory;
457+
};
458+
459+
std::vector<SchemaEntry> schema_entries;
460+
std::unordered_map<std::string, std::size_t> child_counts;
461+
std::unordered_set<std::string> has_sentinel;
462+
463+
for (const auto &[path_key, entry] : entries) {
464+
if (!entry.tracked || path_key.size() <= schemas_path_string.size() ||
465+
!path_key.starts_with(schemas_path_string) ||
466+
path_key[schemas_path_string.size()] != '/') {
467+
continue;
468+
}
469+
470+
schema_entries.push_back({&path_key, entry.is_directory});
471+
472+
const auto last_slash{path_key.rfind('/')};
473+
if (last_slash == std::string::npos) {
474+
continue;
475+
}
476+
477+
child_counts[path_key.substr(0, last_slash)]++;
478+
if (entry.is_directory &&
479+
std::string_view{path_key}.substr(last_slash + 1) == SENTINEL) {
480+
has_sentinel.insert(path_key.substr(0, last_slash));
481+
}
482+
}
483+
484+
std::ranges::sort(schema_entries, [](const auto &left, const auto &right) {
485+
return *left.path < *right.path;
486+
});
487+
488+
for (const auto &schema_entry : schema_entries) {
489+
const auto &path_key{*schema_entry.path};
490+
if (path_key.size() <= schemas_prefix_size) {
451491
continue;
452492
}
453493

454-
if (entry.is_directory() && entry.path().filename() != SENTINEL) {
494+
const auto last_slash{path_key.rfind('/')};
495+
const std::string_view filename{path_key.data() + last_slash + 1,
496+
path_key.size() - last_slash - 1};
497+
498+
if (schema_entry.is_directory && filename != SENTINEL) {
499+
const auto count_match{child_counts.find(path_key)};
455500
const auto children{
456-
std::distance(std::filesystem::directory_iterator(entry.path()),
457-
std::filesystem::directory_iterator{})};
501+
count_match != child_counts.end() ? count_match->second : 0u};
458502
if (children == 0 ||
459-
(std::filesystem::exists(entry.path() / SENTINEL) &&
460-
children == 1)) {
503+
(has_sentinel.count(path_key) > 0 && children == 1)) {
461504
continue;
462505
}
463506

464-
assert(entry.path().is_absolute());
465-
directories.emplace_back(entry.path());
466-
const auto child_directory_metapack{
467-
explorer_path /
468-
std::filesystem::relative(entry.path(), schemas_path) / SENTINEL /
469-
"directory.metapack"};
470-
directory_summaries[entry.path().parent_path().string()].emplace_back(
507+
directories.emplace_back(path_key);
508+
const std::string_view relative{path_key.data() + schemas_prefix_size,
509+
path_key.size() - schemas_prefix_size};
510+
const auto child_directory_metapack{explorer_path /
511+
std::filesystem::path{relative} /
512+
SENTINEL / "directory.metapack"};
513+
directory_summaries[path_key.substr(0, last_slash)].emplace_back(
471514
sourcemeta::one::Build::DependencyKind::Static,
472515
child_directory_metapack);
473-
} else if (entry.is_regular_file() &&
474-
entry.path().filename() == "schema.metapack" &&
475-
entry.path().parent_path().filename() == SENTINEL) {
476-
const auto explorer_summary_path{
477-
explorer_path /
478-
std::filesystem::relative(entry.path(), schemas_path)};
516+
} else if (!schema_entry.is_directory && filename == "schema.metapack") {
517+
const std::string_view parent_path{path_key.data(), last_slash};
518+
const auto parent_last_slash{parent_path.rfind('/')};
519+
if (parent_last_slash == std::string_view::npos) {
520+
continue;
521+
}
522+
523+
const std::string_view parent_filename{
524+
parent_path.data() + parent_last_slash + 1,
525+
parent_path.size() - parent_last_slash - 1};
526+
if (parent_filename != SENTINEL) {
527+
continue;
528+
}
529+
530+
const std::string_view relative{path_key.data() + schemas_prefix_size,
531+
path_key.size() - schemas_prefix_size};
532+
const auto explorer_summary_path{explorer_path /
533+
std::filesystem::path{relative}};
479534
summaries.emplace_back(sourcemeta::one::Build::DependencyKind::Static,
480535
explorer_summary_path);
481-
const auto containing_directory{
482-
entry.path().parent_path().parent_path().parent_path().string()};
483-
directory_summaries[containing_directory].emplace_back(
484-
sourcemeta::one::Build::DependencyKind::Static,
485-
explorer_summary_path);
536+
537+
const auto grandparent_slash{
538+
parent_path.substr(0, parent_last_slash).rfind('/')};
539+
if (grandparent_slash != std::string_view::npos) {
540+
directory_summaries[std::string{
541+
parent_path.substr(0, grandparent_slash)}]
542+
.emplace_back(sourcemeta::one::Build::DependencyKind::Static,
543+
explorer_summary_path);
544+
}
545+
486546
dependencies.emplace_back(
487547
sourcemeta::one::Build::DependencyKind::Static,
488-
entry.path().parent_path() / "dependencies.metapack");
548+
std::filesystem::path{parent_path} / "dependencies.metapack");
489549
}
490550
}
491551

0 commit comments

Comments
 (0)