Skip to content

Commit 7e4e89c

Browse files
perf: Convert JSON docs to the expected caching format in parallel
Insertion happens on the "main" thread, but the more expensive format conversion is executed on a threadpool.
1 parent 40ac905 commit 7e4e89c

File tree

5 files changed

+203
-61
lines changed

5 files changed

+203
-61
lines changed

compiler/pavex_test_runner/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ anyhow = { workspace = true }
1616
ahash = { workspace = true }
1717
console = { workspace = true }
1818
cargo_metadata = { workspace = true }
19+
cargo-like-utils = { workspace = true }
1920
fs-err = { workspace = true }
2021
libtest-mimic = { workspace = true }
2122
serde = { workspace = true, features = ["derive"] }
@@ -32,6 +33,7 @@ object-pool = { workspace = true }
3233
num_cpus = { workspace = true }
3334
globwalk = { workspace = true }
3435
tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] }
36+
pavex_cli_shell = { workspace = true }
3537
regex = { workspace = true }
3638
once_cell = { workspace = true }
3739
pavexc = { workspace = true }

compiler/pavex_test_runner/src/lib.rs

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@ use std::process::{Command, Output};
66

77
use ahash::HashSet;
88
use anyhow::Context;
9+
use cargo_like_utils::shell::Shell;
910
use cargo_metadata::diagnostic::DiagnosticLevel;
1011
use console::style;
1112
use guppy::graph::PackageGraph;
1213
use itertools::Itertools;
1314
use libtest_mimic::{Arguments, Conclusion, Failed, Trial};
15+
use pavex_cli_shell::try_init_shell;
1416
use pavexc::rustdoc::CrateCollection;
1517
use pavexc::{DEFAULT_DOCS_TOOLCHAIN, DiagnosticSink};
1618
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
@@ -112,6 +114,7 @@ pub fn run_tests(
112114
// acquire the global target directory lock. If multiple tests end up
113115
// doing this at the same time, their execution will be serialized,
114116
// which would have a major impact on the overall test suite runtime.
117+
try_init_shell(Shell::new());
115118
warm_up_rustdoc_cache(&package_graph, &test_name2test_data)?;
116119

117120
// First battery of UI tests.
@@ -426,7 +429,6 @@ fn warm_up_rustdoc_cache(
426429
// We want to ensure that all invocations of `pavexc generate` hit the cache
427430
// thus avoiding the need to invoke `rustdoc` and acquire a contentious
428431
// lock over the target directory.
429-
println!("Pre-computing JSON documentation for relevant crates");
430432
let crate_collection = CrateCollection::new(
431433
DEFAULT_DOCS_TOOLCHAIN.to_owned(),
432434
package_graph.clone(),
@@ -443,14 +445,20 @@ fn warm_up_rustdoc_cache(
443445
.workspace()
444446
.iter()
445447
.filter(|p| {
446-
!(p.name().starts_with("application_")
448+
!(
449+
// Avoid computing JOSN docs for:
450+
// - Generated code
451+
p.name().starts_with("application_")
452+
// - Integration test crates
447453
|| p.name().starts_with("integration_")
454+
// - The workspace hack package
448455
|| p.name() == "workspace_hack"
449-
|| (p.name().starts_with("app_") && !app_names.contains(p.name())))
456+
// - Tests that have been filtered out of this run
457+
|| (p.name().starts_with("app_") && !app_names.contains(p.name()))
458+
)
450459
})
451460
.map(|p| p.name().to_owned())
452461
.collect();
453-
crates.remove("workspace_hack");
454462
// Toolchain docs
455463
crates.insert("core".into());
456464
crates.insert("alloc".into());
@@ -471,16 +479,19 @@ fn warm_up_rustdoc_cache(
471479
crates.insert("yansi".into());
472480
crates.insert("serde".into());
473481
crates.insert("zerocopy".into());
474-
let package_ids = crate_collection
482+
let package_ids: Vec<_> = crate_collection
475483
.package_graph()
476484
.packages()
477485
.filter(|p| crates.contains(p.name()))
478-
.map(|p| p.id().to_owned());
486+
.map(|p| p.id().to_owned())
487+
.collect();
488+
let n_packages = package_ids.len();
489+
println!("Pre-computing JSON documentation for {n_packages} crates",);
479490
crate_collection
480-
.batch_compute_crates(package_ids)
491+
.batch_compute_crates(package_ids.into_iter())
481492
.context("Failed to warm rustdoc JSON cache")?;
482493
println!(
483-
"Pre-computed JSON documentation in {} seconds",
494+
"Pre-computed JSON documentation for {n_packages} crates in {} seconds",
484495
timer.elapsed().as_secs()
485496
);
486497

compiler/pavexc/src/rustdoc/compute/cache.rs

Lines changed: 114 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,9 @@ impl RustdocGlobalFsCache {
137137
}
138138
}
139139

140-
/// Store the JSON documentation generated by `rustdoc` in the cache.
141-
pub(crate) fn insert(
140+
/// Convert the JSON documentation generated by `rustdoc` into the format used by our cache,
141+
/// then store it.
142+
pub(crate) fn convert_and_insert(
142143
&self,
143144
cache_key: &RustdocCacheKey,
144145
krate: &crate::rustdoc::Crate,
@@ -147,17 +148,53 @@ impl RustdocGlobalFsCache {
147148
) -> Result<(), anyhow::Error> {
148149
let connection = self.connection_pool.get()?;
149150
match cache_key {
150-
RustdocCacheKey::ThirdPartyCrate(metadata) => self.third_party_cache.insert(
151-
metadata,
151+
RustdocCacheKey::ThirdPartyCrate(metadata) => {
152+
self.third_party_cache.convert_and_insert(
153+
metadata,
154+
krate,
155+
&self.cargo_fingerprint,
156+
&connection,
157+
cache_indexes,
158+
package_graph,
159+
)
160+
}
161+
RustdocCacheKey::ToolchainCrate(name) => self.toolchain_cache.convert_and_insert(
162+
name,
152163
krate,
153164
&self.cargo_fingerprint,
154165
&connection,
155-
cache_indexes,
156-
package_graph,
157166
),
167+
}
168+
}
169+
170+
/// Store the JSON documentation for a crate, which has already been converted to the expected
171+
/// format for caching.
172+
///
173+
/// This method should be preferred to [`Self::insert`] whenever multi-threading is involved,
174+
/// since [`RustdocGlobalFsCache`] isn't thread-safe, but the conversion into the caching format
175+
/// doesn't require its internals and can therefore be offloaded to another thread without issues.
176+
pub(crate) fn insert(
177+
&self,
178+
cache_key: &RustdocCacheKey,
179+
cache_entry: CacheEntry,
180+
package_graph: &PackageGraph,
181+
) -> Result<(), anyhow::Error> {
182+
let connection = self.connection_pool.get()?;
183+
match cache_key {
184+
RustdocCacheKey::ThirdPartyCrate(metadata) => {
185+
let Some(cache_key) = self.third_party_cache.cache_key(
186+
metadata,
187+
&self.cargo_fingerprint,
188+
package_graph,
189+
) else {
190+
return Ok(());
191+
};
192+
self.third_party_cache
193+
.insert(cache_key, &connection, cache_entry)
194+
}
158195
RustdocCacheKey::ToolchainCrate(name) => {
159196
self.toolchain_cache
160-
.insert(name, krate, &self.cargo_fingerprint, &connection)
197+
.insert(name, cache_entry, &self.cargo_fingerprint, &connection)
161198
}
162199
}
163200
}
@@ -317,7 +354,7 @@ impl ToolchainCache {
317354
let import_path2id = row.get_ref_unwrap(7).as_bytes()?;
318355
let re_exports = row.get_ref_unwrap(8).as_bytes()?;
319356

320-
let krate = CachedData {
357+
let krate = CacheEntry {
321358
root_item_id,
322359
external_crates: Cow::Borrowed(external_crates),
323360
paths: Cow::Borrowed(paths),
@@ -339,14 +376,26 @@ impl ToolchainCache {
339376

340377
/// Store the JSON documentation for a toolchain crate in the cache.
341378
#[instrument(name = "Cache rustdoc output on disk", skip_all, level=tracing::Level::DEBUG, fields(crate.name = name))]
342-
fn insert(
379+
fn convert_and_insert(
343380
&self,
344381
name: &str,
345382
krate: &crate::rustdoc::Crate,
346383
cargo_fingerprint: &str,
347384
connection: &rusqlite::Connection,
348385
) -> Result<(), anyhow::Error> {
349-
let cached_data = CachedData::new(krate).context("Failed to serialize docs")?;
386+
let cache_entry = CacheEntry::new(krate).context("Failed to serialize docs")?;
387+
self.insert(name, cache_entry, cargo_fingerprint, connection)
388+
}
389+
390+
/// Store the JSON documentation for a toolchain crate in the cache.
391+
#[instrument(name = "Cache rustdoc output on disk", skip_all, level=tracing::Level::DEBUG, fields(crate.name = name))]
392+
fn insert(
393+
&self,
394+
name: &str,
395+
cache_entry: CacheEntry<'_>,
396+
cargo_fingerprint: &str,
397+
connection: &rusqlite::Connection,
398+
) -> Result<(), anyhow::Error> {
350399
let mut stmt = connection.prepare_cached(
351400
"INSERT INTO rustdoc_toolchain_crates_cache (
352401
name,
@@ -365,23 +414,23 @@ impl ToolchainCache {
365414
stmt.execute(params![
366415
name,
367416
cargo_fingerprint,
368-
cached_data.root_item_id,
369-
cached_data.external_crates,
370-
cached_data.paths,
371-
cached_data.format_version,
372-
cached_data.items,
373-
cached_data.item_id2delimiters,
374-
cached_data
417+
cache_entry.root_item_id,
418+
cache_entry.external_crates,
419+
cache_entry.paths,
420+
cache_entry.format_version,
421+
cache_entry.items,
422+
cache_entry.item_id2delimiters,
423+
cache_entry
375424
.secondary_indexes
376425
.as_ref()
377426
.expect("Indexing never fails for toolchain crates")
378427
.import_index,
379-
cached_data
428+
cache_entry
380429
.secondary_indexes
381430
.as_ref()
382431
.expect("Indexing never fails for toolchain crates")
383432
.import_path2id,
384-
cached_data
433+
cache_entry
385434
.secondary_indexes
386435
.as_ref()
387436
.expect("Indexing never fails for toolchain crates")
@@ -537,7 +586,7 @@ impl ThirdPartyCrateCache {
537586
_ => None,
538587
};
539588

540-
let krate = CachedData {
589+
let krate = CacheEntry {
541590
root_item_id,
542591
external_crates: Cow::Borrowed(external_crates),
543592
paths: Cow::Borrowed(paths),
@@ -570,14 +619,30 @@ impl ThirdPartyCrateCache {
570619
outcome
571620
}
572621

573-
/// Store the JSON documentation generated by `rustdoc` in the cache.
622+
/// Compute the cache key for a given package.
623+
fn cache_key<'a>(
624+
&self,
625+
package_metadata: &'a PackageMetadata,
626+
cargo_fingerprint: &'a str,
627+
package_graph: &PackageGraph,
628+
) -> Option<ThirdPartyCrateCacheKey<'a>> {
629+
ThirdPartyCrateCacheKey::build(
630+
package_graph,
631+
package_metadata,
632+
cargo_fingerprint,
633+
self.cache_workspace_packages,
634+
)
635+
}
636+
637+
/// Convert the JSON documentation generated by `rustdoc` to the format used by our cache,
638+
/// then store it.
574639
#[instrument(
575-
name = "Cache third-party crate docs to disk",
640+
name = "Convert and cache docs for a third-party crate to disk",
576641
skip_all,
577642
level=tracing::Level::DEBUG,
578643
fields(crate.id = %package_metadata.id(), cache_key = tracing::field::Empty))
579644
]
580-
fn insert(
645+
fn convert_and_insert(
581646
&self,
582647
package_metadata: &PackageMetadata,
583648
krate: &crate::rustdoc::Crate,
@@ -596,11 +661,29 @@ impl ThirdPartyCrateCache {
596661
};
597662
tracing::Span::current().record("cache_key", tracing::field::debug(&cache_key));
598663
let cached_data = if cache_indexes {
599-
CachedData::new(krate)
664+
CacheEntry::new(krate)
600665
} else {
601-
CachedData::raw(krate)
666+
CacheEntry::raw(krate)
602667
}
603668
.context("Failed to serialize docs")?;
669+
self.insert(cache_key, connection, cached_data)
670+
}
671+
672+
/// Store the JSON documentation generated by `rustdoc` in the cache,
673+
/// without having to perform the conversion towards the caching format.
674+
#[instrument(
675+
name = "Stored cache data for third-party crate docs to disk",
676+
skip_all,
677+
level=tracing::Level::DEBUG,
678+
fields(cache_key = tracing::field::Empty))
679+
]
680+
fn insert(
681+
&self,
682+
cache_key: ThirdPartyCrateCacheKey<'_>,
683+
connection: &rusqlite::Connection,
684+
cached_data: CacheEntry<'_>,
685+
) -> Result<(), anyhow::Error> {
686+
tracing::Span::current().record("cache_key", tracing::field::debug(&cache_key));
604687
let mut stmt = connection.prepare_cached(
605688
"INSERT INTO rustdoc_3d_party_crates_cache (
606689
crate_name,
@@ -692,7 +775,7 @@ impl ThirdPartyCrateCache {
692775

693776
#[derive(Debug)]
694777
/// The serialized form of a crate's documentation, as stored in the cache.
695-
pub(super) struct CachedData<'a> {
778+
pub(in crate::rustdoc) struct CacheEntry<'a> {
696779
root_item_id: u32,
697780
external_crates: Cow<'a, [u8]>,
698781
paths: Cow<'a, [u8]>,
@@ -705,15 +788,15 @@ pub(super) struct CachedData<'a> {
705788
#[derive(Debug)]
706789
/// Data that can be computed starting from the raw JSON documentation for a crate,
707790
/// without having to re-invoke `rustdoc`.
708-
pub(super) struct SecondaryIndexes<'a> {
791+
pub(in crate::rustdoc) struct SecondaryIndexes<'a> {
709792
import_index: Cow<'a, [u8]>,
710793
annotated_items: Option<Cow<'a, [u8]>>,
711794
import_path2id: Cow<'a, [u8]>,
712795
re_exports: Cow<'a, [u8]>,
713796
}
714797

715-
impl<'a> CachedData<'a> {
716-
pub(super) fn new(krate: &'a crate::rustdoc::Crate) -> Result<CachedData<'a>, anyhow::Error> {
798+
impl<'a> CacheEntry<'a> {
799+
pub fn new(krate: &'a crate::rustdoc::Crate) -> Result<CacheEntry<'a>, anyhow::Error> {
717800
let mut cached = Self::raw(krate)?;
718801

719802
let import_index = bincode::serde::encode_to_vec(&krate.import_index, BINCODE_CONFIG)?;
@@ -732,7 +815,7 @@ impl<'a> CachedData<'a> {
732815
}
733816

734817
/// Cache only `rustdoc`'s output, no secondary indexes.
735-
pub(super) fn raw(krate: &'a crate::rustdoc::Crate) -> Result<CachedData<'a>, anyhow::Error> {
818+
pub fn raw(krate: &'a crate::rustdoc::Crate) -> Result<CacheEntry<'a>, anyhow::Error> {
736819
let crate_data = &krate.core.krate;
737820
let CrateItemIndex::Eager(index) = &crate_data.index else {
738821
anyhow::bail!(
@@ -752,7 +835,7 @@ impl<'a> CachedData<'a> {
752835
bincode::serde::encode_to_vec(&crate_data.external_crates, BINCODE_CONFIG)?;
753836
let paths = bincode::serde::encode_to_vec(&crate_data.paths, BINCODE_CONFIG)?;
754837

755-
Ok(CachedData {
838+
Ok(CacheEntry {
756839
root_item_id: crate_data.root_item_id.0,
757840
external_crates: Cow::Owned(external_crates),
758841
paths: Cow::Owned(paths),

compiler/pavexc/src/rustdoc/compute/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ mod format;
88
mod toolchain;
99

1010
use ahash::{HashMap, HashMapExt};
11+
pub(super) use cache::CacheEntry;
1112
pub(crate) use cache::{RustdocCacheKey, RustdocGlobalFsCache};
1213

1314
use anyhow::Context;

0 commit comments

Comments
 (0)