Skip to content

Commit 871dd74

Browse files
perf: Don't deserialize the entire import path <> id map.
1 parent 6b0f8e5 commit 871dd74

File tree

2 files changed

+112
-35
lines changed

2 files changed

+112
-35
lines changed

compiler/pavexc/src/rustdoc/compute/cache.rs

Lines changed: 45 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use std::{borrow::Cow, collections::BTreeSet};
22

33
use anyhow::Context;
4-
use bincode::{Decode, Encode};
54
use camino::Utf8Path;
65
use guppy::{
76
PackageId,
@@ -18,7 +17,9 @@ use crate::{
1817
DiagnosticSink,
1918
rustdoc::{
2019
annotations::AnnotatedItems,
21-
queries::{CrateData, CrateItemIndex, LazyCrateItemIndex},
20+
queries::{
21+
CrateData, CrateItemIndex, ImportPath2Id, LazyCrateItemIndex, LazyImportPath2Id,
22+
},
2223
},
2324
};
2425

@@ -381,12 +382,12 @@ impl ToolchainCache {
381382
external_crates: Cow::Borrowed(external_crates),
382383
paths: Cow::Borrowed(paths),
383384
format_version,
384-
items: CachedItems::Borrowed(items),
385+
items: RkyvCowBytes::Borrowed(items),
385386
secondary_indexes: Some(SecondaryIndexes {
386387
import_index: Cow::Borrowed(import_index),
387388
// Standard library crates don't have Pavex annotations.
388389
annotated_items: None,
389-
import_path2id: Cow::Borrowed(import_path2id),
390+
import_path2id: RkyvCowBytes::Borrowed(import_path2id),
390391
re_exports: Cow::Borrowed(re_exports),
391392
}),
392393
}
@@ -590,7 +591,7 @@ impl ThirdPartyCrateCache {
590591
Some(annotated_items),
591592
) => Some(SecondaryIndexes {
592593
import_index: Cow::Borrowed(import_index),
593-
import_path2id: Cow::Borrowed(import_path2id),
594+
import_path2id: RkyvCowBytes::Borrowed(import_path2id),
594595
re_exports: Cow::Borrowed(re_exports),
595596
annotated_items: Some(Cow::Borrowed(annotated_items)),
596597
}),
@@ -602,7 +603,7 @@ impl ThirdPartyCrateCache {
602603
external_crates: Cow::Borrowed(external_crates),
603604
paths: Cow::Borrowed(paths),
604605
format_version,
605-
items: CachedItems::Borrowed(items),
606+
items: RkyvCowBytes::Borrowed(items),
606607
secondary_indexes,
607608
}
608609
.hydrate(package_metadata.id().to_owned())
@@ -787,47 +788,56 @@ pub(in crate::rustdoc) struct CacheEntry<'a> {
787788
external_crates: Cow<'a, [u8]>,
788789
paths: Cow<'a, [u8]>,
789790
format_version: i64,
790-
items: CachedItems<'a>,
791+
items: RkyvCowBytes<'a>,
791792
secondary_indexes: Option<SecondaryIndexes<'a>>,
792793
}
793794

794795
#[derive(Debug)]
795-
/// `rkyv`-serialized `HashMap<Id, Item>`.
796-
pub(in crate::rustdoc) enum CachedItems<'a> {
796+
/// A `Cow` variant to work with `rkyv`'s `AlignedVec`.
797+
pub(in crate::rustdoc) enum RkyvCowBytes<'a> {
797798
Borrowed(&'a [u8]),
798799
Owned(AlignedVec),
799800
}
800801

801-
impl ToSql for CachedItems<'_> {
802+
impl ToSql for RkyvCowBytes<'_> {
802803
fn to_sql(&self) -> rusqlite::Result<rusqlite::types::ToSqlOutput<'_>> {
803804
let s = match self {
804-
CachedItems::Borrowed(items) => items,
805-
CachedItems::Owned(s) => s.as_slice(),
805+
RkyvCowBytes::Borrowed(items) => items,
806+
RkyvCowBytes::Owned(s) => s.as_slice(),
806807
};
807808
Ok(ToSqlOutput::Borrowed(rusqlite::types::ValueRef::Blob(s)))
808809
}
809810
}
810811

811-
impl<'a> CachedItems<'a> {
812+
impl<'a> RkyvCowBytes<'a> {
812813
pub fn into_owned(self) -> AlignedVec {
813814
match self {
814-
CachedItems::Borrowed(items) => {
815+
RkyvCowBytes::Borrowed(items) => {
815816
let mut v = AlignedVec::with_capacity(items.len());
816817
v.extend_from_slice(items);
817818
v
818819
}
819-
CachedItems::Owned(aligned_vec) => aligned_vec,
820+
RkyvCowBytes::Owned(aligned_vec) => aligned_vec,
820821
}
821822
}
822823
}
823824

824-
#[derive(Debug, Encode, Decode)]
825+
// Gives uniform read-only access to the underlying bytes, whichever variant
// currently holds them.
impl<'a> AsRef<[u8]> for RkyvCowBytes<'a> {
826+
/// Returns the wrapped bytes as a plain slice.
///
/// Neither arm copies: the borrowed slice is returned as-is and the
/// owned `AlignedVec` is viewed through `as_slice`.
fn as_ref(&self) -> &[u8] {
827+
match self {
828+
RkyvCowBytes::Borrowed(items) => items,
829+
RkyvCowBytes::Owned(aligned_vec) => aligned_vec.as_slice(),
830+
}
831+
}
832+
}
833+
834+
#[derive(Debug)]
825835
/// Data that can be computed starting from the raw JSON documentation for a crate,
826836
/// without having to re-invoke `rustdoc`.
827837
pub(in crate::rustdoc) struct SecondaryIndexes<'a> {
828838
import_index: Cow<'a, [u8]>,
829839
annotated_items: Option<Cow<'a, [u8]>>,
830-
import_path2id: Cow<'a, [u8]>,
840+
import_path2id: RkyvCowBytes<'a>,
831841
re_exports: Cow<'a, [u8]>,
832842
}
833843

@@ -837,13 +847,24 @@ impl<'a> CacheEntry<'a> {
837847
let import_index = bincode::serde::encode_to_vec(&krate.import_index, BINCODE_CONFIG)?;
838848
let annotated_items =
839849
bincode::serde::encode_to_vec(&krate.annotated_items, BINCODE_CONFIG)?;
840-
let import_path2id = bincode::serde::encode_to_vec(&krate.import_path2id, BINCODE_CONFIG)?;
841850
let re_exports = bincode::serde::encode_to_vec(&krate.external_re_exports, BINCODE_CONFIG)?;
842851

852+
// Serialize the import path<>id map using rkyv for zero-copy access later.
853+
let ImportPath2Id::Eager(import_path2id) = &krate.import_path2id else {
854+
anyhow::bail!(
855+
"The crate's import path<>id map is not deserialized. Are we trying to cache \
856+
the same crate twice? This is a bug."
857+
);
858+
};
859+
let import_path2id =
860+
rkyv::to_bytes::<rkyv::rancor::Error>(&import_path2id.0).map_err(|e| {
861+
anyhow::anyhow!(e).context("Failed to serialize import path<>id map with rkyv")
862+
})?;
863+
843864
cached.secondary_indexes = Some(SecondaryIndexes {
844865
import_index: Cow::Owned(import_index),
845866
annotated_items: Some(Cow::Owned(annotated_items)),
846-
import_path2id: Cow::Owned(import_path2id),
867+
import_path2id: RkyvCowBytes::Owned(import_path2id),
847868
re_exports: Cow::Owned(re_exports),
848869
});
849870
Ok(cached)
@@ -861,7 +882,7 @@ impl<'a> CacheEntry<'a> {
861882

862883
// Serialize the items HashMap using rkyv for zero-copy deserialization later.
863884
let items = rkyv::to_bytes::<rkyv::rancor::Error>(&index.index)
864-
.map_err(|e| anyhow::anyhow!("Failed to serialize items with rkyv: {e}"))?;
885+
.map_err(|e| anyhow::anyhow!(e).context("Failed to serialize crate items with rkyv"))?;
865886

866887
let external_crates =
867888
bincode::serde::encode_to_vec(&crate_data.external_crates, BINCODE_CONFIG)?;
@@ -872,7 +893,7 @@ impl<'a> CacheEntry<'a> {
872893
external_crates: Cow::Owned(external_crates),
873894
paths: Cow::Owned(paths),
874895
format_version: crate_data.format_version as i64,
875-
items: CachedItems::Owned(items),
896+
items: RkyvCowBytes::Owned(items),
876897
secondary_indexes: None,
877898
})
878899
}
@@ -908,13 +929,6 @@ impl<'a> CacheEntry<'a> {
908929
krate: crate_data,
909930
};
910931

911-
let import_path2id = tracing::trace_span!("Deserialize import_path2id")
912-
.in_scope(|| {
913-
bincode::decode_from_slice(&secondary_indexes.import_path2id, BINCODE_CONFIG)
914-
})
915-
.context("Failed to deserialize import_path2id")?
916-
.0;
917-
918932
let import_index =
919933
bincode::decode_from_slice(&secondary_indexes.import_index, BINCODE_CONFIG)
920934
.context("Failed to deserialize import_index")?
@@ -935,7 +949,9 @@ impl<'a> CacheEntry<'a> {
935949
let krate = crate::rustdoc::Crate {
936950
core,
937951
annotated_items,
938-
import_path2id,
952+
import_path2id: ImportPath2Id::Lazy(LazyImportPath2Id(
953+
secondary_indexes.import_path2id.into_owned(),
954+
)),
939955
external_re_exports: re_exports,
940956
import_index,
941957
crate_id2package_id: Default::default(),

compiler/pavexc/src/rustdoc/queries.rs

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ use indexmap::IndexSet;
1212
use rayon::iter::IntoParallelRefIterator;
1313
use rkyv::collections::swiss_table::ArchivedHashMap;
1414
use rkyv::rancor::Panic;
15+
use rkyv::string::ArchivedString;
1516
use rkyv::util::AlignedVec;
17+
use rkyv::vec::ArchivedVec;
1618
use rustc_hash::FxHashMap;
1719
use rustdoc_types::{
1820
ArchivedId, ArchivedItem, ExternalCrate, Item, ItemEnum, ItemKind, ItemSummary, Visibility,
@@ -643,7 +645,7 @@ pub struct Crate {
643645
/// The index does NOT contain macros, since macros and types live in two
644646
/// different namespaces and can contain items with the same name.
645647
/// E.g. `core::clone::Clone` is both a trait and a derive macro.
646-
pub(super) import_path2id: HashMap<Vec<String>, rustdoc_types::Id>,
648+
pub(super) import_path2id: ImportPath2Id,
647649
/// Types (or modules!) re-exported from other crates.
648650
pub(crate) external_re_exports: ExternalReExports,
649651
/// All the items in this crate that have been annotated with an attribute from the `diagnostic::pavex::*` namespace.
@@ -660,6 +662,68 @@ pub struct Crate {
660662
Arc<std::sync::RwLock<HashMap<(u32, Option<String>), PackageId>>>,
661663
}
662664

665+
#[derive(Debug, Clone)]
666+
/// An index to look up the id of a type given one of its import paths, either
667+
/// public or private.
668+
///
669+
/// The index does NOT contain macros, since macros and types live in two
670+
/// different namespaces and can contain items with the same name.
671+
/// E.g. `core::clone::Clone` is both a trait and a derive macro.
672+
///
673+
/// Since the index can be quite large, we try to avoid deserializing it all at once.
674+
///
675+
/// The `Eager` variant contains the entire index, fully deserialized. This is what we get
676+
/// when we have had to index the documentation for the crate on the fly.
677+
///
678+
/// The `Lazy` variant contains the index as a byte array, with entries deserialized on demand.
679+
pub(crate) enum ImportPath2Id {
680+
/// The whole map, held in memory as a regular `HashMap`.
Eager(EagerImportPath2Id),
681+
/// The map kept in its serialized form; lookups go through
/// `LazyImportPath2Id::get_deserialized`.
Lazy(LazyImportPath2Id),
682+
}
683+
684+
impl ImportPath2Id {
685+
/// Looks up the id registered for `path`, returning an owned `Id`.
///
/// Returns `None` when the path is not present in the index.
/// Both variants hand back an owned value so that callers do not need to
/// care which representation is in use.
pub fn get(&self, path: &[String]) -> Option<rustdoc_types::Id> {
686+
match self {
687+
// Plain `HashMap` lookup; the stored id is cloned out.
ImportPath2Id::Eager(m) => m.0.get(path).cloned(),
688+
// Lookup against the serialized map; only the matching id is deserialized.
ImportPath2Id::Lazy(m) => m.get_deserialized(path),
689+
}
690+
}
691+
}
692+
693+
#[derive(Debug, Clone)]
694+
/// See [`ImportPath2Id`] for more information.
///
/// Newtype around the fully deserialized map; `ImportPath2Id::get` queries
/// the inner `HashMap` directly through `.0`.
695+
pub(crate) struct EagerImportPath2Id(pub HashMap<Vec<String>, rustdoc_types::Id>);
696+
697+
/// See [`ImportPath2Id`] for more information.
698+
///
699+
/// Stores rkyv-serialized bytes of a `HashMap<Vec<String>, Id>` and provides zero-copy access.
///
/// The bytes are reinterpreted with `rkyv::access_unchecked`, i.e. without
/// validation. Since the field is `pub`, whoever constructs this type is
/// responsible for passing bytes that really are such an archive.
700+
#[derive(Debug, Clone)]
701+
pub(crate) struct LazyImportPath2Id(pub AlignedVec);
702+
703+
impl LazyImportPath2Id {
704+
/// Reinterprets the stored bytes as an archived hash map, without copying
/// or validating them.
#[inline]
705+
fn archived(&self) -> &ArchivedHashMap<ArchivedVec<ArchivedString>, ArchivedId> {
706+
// SAFETY (review): `access_unchecked` performs no validation. This relies on
// `self.0` holding a valid rkyv archive of `HashMap<Vec<String>, Id>`, as the
// type's documentation states; nothing in this block enforces it.
unsafe {
707+
rkyv::access_unchecked::<ArchivedHashMap<ArchivedVec<ArchivedString>, ArchivedId>>(
708+
&self.0,
709+
)
710+
}
711+
}
712+
713+
/// Looks up `path` in the archived map and returns a reference to the
/// archived id, or `None` if the path is not present.
///
/// The key is first serialized with rkyv so that it can be compared against
/// the archived keys. Every call therefore clones `path` into a `Vec<String>`
/// and allocates a serialization buffer.
pub fn get(&self, path: &[String]) -> Option<&ArchivedId> {
714+
let path_vec: Vec<String> = path.to_vec();
715+
// NOTE(review): with the `Panic` error strategy a serialization failure
// presumably panics instead of producing `Err`, which would make the
// `.ok()?` fallback unreachable — confirm against rkyv's docs.
let bytes = rkyv::to_bytes::<Panic>(&path_vec).ok()?;
716+
717+
// SAFETY (review): `bytes` was produced just above by serializing a
// `Vec<String>`, so it is expected to be a valid archive of that type.
let archived_key = unsafe { rkyv::access_unchecked::<ArchivedVec<ArchivedString>>(&bytes) };
718+
self.archived().get(archived_key)
719+
}
720+
721+
/// Like [`Self::get`], but converts the archived id into an owned
/// `rustdoc_types::Id`.
///
/// Returns `None` if the path is not present. The `unwrap` is on a
/// deserialization performed with the `Panic` error strategy.
pub fn get_deserialized(&self, path: &[String]) -> Option<rustdoc_types::Id> {
722+
let archived = self.get(path)?;
723+
Some(rkyv::deserialize::<_, Panic>(archived).unwrap())
724+
}
725+
}
726+
663727
#[derive(
664728
Debug, Clone, Default, serde::Serialize, serde::Deserialize, bincode::Encode, bincode::Decode,
665729
)]
@@ -1103,7 +1167,7 @@ impl Crate {
11031167

11041168
let mut self_ = Self {
11051169
core: CrateCore { package_id, krate },
1106-
import_path2id,
1170+
import_path2id: ImportPath2Id::Eager(EagerImportPath2Id(import_path2id)),
11071171
import_index,
11081172
external_re_exports,
11091173
annotated_items: AnnotatedItems::default(),
@@ -1199,10 +1263,7 @@ impl Crate {
11991263
krate_collection: &CrateCollection,
12001264
) -> Result<Result<GlobalItemId, UnknownItemPath>, CannotGetCrateData> {
12011265
if let Some(id) = self.import_path2id.get(path) {
1202-
return Ok(Ok(GlobalItemId::new(
1203-
id.to_owned(),
1204-
self.core.package_id.to_owned(),
1205-
)));
1266+
return Ok(Ok(GlobalItemId::new(id, self.core.package_id.to_owned())));
12061267
}
12071268

12081269
for (

0 commit comments

Comments
 (0)