Skip to content

Commit d0d6ec6

Browse files
perf: Use rkyv to deserialize paths, on a need-to-use basis
1 parent 45b5a37 commit d0d6ec6

File tree

3 files changed

+148
-21
lines changed

3 files changed

+148
-21
lines changed

compiler/pavex_rustdoc_types/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,8 @@ pub struct Id(pub u32);
798798
rkyv::Deserialize,
799799
)]
800800
#[rkyv(derive(Debug))]
801+
#[rkyv(compare(PartialEq))]
802+
#[repr(u8)]
801803
#[serde(rename_all = "snake_case")]
802804
pub enum ItemKind {
803805
/// A module declaration, e.g. `mod foo;` or `mod foo {}`

compiler/pavexc/src/rustdoc/compute/cache.rs

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ use crate::{
1818
rustdoc::{
1919
annotations::AnnotatedItems,
2020
queries::{
21-
CrateData, CrateItemIndex, ImportPath2Id, LazyCrateItemIndex, LazyImportPath2Id,
21+
CrateData, CrateItemIndex, CrateItemPaths, ImportPath2Id, LazyCrateItemIndex,
22+
LazyCrateItemPaths, LazyImportPath2Id,
2223
},
2324
},
2425
};
@@ -380,7 +381,7 @@ impl ToolchainCache {
380381
let krate = CacheEntry {
381382
root_item_id,
382383
external_crates: Cow::Borrowed(external_crates),
383-
paths: Cow::Borrowed(paths),
384+
paths: RkyvCowBytes::Borrowed(paths),
384385
format_version,
385386
items: RkyvCowBytes::Borrowed(items),
386387
secondary_indexes: Some(SecondaryIndexes {
@@ -601,7 +602,7 @@ impl ThirdPartyCrateCache {
601602
let krate = CacheEntry {
602603
root_item_id,
603604
external_crates: Cow::Borrowed(external_crates),
604-
paths: Cow::Borrowed(paths),
605+
paths: RkyvCowBytes::Borrowed(paths),
605606
format_version,
606607
items: RkyvCowBytes::Borrowed(items),
607608
secondary_indexes,
@@ -786,7 +787,7 @@ impl ThirdPartyCrateCache {
786787
pub(in crate::rustdoc) struct CacheEntry<'a> {
787788
root_item_id: u32,
788789
external_crates: Cow<'a, [u8]>,
789-
paths: Cow<'a, [u8]>,
790+
paths: RkyvCowBytes<'a>,
790791
format_version: i64,
791792
items: RkyvCowBytes<'a>,
792793
secondary_indexes: Option<SecondaryIndexes<'a>>,
@@ -879,19 +880,26 @@ impl<'a> CacheEntry<'a> {
879880
the same crate twice? This is a bug."
880881
);
881882
};
883+
let CrateItemPaths::Eager(paths) = &crate_data.paths else {
884+
anyhow::bail!(
885+
"The crate item paths is not deserialized. Are we trying to cache \
886+
the same crate twice? This is a bug."
887+
);
888+
};
882889

883-
// Serialize the items HashMap using rkyv for zero-copy deserialization later.
884890
let items = rkyv::to_bytes::<rkyv::rancor::Error>(&index.index)
885891
.map_err(|e| anyhow::anyhow!(e).context("Failed to serialize crate items with rkyv"))?;
886892

887893
let external_crates =
888894
bincode::serde::encode_to_vec(&crate_data.external_crates, BINCODE_CONFIG)?;
889-
let paths = bincode::serde::encode_to_vec(&crate_data.paths, BINCODE_CONFIG)?;
895+
let paths = rkyv::to_bytes::<rkyv::rancor::Error>(&paths.paths).map_err(|e| {
896+
anyhow::anyhow!(e).context("Failed to serialize item summaries with rkyv")
897+
})?;
890898

891899
Ok(CacheEntry {
892900
root_item_id: crate_data.root_item_id.0,
893901
external_crates: Cow::Owned(external_crates),
894-
paths: Cow::Owned(paths),
902+
paths: RkyvCowBytes::Owned(paths),
895903
format_version: crate_data.format_version as i64,
896904
items: RkyvCowBytes::Owned(items),
897905
secondary_indexes: None,
@@ -904,17 +912,14 @@ impl<'a> CacheEntry<'a> {
904912
/// since it can be quite large and deserialization can be slow for large crates.
905913
/// The item index is stored as rkyv-serialized bytes for zero-copy access.
906914
pub(super) fn hydrate(self, package_id: PackageId) -> Result<RustdocCacheEntry, anyhow::Error> {
907-
let paths = tracing::trace_span!("Deserialize paths")
908-
.in_scope(|| bincode::decode_from_slice(&self.paths, BINCODE_CONFIG))
909-
.context("Failed to deserialize paths")?
910-
.0;
911-
912915
let crate_data = CrateData {
913916
root_item_id: rustdoc_types::Id(self.root_item_id.to_owned()),
914917
external_crates: bincode::decode_from_slice(&self.external_crates, BINCODE_CONFIG)
915918
.context("Failed to deserialize external_crates")?
916919
.0,
917-
paths,
920+
paths: CrateItemPaths::Lazy(LazyCrateItemPaths {
921+
bytes: self.paths.into_owned(),
922+
}),
918923
format_version: self.format_version.try_into()?,
919924
index: CrateItemIndex::Lazy(LazyCrateItemIndex {
920925
bytes: self.items.into_owned(),

compiler/pavexc/src/rustdoc/queries.rs

Lines changed: 128 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,15 @@ use guppy::{PackageId, Version};
1111
use indexmap::IndexSet;
1212
use rayon::iter::IntoParallelRefIterator;
1313
use rkyv::collections::swiss_table::ArchivedHashMap;
14+
use rkyv::hash::FxHasher64;
1415
use rkyv::rancor::Panic;
1516
use rkyv::string::ArchivedString;
1617
use rkyv::util::AlignedVec;
1718
use rkyv::vec::ArchivedVec;
1819
use rustc_hash::FxHashMap;
1920
use rustdoc_types::{
20-
ArchivedId, ArchivedItem, ExternalCrate, Item, ItemEnum, ItemKind, ItemSummary, Visibility,
21+
ArchivedId, ArchivedItem, ArchivedItemSummary, ExternalCrate, Item, ItemEnum, ItemKind,
22+
ItemSummary, Visibility,
2123
};
2224
use tracing::Span;
2325
use tracing_log_error::log_error;
@@ -989,13 +991,131 @@ pub(crate) struct CrateData {
989991
pub external_crates: FxHashMap<u32, ExternalCrate>,
990992
/// A mapping from the id of a type to its fully qualified path.
991993
/// Primarily useful for foreign items that are being re-exported by this crate.
992-
#[allow(clippy::disallowed_types)]
993-
pub paths: FxHashMap<rustdoc_types::Id, ItemSummary>,
994+
pub paths: CrateItemPaths,
994995
/// The version of the JSON format used by rustdoc.
995996
pub format_version: u32,
996997
/// The index of all the items in the crate.
997998
pub index: CrateItemIndex,
998999
}
1000+
1001+
#[derive(Debug, Clone)]
1002+
/// A mapping from the id of a type to its fully qualified path.
1003+
///
1004+
/// Primarily useful for foreign items that are being re-exported by this crate.
1005+
pub(crate) enum CrateItemPaths {
1006+
Eager(EagerCrateItemPaths),
1007+
Lazy(LazyCrateItemPaths),
1008+
}
1009+
1010+
impl CrateItemPaths {
1011+
/// Retrieve an item summary from the index given its id.
1012+
pub fn get(&self, id: &rustdoc_types::Id) -> Option<Cow<'_, ItemSummary>> {
1013+
match self {
1014+
Self::Eager(m) => m.paths.get(id).map(Cow::Borrowed),
1015+
Self::Lazy(m) => {
1016+
let item = m.get_deserialized(id)?;
1017+
Some(Cow::Owned(item))
1018+
}
1019+
}
1020+
}
1021+
1022+
pub fn iter(&self) -> impl Iterator<Item = (rustdoc_types::Id, ItemSummaryRef<'_>)> {
1023+
match self {
1024+
CrateItemPaths::Eager(paths) => CrateItemPathsIter::Eager(paths.paths.iter()),
1025+
CrateItemPaths::Lazy(paths) => CrateItemPathsIter::Lazy(paths.archived().iter()),
1026+
}
1027+
}
1028+
}
1029+
1030+
pub enum CrateItemPathsIter<'a> {
1031+
Eager(std::collections::hash_map::Iter<'a, rustdoc_types::Id, ItemSummary>),
1032+
Lazy(
1033+
rkyv::collections::swiss_table::map::Iter<'a, ArchivedId, ArchivedItemSummary, FxHasher64>,
1034+
),
1035+
}
1036+
1037+
pub enum ItemSummaryRef<'a> {
1038+
Eager(&'a ItemSummary),
1039+
Lazy(&'a ArchivedItemSummary),
1040+
}
1041+
1042+
impl<'a> ItemSummaryRef<'a> {
1043+
pub fn crate_id(&self) -> u32 {
1044+
match self {
1045+
ItemSummaryRef::Eager(s) => s.crate_id,
1046+
ItemSummaryRef::Lazy(s) => s.crate_id.to_native(),
1047+
}
1048+
}
1049+
1050+
pub fn kind(&self) -> ItemKind {
1051+
match self {
1052+
ItemSummaryRef::Eager(s) => s.kind,
1053+
ItemSummaryRef::Lazy(s) => {
1054+
// Safe to do since the enum is repr(u8)
1055+
rkyv::deserialize::<_, rkyv::rancor::Infallible>(&s.kind).unwrap()
1056+
}
1057+
}
1058+
}
1059+
1060+
pub fn path(&self) -> Cow<'_, [String]> {
1061+
match self {
1062+
ItemSummaryRef::Eager(s) => Cow::Borrowed(&s.path),
1063+
ItemSummaryRef::Lazy(s) => {
1064+
Cow::Owned(s.path.iter().map(|s| s.as_str().to_owned()).collect())
1065+
}
1066+
}
1067+
}
1068+
}
1069+
1070+
impl<'a> Iterator for CrateItemPathsIter<'a> {
1071+
type Item = (rustdoc_types::Id, ItemSummaryRef<'a>);
1072+
1073+
fn next(&mut self) -> Option<Self::Item> {
1074+
match self {
1075+
Self::Eager(iter) => iter.next().map(|(k, v)| (*k, ItemSummaryRef::Eager(v))),
1076+
Self::Lazy(iter) => iter
1077+
.next()
1078+
.map(|(k, v)| (rustdoc_types::Id(k.0.to_native()), ItemSummaryRef::Lazy(v))),
1079+
}
1080+
}
1081+
}
1082+
1083+
#[derive(Debug, Clone)]
1084+
/// See [`CrateItemPaths`] for more information.
1085+
pub(crate) struct EagerCrateItemPaths {
1086+
#[allow(clippy::disallowed_types)]
1087+
pub paths: FxHashMap<rustdoc_types::Id, ItemSummary>,
1088+
}
1089+
1090+
/// See [`CrateItemPaths`] for more information.
1091+
#[derive(Debug, Clone)]
1092+
pub(crate) struct LazyCrateItemPaths {
1093+
pub(super) bytes: AlignedVec,
1094+
}
1095+
1096+
impl LazyCrateItemPaths {
1097+
/// Get zero-copy access to the archived HashMap.
1098+
#[inline]
1099+
fn archived(&self) -> &ArchivedHashMap<ArchivedId, ArchivedItemSummary> {
1100+
// SAFETY: The bytes were serialized by rkyv from a valid HashMap<Id, ItemSummary>.
1101+
// We trust the cache to contain valid data.
1102+
unsafe {
1103+
rkyv::access_unchecked::<ArchivedHashMap<ArchivedId, ArchivedItemSummary>>(&self.bytes)
1104+
}
1105+
}
1106+
1107+
/// Get an item by its ID, returning a reference to the archived summary.
1108+
pub fn get(&self, id: &rustdoc_types::Id) -> Option<&ArchivedItemSummary> {
1109+
self.archived().get(&ArchivedId(id.0.into()))
1110+
}
1111+
1112+
/// Deserialize a summary by its ID.
1113+
pub fn get_deserialized(&self, id: &rustdoc_types::Id) -> Option<ItemSummary> {
1114+
let archived = self.get(id)?;
1115+
Some(rkyv::deserialize::<ItemSummary, Panic>(archived).unwrap())
1116+
}
1117+
}
1118+
9991119
#[derive(Debug, Clone)]
10001120
/// The index of all the items in the crate.
10011121
///
@@ -1107,7 +1227,7 @@ impl Crate {
11071227
index: CrateItemIndex::Eager(EagerCrateItemIndex { index: krate.index }),
11081228
external_crates: krate.external_crates,
11091229
format_version: krate.format_version,
1110-
paths: krate.paths,
1230+
paths: CrateItemPaths::Eager(EagerCrateItemPaths { paths: krate.paths }),
11111231
};
11121232
Self::index(crate_data, package_id, diagnostics)
11131233
}
@@ -1123,18 +1243,18 @@ impl Crate {
11231243
.iter()
11241244
.filter_map(|(id, summary)| {
11251245
// We only want types, no macros
1126-
if matches!(summary.kind, ItemKind::Macro | ItemKind::ProcDerive) {
1246+
if matches!(summary.kind(), ItemKind::Macro | ItemKind::ProcDerive) {
11271247
return None;
11281248
}
11291249
// We will index local items on our own.
11301250
// We don't get them from `paths` because it may include private items
11311251
// as well, and we don't have a way to figure out if an item is private
11321252
// or not from the summary info.
1133-
if summary.crate_id == 0 {
1253+
if summary.crate_id() == 0 {
11341254
return None;
11351255
}
11361256

1137-
Some((summary.path.clone(), id.to_owned()))
1257+
Some((summary.path().into_owned(), id.to_owned()))
11381258
})
11391259
.collect();
11401260

@@ -1312,7 +1432,7 @@ impl Crate {
13121432
fn get_summary_by_local_type_id(
13131433
&self,
13141434
id: &rustdoc_types::Id,
1315-
) -> Result<&rustdoc_types::ItemSummary, anyhow::Error> {
1435+
) -> Result<Cow<'_, rustdoc_types::ItemSummary>, anyhow::Error> {
13161436
self.core.krate.paths.get(id).ok_or_else(|| {
13171437
anyhow!(
13181438
"Failed to look up the type id `{}` in the rustdoc's path index for `{}`. \

0 commit comments

Comments
 (0)