Skip to content

Commit c8abe3c

Browse files
authored
refactor: improve persistent cache load error handling (#13608)
* refactor: remove useless BuildModuleGraphArtifact state
* refactor: improve persistent cache load error handling
* feat: add trace log in cache context
1 parent 4847e20 commit c8abe3c

File tree

15 files changed

+466
-215
lines changed

15 files changed

+466
-215
lines changed

crates/rspack_core/src/artifacts/build_module_graph_artifact.rs

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,6 @@ use crate::{
1313
utils::{FileCounter, ResourceId},
1414
};
1515

16-
/// Enum used to mark whether module graph has been built.
17-
///
18-
/// The persistent cache will recovery `MakeArtifact` when `MakeArtifact.state` is `Uninitialized`.
19-
/// Make stage will update `MakeArtifact.state` to `Initialized`, and incremental rebuild will reuse
20-
/// the previous MakeArtifact, so persistent cache will never recovery again.
21-
#[derive(Debug, Default)]
22-
pub enum BuildModuleGraphArtifactState {
23-
#[default]
24-
Uninitialized,
25-
Initialized,
26-
}
27-
2816
/// Make Artifact, including all side effects of the make stage.
2917
#[derive(Debug)]
3018
pub struct BuildModuleGraphArtifact {
@@ -43,11 +31,6 @@ pub struct BuildModuleGraphArtifact {
4331
pub issuer_update_modules: IdentifierSet,
4432

4533
// data
46-
/// Field to mark whether artifact has been initialized.
47-
///
48-
/// Only `BuildModuleGraphArtifact::new()` is Uninitialized, `update_module_graph` will set this field to Initialized
49-
/// Persistent cache will update BuildModuleGraphArtifact and set force_build_deps to this field when this is Uninitialized.
50-
pub state: BuildModuleGraphArtifactState,
5134
/// Module graph data
5235
pub module_graph: ModuleGraph,
5336
pub side_effects_state_artifact: SideEffectsStateArtifact,
@@ -77,7 +60,6 @@ impl BuildModuleGraphArtifact {
7760
affected_modules: Default::default(),
7861
affected_dependencies: Default::default(),
7962
issuer_update_modules: Default::default(),
80-
state: BuildModuleGraphArtifactState::Uninitialized,
8163
module_graph: Default::default(),
8264
side_effects_state_artifact: Default::default(),
8365
module_to_lazy_make: Default::default(),

crates/rspack_core/src/cache/persistent/build_dependencies/mod.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ impl BuildDeps {
4747
}
4848
}
4949

50+
/// Resets the build-dependencies scope in `storage`.
///
/// Calls `storage.reset` with the name of `SnapshotScope::BUILD`. Nothing on
/// `self` is read or modified, and the call is synchronous.
51+
pub fn reset(&self, storage: &mut dyn Storage) {
52+
storage.reset(SnapshotScope::BUILD.name());
53+
}
54+
5055
/// Add build dependencies
5156
///
5257
/// For performance reasons, recursive searches will stop for build dependencies in node_modules.
@@ -176,7 +181,7 @@ mod test {
176181
.await
177182
.expect("should validate success");
178183
assert!(!validate_result);
179-
storage.reset().await;
184+
storage.reset(scope);
180185

181186
let data = storage.load(scope).await.expect("should load success");
182187
assert_eq!(data.len(), 0);
Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
use rspack_paths::{ArcPath, ArcPathSet};
2+
3+
use super::{
4+
build_dependencies::BuildDeps,
5+
occasion::Occasion,
6+
snapshot::{Snapshot, SnapshotScope},
7+
storage::BoxStorage,
8+
};
9+
10+
/// Per-build runtime state shared across all cache operations.
11+
///
12+
/// `load_failed` gates every `load_*` call in a single build: once any
13+
/// load fails it is set to `true` and all subsequent loads are skipped.
14+
/// Call [`CacheContext::reset`] at the end of each build to prepare the
15+
/// context for the next one; that call also hands back the warnings
/// collected while loading and saving.
16+
#[derive(Debug)]
17+
pub struct CacheContext {
18+
/// Set when build dependencies have changed, meaning the cached data is
19+
/// structurally stale. Unlike `load_failed`, this flag persists across
20+
/// builds in readonly mode because the cache cannot be rebuilt there.
21+
invalid: bool,
22+
/// Per-build load gate. Flipped to `true` on the first failed `load_*`
23+
/// call; all subsequent `load_*` calls stop reading from storage for this
/// build (they still reset their own scope unless `readonly` is set).
24+
/// Restored to `false` (or derived from `invalid`) by `reset`.
25+
load_failed: bool,
26+
/// When `true`, all `save_*` and scope `reset` calls to storage are skipped.
27+
///
28+
/// This is a user-configured option, distinct from `DB::readonly` in the
29+
/// storage layer. Skipping at this level is cheaper: occasion serialisation
30+
/// and snapshot diffing are never executed, whereas `DB::readonly` only
31+
/// suppresses the final disk write after all that work has already been done.
32+
readonly: bool,
33+
// TODO replace with a logger and emit warnings directly.
34+
/// Warning messages gathered from failed loads and from
/// `save_build_deps`; drained and returned by `reset`.
warnings: Vec<String>,
35+
/// Storage backend that every load, save, scope reset, and flush issued
/// through this context operates on.
storage: BoxStorage,
36+
}
37+
38+
impl CacheContext {
39+
/// Creates a context that owns `storage`.
///
/// Both `invalid` and `load_failed` start as `false` and the warning list
/// starts empty. `readonly` is stored as given; the `save_*` methods and
/// the scope resets in the `load_*` methods check it before writing.
pub fn new(storage: BoxStorage, readonly: bool) -> Self {
40+
Self {
41+
invalid: false,
42+
load_failed: false,
43+
readonly,
44+
warnings: Default::default(),
45+
storage,
46+
}
47+
}
48+
49+
/// Validates build dependencies against storage and records the outcome.
50+
///
/// * `Ok(true)`: `invalid` is cleared; `load_failed` is left unchanged.
/// * `Ok(false)`: `invalid` and `load_failed` are both set.
/// * `Err(_)`: only `load_failed` is set (`invalid` is not touched) and the
///   error text is pushed onto `warnings`.
///
/// Afterwards the BUILD scope is reset whenever `load_failed` is set and
/// the context is not readonly, so this covers both the "deps changed" and
/// the "validation errored" cases.
51+
///
52+
/// Normally called only once per compiler instance, guarded by the
53+
/// `initialized` flag in `PersistentCache::initialize`.
54+
#[tracing::instrument("Cache::Context::load_build_deps", skip_all)]
55+
pub async fn load_build_deps(&mut self, build_deps: &mut BuildDeps) {
56+
match build_deps.validate(&*self.storage).await {
57+
Ok(is_success) => {
58+
// Validation ran to completion; a `false` result marks the cache as stale.
self.invalid = !is_success;
59+
if self.invalid {
60+
self.load_failed = true;
61+
tracing::debug!("build deps changed, cache invalidated");
62+
}
63+
}
64+
Err(err) => {
65+
// Validation itself failed: gate this build's loads, but do not mark
// the cache as `invalid`.
self.load_failed = true;
66+
self.warnings.push(err.to_string());
67+
tracing::warn!("build deps validation failed: {err}");
68+
}
69+
}
70+
// Keyed on `load_failed`, not `invalid`, so an errored validation also
// resets the BUILD scope.
if self.load_failed && !self.readonly {
71+
build_deps.reset(&mut *self.storage);
72+
}
73+
}
74+
75+
/// Adds the `added` paths to the stored build dependencies through
/// `BuildDeps::add`. No-op in readonly mode.
///
/// Whatever `BuildDeps::add` returns is appended to this context's
/// `warnings`, so it surfaces on the next [`CacheContext::reset`].
76+
#[tracing::instrument("Cache::Context::save_build_deps", skip_all)]
77+
pub async fn save_build_deps(
78+
&mut self,
79+
build_deps: &mut BuildDeps,
80+
added: impl Iterator<Item = ArcPath>,
81+
) {
82+
// Readonly: return before `added` is consumed or storage is touched.
if self.readonly {
83+
return;
84+
}
85+
86+
self
87+
.warnings
88+
.extend(build_deps.add(&mut *self.storage, added).await);
89+
}
90+
91+
/// Computes modified/removed paths from all snapshot scopes.
92+
///
/// On success returns `Some((is_hot_start, modified_paths, removed_paths))`,
/// where the path sets are the union over the FILE, CONTEXT and MISSING
/// scopes and `is_hot_start` is the logical OR of the per-scope flag.
///
93+
/// Returns `None` when an earlier load in this build already failed (which
/// includes an invalid cache) or any scope fails to load here.
94+
/// On failure all snapshot scopes are reset (unless readonly) so they
95+
/// are fully rewritten this build.
96+
#[tracing::instrument("Cache::Context::load_snapshot", skip_all)]
97+
pub async fn load_snapshot(
98+
&mut self,
99+
snapshot: &Snapshot,
100+
) -> Option<(bool, ArcPathSet, ArcPathSet)> {
101+
if !self.load_failed {
102+
let mut is_hot_start = false;
103+
let mut modified_paths = ArcPathSet::default();
104+
let mut removed_paths = ArcPathSet::default();
105+
// All three scopes are queried, one after another, before any result
// is inspected.
let data = vec![
106+
snapshot
107+
.calc_modified_paths(&*self.storage, SnapshotScope::FILE)
108+
.await,
109+
snapshot
110+
.calc_modified_paths(&*self.storage, SnapshotScope::CONTEXT)
111+
.await,
112+
snapshot
113+
.calc_modified_paths(&*self.storage, SnapshotScope::MISSING)
114+
.await,
115+
];
116+
for item in data {
117+
match item {
118+
// a: per-scope hot-start flag, b: modified paths, c: removed paths.
// The fourth element is not used here.
Ok((a, b, c, _)) => {
119+
is_hot_start = is_hot_start || a;
120+
modified_paths.extend(b);
121+
removed_paths.extend(c);
122+
}
123+
Err(err) => {
124+
// No early exit: the remaining results are still visited, so every
// failing scope contributes its own warning.
self.warnings.push(err.to_string());
125+
self.load_failed = true;
126+
tracing::warn!("snapshot scope load failed: {err}");
127+
}
128+
}
129+
}
130+
if !self.load_failed {
131+
tracing::debug!(
132+
is_hot_start,
133+
modified = modified_paths.len(),
134+
removed = removed_paths.len(),
135+
"snapshot loaded"
136+
);
137+
return Some((is_hot_start, modified_paths, removed_paths));
138+
}
139+
}
140+
141+
// load_failed: reset snapshot scopes so they are fully rewritten this build.
// Reached both when the gate was already set on entry and when a scope
// failed above.
142+
if !self.readonly {
143+
snapshot.reset(&mut *self.storage);
144+
}
145+
None
146+
}
147+
148+
/// Persists snapshot data for all three scopes. No-op in readonly mode.
///
/// Each `*_deps` argument is an `(added, removed)` pair of path iterators
/// for its scope (FILE, CONTEXT, MISSING). All removals are applied first,
/// then the additions are written scope by scope.
149+
#[tracing::instrument("Cache::Context::save_snapshot", skip_all)]
150+
pub async fn save_snapshot(
151+
&mut self,
152+
snapshot: &Snapshot,
153+
file_deps: (impl Iterator<Item = ArcPath>, impl Iterator<Item = ArcPath>),
154+
context_deps: (impl Iterator<Item = ArcPath>, impl Iterator<Item = ArcPath>),
155+
missing_deps: (impl Iterator<Item = ArcPath>, impl Iterator<Item = ArcPath>),
156+
) {
157+
// Readonly: return before any iterator is consumed.
if self.readonly {
158+
return;
159+
}
160+
161+
let (file_added, file_removed) = file_deps;
162+
let (context_added, context_removed) = context_deps;
163+
let (missing_added, missing_removed) = missing_deps;
164+
// Removals are synchronous calls and run before any addition.
snapshot.remove(&mut *self.storage, SnapshotScope::FILE, file_removed);
165+
snapshot.remove(&mut *self.storage, SnapshotScope::CONTEXT, context_removed);
166+
snapshot.remove(&mut *self.storage, SnapshotScope::MISSING, missing_removed);
167+
// Additions are awaited one scope at a time.
snapshot
168+
.add(&mut *self.storage, SnapshotScope::FILE, file_added)
169+
.await;
170+
snapshot
171+
.add(&mut *self.storage, SnapshotScope::CONTEXT, context_added)
172+
.await;
173+
snapshot
174+
.add(&mut *self.storage, SnapshotScope::MISSING, missing_added)
175+
.await;
176+
}
177+
178+
/// Loads an occasion's artifact from storage.
179+
///
/// Returns `Some(artifact)` when no earlier load in this build has failed
/// and `occasion.recovery` succeeds.
///
180+
/// Returns `None` when an earlier load already failed (which includes an
181+
/// invalid cache) or recovery fails here. In both cases the occasion's
/// scope is reset unless the context is readonly; a recovery error is also
/// pushed onto `warnings` and sets `load_failed`.
182+
#[tracing::instrument("Cache::Context::load_occasion", skip_all)]
183+
pub async fn load_occasion<O: Occasion>(&mut self, occasion: &O) -> Option<O::Artifact> {
184+
if !self.load_failed {
185+
match occasion.recovery(&*self.storage).await {
186+
Ok(artifact) => {
187+
tracing::debug!("occasion recovery succeeded");
188+
return Some(artifact);
189+
}
190+
Err(err) => {
191+
// Record the failure and gate every later `load_*` in this build.
self.warnings.push(err.to_string());
192+
self.load_failed = true;
193+
tracing::warn!("occasion recovery failed: {err}");
194+
}
195+
}
196+
}
197+
// Only reached when nothing was loaded; skip the scope reset in readonly mode.
if !self.readonly {
198+
occasion.reset(&mut *self.storage);
199+
}
200+
None
201+
}
202+
203+
/// Persists an occasion's artifact. No-op in readonly mode.
///
/// Passes `artifact` and this context's storage to `occasion.save`. The
/// call is synchronous and does not consult `load_failed`.
204+
#[tracing::instrument("Cache::Context::save_occasion", skip_all)]
205+
pub fn save_occasion<O: Occasion>(&mut self, occasion: &O, artifact: &O::Artifact) {
206+
// Readonly: return before `occasion.save` is invoked at all.
if self.readonly {
207+
return;
208+
}
209+
210+
occasion.save(&mut *self.storage, artifact);
211+
}
212+
213+
/// Enqueues a background persistence flush. No-op in readonly mode.
214+
///
215+
/// The write completes asynchronously; call [`CacheContext::flush_storage`]
216+
/// to wait for it.
///
/// This method is not `async` and awaits nothing: it calls `storage.save()`
/// and returns.
/// NOTE(review): the background/asynchronous behaviour is a property of the
/// storage implementation and is not visible from this method — confirm
/// against the `Storage` implementation in use.
217+
pub fn save_storage(&mut self) {
218+
if self.readonly {
219+
return;
220+
}
221+
222+
self.storage.save();
223+
}
224+
225+
/// Waits for all background storage writes to complete.
226+
///
227+
/// Must be called before process exit to avoid losing buffered data.
///
/// Unlike the `save_*` methods, this is not gated on `readonly`: the
/// storage's `flush` is awaited unconditionally.
228+
pub async fn flush_storage(&self) {
229+
self.storage.flush().await
230+
}
231+
232+
/// Resets per-build state and returns accumulated warnings.
233+
///
234+
/// In non-readonly mode both flags are cleared; scope resets done during
235+
/// this build ensure a clean slate next time.
236+
///
237+
/// In readonly mode `invalid` is preserved (the cache is still stale and
238+
/// cannot be rebuilt), so `load_failed` is derived from it: stale-cache
239+
/// loads are skipped on the next build as well. Transient errors
240+
/// (`load_failed` without `invalid`) are cleared so the next build retries.
///
/// The returned vector is moved out of `self.warnings`, which is left empty.
241+
pub fn reset(&mut self) -> Vec<String> {
242+
if !self.readonly {
243+
self.invalid = false;
244+
self.load_failed = false
245+
} else {
246+
// Keep gating loads only if the cache is known to be stale.
self.load_failed = self.invalid;
247+
}
248+
// Drain the warnings so they are reported at most once.
std::mem::take(&mut self.warnings)
249+
}
250+
}

0 commit comments

Comments
 (0)