Skip to content

Commit a9bfc64

Browse files
authored
[Turbopack] handle state serialization (#69670)
### What? State needs some special handling for persistent caching. In particular, we need to distinguish between two kinds of state: * Persistent State: It is stored in the persistent cache and its value is kept between builds. * Transient State: It is only valid for a session and resets when restoring the persistent cache. We didn't have this separation before, so this PR introduces a new type `TransientState<T>` next to `State<T>` which handles transient state. The value is always an `Option<T>` and resets to `None` when restoring the persistent cache. This also means all tasks that depend on transient state will be invalidated on restoring. Transient State can also be used when the value is not serializable, e.g. this is the case for the `last_successful_parse` state in ecmascript modules. We use that to avoid large structural changes to the module graph when introducing parse errors to modules. Using transient state for that means we only apply this optimization within a session, and it resets when restoring from the persistent cache.
1 parent 87e91d5 commit a9bfc64

File tree

10 files changed

+391
-73
lines changed

10 files changed

+391
-73
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

turbopack/crates/turbo-tasks-fs/src/lib.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ use tokio::{
5858
};
5959
use tracing::Instrument;
6060
use turbo_tasks::{
61-
mark_stateful, trace::TraceRawVcs, Completion, Invalidator, RcStr, ReadRef, ValueToString, Vc,
61+
mark_stateful, trace::TraceRawVcs, Completion, Invalidator, RcStr, ReadRef,
62+
SerializationInvalidator, ValueToString, Vc,
6263
};
6364
use turbo_tasks_hash::{hash_xxh3_hash64, DeterministicHash, DeterministicHasher};
6465
use util::{extract_disk_access, join_path, normalize_path, sys_to_unix, unix_to_sys};
@@ -106,6 +107,8 @@ pub struct DiskFileSystem {
106107
invalidator_map: Arc<InvalidatorMap>,
107108
#[turbo_tasks(debug_ignore, trace_ignore)]
108109
dir_invalidator_map: Arc<InvalidatorMap>,
110+
#[turbo_tasks(debug_ignore, trace_ignore)]
111+
serialization_invalidator: SerializationInvalidator,
109112
/// Lock that makes invalidation atomic. It will keep a write lock during
110113
/// watcher invalidation and a read lock during other operations.
111114
#[turbo_tasks(debug_ignore, trace_ignore)]
@@ -126,6 +129,7 @@ impl DiskFileSystem {
126129
fn register_invalidator(&self, path: &Path) -> Result<()> {
127130
let invalidator = turbo_tasks::get_invalidator();
128131
self.invalidator_map.insert(path_to_key(path), invalidator);
132+
self.serialization_invalidator.invalidate();
129133
#[cfg(not(any(target_os = "macos", target_os = "windows")))]
130134
if let Some(dir) = path.parent() {
131135
self.watcher.ensure_watching(dir, self.root_path())?;
@@ -140,6 +144,8 @@ impl DiskFileSystem {
140144
let invalidator = turbo_tasks::get_invalidator();
141145
let mut invalidator_map = self.invalidator_map.lock().unwrap();
142146
let old_invalidators = invalidator_map.insert(path_to_key(path), [invalidator].into());
147+
drop(invalidator_map);
148+
self.serialization_invalidator.invalidate();
143149
#[cfg(not(any(target_os = "macos", target_os = "windows")))]
144150
if let Some(dir) = path.parent() {
145151
self.watcher.ensure_watching(dir, self.root_path())?;
@@ -153,6 +159,7 @@ impl DiskFileSystem {
153159
let invalidator = turbo_tasks::get_invalidator();
154160
self.dir_invalidator_map
155161
.insert(path_to_key(path), invalidator);
162+
self.serialization_invalidator.invalidate();
156163
#[cfg(not(any(target_os = "macos", target_os = "windows")))]
157164
self.watcher.ensure_watching(path, self.root_path())?;
158165
Ok(())
@@ -172,6 +179,7 @@ impl DiskFileSystem {
172179
for (_, invalidators) in take(&mut *self.dir_invalidator_map.lock().unwrap()).into_iter() {
173180
invalidators.into_iter().for_each(|i| i.invalidate());
174181
}
182+
self.serialization_invalidator.invalidate();
175183
}
176184

177185
pub fn invalidate_with_reason(&self) {
@@ -189,6 +197,7 @@ impl DiskFileSystem {
189197
.into_iter()
190198
.for_each(|i| i.invalidate_with_reason(reason.clone()));
191199
}
200+
self.serialization_invalidator.invalidate();
192201
}
193202

194203
pub fn start_watching(&self) -> Result<()> {
@@ -217,6 +226,7 @@ impl DiskFileSystem {
217226
invalidator_map,
218227
dir_invalidator_map,
219228
)?;
229+
self.serialization_invalidator.invalidate();
220230

221231
Ok(())
222232
}
@@ -293,7 +303,7 @@ impl DiskFileSystem {
293303
/// ignore specific subpaths from each.
294304
#[turbo_tasks::function]
295305
pub async fn new(name: RcStr, root: RcStr, ignored_subpaths: Vec<RcStr>) -> Result<Vc<Self>> {
296-
mark_stateful();
306+
let serialization_invalidator = mark_stateful();
297307
// create the directory for the filesystem on disk, if it doesn't exist
298308
fs::create_dir_all(&root).await?;
299309

@@ -304,6 +314,7 @@ impl DiskFileSystem {
304314
invalidation_lock: Default::default(),
305315
invalidator_map: Arc::new(InvalidatorMap::new()),
306316
dir_invalidator_map: Arc::new(InvalidatorMap::new()),
317+
serialization_invalidator,
307318
watcher: Arc::new(DiskWatcher::new(
308319
ignored_subpaths.into_iter().map(PathBuf::from).collect(),
309320
)),
@@ -561,6 +572,7 @@ impl FileSystem for DiskFileSystem {
561572
for i in old_invalidators {
562573
self.invalidator_map.insert(key.clone(), i);
563574
}
575+
self.serialization_invalidator.invalidate();
564576
}
565577
return Ok(Completion::unchanged());
566578
}

turbopack/crates/turbo-tasks-testing/src/lib.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,10 @@ impl TurboTasksApi for VcStorage {
177177
unreachable!()
178178
}
179179

180+
fn invalidate_serialization(&self, _task: TaskId) {
181+
// ingore
182+
}
183+
180184
fn notify_scheduled_tasks(&self) {
181185
// ignore
182186
}
@@ -309,6 +313,10 @@ impl TurboTasksApi for VcStorage {
309313
// no-op
310314
}
311315

316+
fn mark_own_task_as_dirty_when_persisted(&self, _task: TaskId) {
317+
// no-op
318+
}
319+
312320
fn detached_for_testing(
313321
&self,
314322
_f: std::pin::Pin<Box<dyn Future<Output = Result<()>> + Send + 'static>>,

turbopack/crates/turbo-tasks/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ futures = { workspace = true }
2828
indexmap = { workspace = true, features = ["serde"] }
2929
mopa = "0.2.0"
3030
once_cell = { workspace = true }
31-
parking_lot = { workspace = true }
31+
parking_lot = { workspace = true, features = ["serde"]}
3232
pin-project-lite = { workspace = true }
3333
regex = { workspace = true }
3434
rustc-hash = { workspace = true }

turbopack/crates/turbo-tasks/src/backend.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,13 @@ pub trait Backend: Sync + Send {
455455
fn invalidate_tasks(&self, tasks: &[TaskId], turbo_tasks: &dyn TurboTasksBackendApi<Self>);
456456
fn invalidate_tasks_set(&self, tasks: &TaskIdSet, turbo_tasks: &dyn TurboTasksBackendApi<Self>);
457457

458+
fn invalidate_serialization(
459+
&self,
460+
_task: TaskId,
461+
_turbo_tasks: &dyn TurboTasksBackendApi<Self>,
462+
) {
463+
}
464+
458465
fn get_task_description(&self, task: TaskId) -> String;
459466

460467
/// Task-local state that stored inside of [`TurboTasksBackendApi`]. Constructed with
@@ -623,6 +630,14 @@ pub trait Backend: Sync + Send {
623630
// Do nothing by default
624631
}
625632

633+
fn mark_own_task_as_dirty_when_persisted(
634+
&self,
635+
_task: TaskId,
636+
_turbo_tasks: &dyn TurboTasksBackendApi<Self>,
637+
) {
638+
// Do nothing by default
639+
}
640+
626641
fn create_transient_task(
627642
&self,
628643
task_type: TransientTaskType,

turbopack/crates/turbo-tasks/src/lib.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ mod raw_vc;
6262
mod rcstr;
6363
mod read_ref;
6464
pub mod registry;
65+
mod serialization_invalidation;
6566
pub mod small_duration;
6667
mod state;
6768
pub mod task;
@@ -91,16 +92,17 @@ pub use invalidation::{
9192
pub use join_iter_ext::{JoinIterExt, TryFlatJoinIterExt, TryJoinIterExt};
9293
pub use magic_any::MagicAny;
9394
pub use manager::{
94-
dynamic_call, dynamic_this_call, emit, mark_finished, mark_stateful, prevent_gc, run_once,
95-
run_once_with_reason, spawn_blocking, spawn_thread, trait_call, turbo_tasks, CurrentCellRef,
96-
ReadConsistency, TaskPersistence, TurboTasks, TurboTasksApi, TurboTasksBackendApi,
97-
TurboTasksBackendApiExt, TurboTasksCallApi, Unused, UpdateInfo,
95+
dynamic_call, dynamic_this_call, emit, mark_dirty_when_persisted, mark_finished, mark_stateful,
96+
prevent_gc, run_once, run_once_with_reason, spawn_blocking, spawn_thread, trait_call,
97+
turbo_tasks, CurrentCellRef, ReadConsistency, TaskPersistence, TurboTasks, TurboTasksApi,
98+
TurboTasksBackendApi, TurboTasksBackendApiExt, TurboTasksCallApi, Unused, UpdateInfo,
9899
};
99100
pub use native_function::{FunctionMeta, NativeFunction};
100101
pub use raw_vc::{CellId, RawVc, ReadRawVcFuture, ResolveTypeError};
101102
pub use read_ref::ReadRef;
102103
use rustc_hash::FxHasher;
103-
pub use state::State;
104+
pub use serialization_invalidation::SerializationInvalidator;
105+
pub use state::{State, TransientState};
104106
pub use task::{task_input::TaskInput, SharedReference};
105107
pub use trait_ref::{IntoTraitRef, TraitRef};
106108
pub use turbo_tasks_macros::{function, value, value_impl, value_trait, TaskInput};

turbopack/crates/turbo-tasks/src/manager.rs

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ use crate::{
3939
magic_any::MagicAny,
4040
raw_vc::{CellId, RawVc},
4141
registry::{self, get_function},
42+
serialization_invalidation::SerializationInvalidator,
4243
task::shared_reference::TypedSharedReference,
4344
trace::TraceRawVcs,
4445
trait_helpers::get_trait_method,
@@ -115,6 +116,8 @@ pub trait TurboTasksApi: TurboTasksCallApi + Sync + Send {
115116
fn invalidate(&self, task: TaskId);
116117
fn invalidate_with_reason(&self, task: TaskId, reason: StaticOrArc<dyn InvalidationReason>);
117118

119+
fn invalidate_serialization(&self, task: TaskId);
120+
118121
/// Eagerly notifies all tasks that were scheduled for notifications via
119122
/// `schedule_notify_tasks_set()`
120123
fn notify_scheduled_tasks(&self);
@@ -180,6 +183,7 @@ pub trait TurboTasksApi: TurboTasksCallApi + Sync + Send {
180183
fn read_own_task_cell(&self, task: TaskId, index: CellId) -> Result<TypedCellContent>;
181184
fn update_own_task_cell(&self, task: TaskId, index: CellId, content: CellContent);
182185
fn mark_own_task_as_finished(&self, task: TaskId);
186+
fn mark_own_task_as_dirty_when_persisted(&self, task: TaskId);
183187

184188
fn connect_task(&self, task: TaskId);
185189

@@ -1256,6 +1260,10 @@ impl<B: Backend + 'static> TurboTasksApi for TurboTasks<B> {
12561260
self.backend.invalidate_task(task, self);
12571261
}
12581262

1263+
fn invalidate_serialization(&self, task: TaskId) {
1264+
self.backend.invalidate_serialization(task, self);
1265+
}
1266+
12591267
fn notify_scheduled_tasks(&self) {
12601268
let _ = CURRENT_GLOBAL_TASK_STATE.try_with(|cell| {
12611269
let tasks = {
@@ -1395,6 +1403,11 @@ impl<B: Backend + 'static> TurboTasksApi for TurboTasks<B> {
13951403
self.backend.mark_own_task_as_finished(task, self);
13961404
}
13971405

1406+
fn mark_own_task_as_dirty_when_persisted(&self, task: TaskId) {
1407+
self.backend
1408+
.mark_own_task_as_dirty_when_persisted(task, self);
1409+
}
1410+
13981411
/// Creates a future that inherits the current task id and task state. The current global task
13991412
/// will wait for this future to be dropped before exiting.
14001413
fn detached_for_testing(
@@ -1677,6 +1690,15 @@ pub fn current_task_for_testing() -> TaskId {
16771690
CURRENT_GLOBAL_TASK_STATE.with(|ts| ts.read().unwrap().task_id)
16781691
}
16791692

1693+
/// Marks the current task as dirty when restored from persistent cache.
1694+
pub fn mark_dirty_when_persisted() {
1695+
with_turbo_tasks(|tt| {
1696+
tt.mark_own_task_as_dirty_when_persisted(current_task(
1697+
"turbo_tasks::mark_dirty_when_persisted()",
1698+
))
1699+
});
1700+
}
1701+
16801702
/// Marks the current task as finished. This excludes it from waiting for
16811703
/// strongly consistency.
16821704
pub fn mark_finished() {
@@ -1687,10 +1709,15 @@ pub fn mark_finished() {
16871709

16881710
/// Marks the current task as stateful. This prevents the tasks from being
16891711
/// dropped without persisting the state.
1690-
pub fn mark_stateful() {
1712+
/// Returns a [`SerializationInvalidator`] that can be used to invalidate the
1713+
/// serialization of the current task cells
1714+
pub fn mark_stateful() -> SerializationInvalidator {
16911715
CURRENT_GLOBAL_TASK_STATE.with(|cell| {
1692-
let CurrentGlobalTaskState { stateful, .. } = &mut *cell.write().unwrap();
1716+
let CurrentGlobalTaskState {
1717+
stateful, task_id, ..
1718+
} = &mut *cell.write().unwrap();
16931719
*stateful = true;
1720+
SerializationInvalidator::new(*task_id)
16941721
})
16951722
}
16961723

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
use std::{
2+
hash::Hash,
3+
sync::{Arc, Weak},
4+
};
5+
6+
use serde::{de::Visitor, Deserialize, Serialize};
7+
use tokio::runtime::Handle;
8+
9+
use crate::{manager::with_turbo_tasks, trace::TraceRawVcs, TaskId, TurboTasksApi};
10+
11+
pub struct SerializationInvalidator {
12+
task: TaskId,
13+
turbo_tasks: Weak<dyn TurboTasksApi>,
14+
handle: Handle,
15+
}
16+
17+
impl Hash for SerializationInvalidator {
18+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
19+
self.task.hash(state);
20+
}
21+
}
22+
23+
impl PartialEq for SerializationInvalidator {
24+
fn eq(&self, other: &Self) -> bool {
25+
self.task == other.task
26+
}
27+
}
28+
29+
impl Eq for SerializationInvalidator {}
30+
31+
impl SerializationInvalidator {
32+
pub fn invalidate(&self) {
33+
let SerializationInvalidator {
34+
task,
35+
turbo_tasks,
36+
handle,
37+
} = self;
38+
let _ = handle.enter();
39+
if let Some(turbo_tasks) = turbo_tasks.upgrade() {
40+
turbo_tasks.invalidate_serialization(*task);
41+
}
42+
}
43+
44+
pub(crate) fn new(task_id: TaskId) -> Self {
45+
Self {
46+
task: task_id,
47+
turbo_tasks: with_turbo_tasks(Arc::downgrade),
48+
handle: Handle::current(),
49+
}
50+
}
51+
}
52+
53+
impl TraceRawVcs for SerializationInvalidator {
54+
fn trace_raw_vcs(&self, _context: &mut crate::trace::TraceRawVcsContext) {
55+
// nothing here
56+
}
57+
}
58+
59+
impl Serialize for SerializationInvalidator {
60+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
61+
where
62+
S: serde::Serializer,
63+
{
64+
serializer.serialize_newtype_struct("SerializationInvalidator", &self.task)
65+
}
66+
}
67+
68+
impl<'de> Deserialize<'de> for SerializationInvalidator {
69+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
70+
where
71+
D: serde::Deserializer<'de>,
72+
{
73+
struct V;
74+
75+
impl<'de> Visitor<'de> for V {
76+
type Value = SerializationInvalidator;
77+
78+
fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
79+
write!(f, "an SerializationInvalidator")
80+
}
81+
82+
fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
83+
where
84+
D: serde::Deserializer<'de>,
85+
{
86+
Ok(SerializationInvalidator {
87+
task: TaskId::deserialize(deserializer)?,
88+
turbo_tasks: with_turbo_tasks(Arc::downgrade),
89+
handle: tokio::runtime::Handle::current(),
90+
})
91+
}
92+
}
93+
deserializer.deserialize_newtype_struct("SerializationInvalidator", V)
94+
}
95+
}

0 commit comments

Comments
 (0)