Skip to content

Commit 0ad1311

Browse files
authored
Turbopack: Rework watcher data structures, store watched list and watcher behind the same RwLock (#82258)
Reworks the data structures used for the watcher to achieve a few goals: - I want to avoid multiple locks, as I don't think we actually need two different locks, and multiple locks leaves more room for bugs (e.g. lock inversion). - The information about if we're in non-recursive mode or recursive mode needs to be available outside of the lock for performance reasons. - The `watching`/`watched` set needs to be ordered. We don't have a convenient concurrent BTreeSet or Trie/Radix Tree pulled in, and I'd like to avoid adding more dependencies. We end up locking `notify_watcher` anywhere we'd need a write lock anyways, so it might block a few reads, but otherwise it's not that bad. Fixes in this PR: - Fixes an issue where multiple concurrent calls to `ensure_watching` for the same directory could cause one of those calls to exit early. This may cause a file in that directory to be read before the filesystem watcher is set up, potentially missing events. - Improves the time complexity of `restore_if_watching` using an ordered set to avoid iterating over all of `watched` to find child directories. See `OrderedPathSetExt`. This time complexity problem is introduced in the previous PR, #82130. Other changes: - It turns out that we don't need to explicitly set up any watchers during `start_watching` except for the `root_dir`. If paths have already been read, they'll set up watchers before re-reading the file when we invalidate it! This simplifies things, and means there's less stuff we need to track if you never start the watcher (i.e. `next build`). - Added a `--start-watching-late` option to the fuzzer to test calling `start_watching` after all the initial reads happen, to help validate the safety of the claim in the previous bullet point. Minor (aesthetic) changes: - Use past tense: `watching` is now `watched` and `ensure_watching` is now `ensure_watched`, as the `watched` list includes both currently active paths *and* previously watched but deleted paths. - Shorten the names of non-public types, because we have a lot more of them, and the names would get long otherwise ## Benchmarking This replaces a `DashSet<_>` with (essentially) a `RwLock<BTreeSet<_>>`. There's some risk of increased contention, particularly in the read-only fast-path for `ensure_watched`/`ensure_watching`. Under the assumption that the most heavily lock-contended use-case for this code is after restoring from persistent cache on a large repository: 1. Check out `vercel-site` in `front`. 2. Hack the new tracing span onto the canary version. 3. Build a release version with `pnpm pack-next --project ~/front/apps/vercel-site --no-js-build -- --release` 4. `rm -rf .next` 5. `NEXT_TURBOPACK_TRACING='turbo_tasks_fs' TURBO_ENGINE_IGNORE_DIRTY=1 pnpm next dev --turbopack` 6. Load `http://localhost:3000/vercel`. Wait for it to fully finish loading. Stop the server with `ctrl+c`. 7. Restart the server, wait for it to finish initializing, and then exit. 8. Start the trace viewer: `pnpm next internal trace .next/trace-turbopack`, and look at the aggregated times of `ensure_watched`/`ensure_watching`. Results: before (canary): 313ms ensure_watching (2235x spans) after (this pr): 279ms ensure_watched (2235x spans) So no significant difference.
1 parent 78c544b commit 0ad1311

File tree

4 files changed

+343
-149
lines changed

4 files changed

+343
-149
lines changed

turbopack/crates/turbo-tasks-fs/src/lib.rs

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -261,11 +261,7 @@ impl DiskFileSystemInner {
261261
let invalidator = turbo_tasks::get_invalidator();
262262
self.invalidator_map
263263
.insert(path.to_owned(), invalidator, None);
264-
if let Some(non_recursive) = &self.watcher.non_recursive_state
265-
&& let Some(dir) = path.parent()
266-
{
267-
non_recursive.ensure_watching(&self.watcher, dir, self.root_path())?;
268-
}
264+
self.watcher.ensure_watched_file(path, self.root_path())?;
269265
Ok(())
270266
}
271267

@@ -291,11 +287,7 @@ impl DiskFileSystemInner {
291287
.collect::<Vec<_>>();
292288
invalidators.insert(invalidator, Some(write_content));
293289
drop(invalidator_map);
294-
if let Some(non_recursive) = &self.watcher.non_recursive_state
295-
&& let Some(dir) = path.parent()
296-
{
297-
non_recursive.ensure_watching(&self.watcher, dir, self.root_path())?;
298-
}
290+
self.watcher.ensure_watched_file(path, self.root_path())?;
299291
Ok(old_invalidators)
300292
}
301293

@@ -305,9 +297,7 @@ impl DiskFileSystemInner {
305297
let invalidator = turbo_tasks::get_invalidator();
306298
self.dir_invalidator_map
307299
.insert(path.to_owned(), invalidator, None);
308-
if let Some(non_recursive) = &self.watcher.non_recursive_state {
309-
non_recursive.ensure_watching(&self.watcher, path, self.root_path())?;
310-
}
300+
self.watcher.ensure_watched_dir(path, self.root_path())?;
311301
Ok(())
312302
}
313303

Lines changed: 86 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use std::{
2-
collections::{BTreeMap, btree_map::CursorMut},
2+
collections::{BTreeMap, BTreeSet, btree_map, btree_set},
33
ops::Bound,
44
path::{Path, PathBuf},
55
};
@@ -9,24 +9,31 @@ use std::{
99
///
1010
/// In the future, this may use a more efficient representation, like a radix tree or trie.
1111
pub trait OrderedPathMapExt<V> {
12-
fn extract_path_with_children<'a>(&'a mut self, path: &'a Path) -> ExtractWithChildren<'a, V>;
12+
fn extract_path_with_children<'a>(
13+
&'a mut self,
14+
path: &'a Path,
15+
) -> PathMapExtractPathWithChildren<'a, V>;
1316
}
1417

1518
impl<V> OrderedPathMapExt<V> for BTreeMap<PathBuf, V> {
16-
fn extract_path_with_children<'a>(&'a mut self, path: &'a Path) -> ExtractWithChildren<'a, V> {
17-
ExtractWithChildren {
19+
/// Iterates over and removes `path` and all of its children.
20+
fn extract_path_with_children<'a>(
21+
&'a mut self,
22+
path: &'a Path,
23+
) -> PathMapExtractPathWithChildren<'a, V> {
24+
PathMapExtractPathWithChildren {
1825
cursor: self.lower_bound_mut(Bound::Included(path)),
1926
parent_path: path,
2027
}
2128
}
2229
}
2330

24-
pub struct ExtractWithChildren<'a, V> {
25-
cursor: CursorMut<'a, PathBuf, V>,
31+
pub struct PathMapExtractPathWithChildren<'a, V> {
32+
cursor: btree_map::CursorMut<'a, PathBuf, V>,
2633
parent_path: &'a Path,
2734
}
2835

29-
impl<V> Iterator for ExtractWithChildren<'_, V> {
36+
impl<V> Iterator for PathMapExtractPathWithChildren<'_, V> {
3037
type Item = (PathBuf, V);
3138

3239
fn next(&mut self) -> Option<Self::Item> {
@@ -44,20 +51,61 @@ impl<V> Iterator for ExtractWithChildren<'_, V> {
4451
}
4552
}
4653

54+
/// A thin wrapper around [`BTreeSet<PathBuf>`] that provides efficient iteration of child paths.
55+
///
56+
/// In the future, this may use a more efficient representation, like a radix tree or trie.
57+
pub trait OrderedPathSetExt {
58+
/// Iterates over the children of `path`, excluding `path` itself.
59+
fn iter_path_children<'a>(&'a mut self, path: &'a Path) -> PathSetIterPathChildren<'a>;
60+
}
61+
62+
impl OrderedPathSetExt for BTreeSet<PathBuf> {
63+
fn iter_path_children<'a>(&'a mut self, path: &'a Path) -> PathSetIterPathChildren<'a> {
64+
PathSetIterPathChildren {
65+
// this is range written weirdly due to type inference limitations:
66+
// https://stackoverflow.com/a/66130898
67+
range: self.range::<Path, _>((Bound::Excluded(path), Bound::Unbounded)),
68+
parent_path: path,
69+
}
70+
}
71+
}
72+
73+
pub struct PathSetIterPathChildren<'a> {
74+
// we don't need the nightly cursors API for this, the `Range` type is sufficient.
75+
range: btree_set::Range<'a, PathBuf>,
76+
parent_path: &'a Path,
77+
}
78+
79+
impl<'a> Iterator for PathSetIterPathChildren<'a> {
80+
type Item = &'a Path;
81+
82+
fn next(&mut self) -> Option<Self::Item> {
83+
// this simple implementation works because `Path` implements `Ord` (and `starts_with`)
84+
// using path component comparision, rather than raw byte comparisions. The parent path is
85+
// always guaranteed to be placed immediately before its children (pre-order traversal).
86+
let current_path = self.range.next()?;
87+
if !current_path.starts_with(self.parent_path) {
88+
return None;
89+
}
90+
Some(&**current_path)
91+
}
92+
}
93+
4794
#[cfg(test)]
4895
mod tests {
4996
use super::*;
5097

5198
#[test]
52-
fn test_extract_with_children() {
99+
fn test_map_extract_path_with_children() {
53100
let mut map = BTreeMap::default();
54101
map.insert(PathBuf::from("a"), 1);
55102
map.insert(PathBuf::from("a/b"), 2);
56103
map.insert(PathBuf::from("a/b/c"), 3);
57104
map.insert(PathBuf::from("a/b/d"), 4);
58-
map.insert(PathBuf::from("a/c"), 5);
59-
map.insert(PathBuf::from("x/y/z"), 6);
60-
map.insert(PathBuf::from("z/a/b"), 7);
105+
map.insert(PathBuf::from("a/b/d/e"), 5);
106+
map.insert(PathBuf::from("a/c"), 6);
107+
map.insert(PathBuf::from("x/y/z"), 7);
108+
map.insert(PathBuf::from("z/a/b"), 8);
61109

62110
let parent_path = PathBuf::from("a/b");
63111
let extracted: Vec<_> = map.extract_path_with_children(&parent_path).collect();
@@ -66,15 +114,39 @@ mod tests {
66114
(PathBuf::from("a/b"), 2),
67115
(PathBuf::from("a/b/c"), 3),
68116
(PathBuf::from("a/b/d"), 4),
117+
(PathBuf::from("a/b/d/e"), 5),
69118
];
70119
assert_eq!(extracted, expected_extracted);
71120

72121
let mut expected_remaining = BTreeMap::new();
73122
expected_remaining.insert(PathBuf::from("a"), 1);
74-
expected_remaining.insert(PathBuf::from("a/c"), 5);
75-
expected_remaining.insert(PathBuf::from("x/y/z"), 6);
76-
expected_remaining.insert(PathBuf::from("z/a/b"), 7);
123+
expected_remaining.insert(PathBuf::from("a/c"), 6);
124+
expected_remaining.insert(PathBuf::from("x/y/z"), 7);
125+
expected_remaining.insert(PathBuf::from("z/a/b"), 8);
77126

78127
assert_eq!(map, expected_remaining);
79128
}
129+
130+
#[test]
131+
fn test_set_iter_path_children() {
132+
let mut set = BTreeSet::default();
133+
set.insert(PathBuf::from("a"));
134+
set.insert(PathBuf::from("a/b"));
135+
set.insert(PathBuf::from("a/b/c"));
136+
set.insert(PathBuf::from("a/b/d"));
137+
set.insert(PathBuf::from("a/b/d/e"));
138+
set.insert(PathBuf::from("a/c"));
139+
set.insert(PathBuf::from("x/y/z"));
140+
set.insert(PathBuf::from("z/a/b"));
141+
142+
let parent_path = PathBuf::from("a/b");
143+
let iterated: Vec<_> = set.iter_path_children(&parent_path).collect();
144+
145+
let expected_iterated = vec![
146+
PathBuf::from("a/b/c"),
147+
PathBuf::from("a/b/d"),
148+
PathBuf::from("a/b/d/e"),
149+
];
150+
assert_eq!(iterated, expected_iterated);
151+
}
80152
}

0 commit comments

Comments
 (0)