Skip to content

Commit 4cad3cb

Browse files
committed
Replace 1-level index with 2-level index in IndexingRescanner to improve performance of IndexingRescanner
1 parent 634aad3 commit 4cad3cb

File tree

1 file changed

+100
-20
lines changed

1 file changed

+100
-20
lines changed

src/main/java/engineering/swat/watch/impl/overflows/IndexingRescanner.java

Lines changed: 100 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -42,20 +42,94 @@
4242

4343
import org.apache.logging.log4j.LogManager;
4444
import org.apache.logging.log4j.Logger;
45+
import org.checkerframework.checker.nullness.qual.Nullable;
4546

4647
import engineering.swat.watch.WatchEvent;
4748
import engineering.swat.watch.WatchScope;
4849
import engineering.swat.watch.impl.EventHandlingWatch;
4950

5051
public class IndexingRescanner extends MemorylessRescanner {
5152
private final Logger logger = LogManager.getLogger();
52-
private final Map<Path, FileTime> index = new ConcurrentHashMap<>();
53+
private final Index index = new Index();
5354

5455
public IndexingRescanner(Executor exec, Path path, WatchScope scope) {
5556
super(exec);
5657
new Indexer(path, scope).walkFileTree(); // Make an initial scan to populate the index
5758
}
5859

60+
private static class Index {
61+
private final Map<Path, Map<Path, FileTime>> lastModifiedTimes = new ConcurrentHashMap<>();
62+
// ^^^^ ^^^^
63+
// Parent File name (possibly a directory itself)
64+
65+
public @Nullable FileTime putLastModifiedTime(Path p, FileTime time) {
66+
var parent = p.getParent();
67+
var fileName = p.getFileName();
68+
if (parent != null && fileName != null) {
69+
return putLastModifiedTime(parent, fileName, time);
70+
} else {
71+
throw new IllegalArgumentException("A path key should have both a parent and a file name");
72+
}
73+
}
74+
75+
public @Nullable FileTime putLastModifiedTime(Path parent, Path fileName, FileTime time) {
76+
var nested = lastModifiedTimes.computeIfAbsent(parent, x -> new ConcurrentHashMap<>());
77+
return nested.put(fileName, time);
78+
}
79+
80+
public @Nullable FileTime getLastModifiedTime(Path p) {
81+
var parent = p.getParent();
82+
var fileName = p.getFileName();
83+
if (parent != null && fileName != null) {
84+
return getLastModifiedTime(parent, fileName);
85+
} else {
86+
throw new IllegalArgumentException("A path key should have both a parent and a file name");
87+
}
88+
}
89+
90+
public @Nullable FileTime getLastModifiedTime(Path parent, Path fileName) {
91+
var nested = lastModifiedTimes.get(parent);
92+
return nested == null ? null : nested.get(fileName);
93+
}
94+
95+
public @Nullable Set<Path> getFileNames(Path parent) {
96+
var nested = lastModifiedTimes.get(parent);
97+
return nested == null ? null : nested.keySet();
98+
}
99+
100+
public @Nullable FileTime remove(Path p) {
101+
var parent = p.getParent();
102+
var fileName = p.getFileName();
103+
if (parent != null && fileName != null) {
104+
return remove(parent, fileName);
105+
} else {
106+
throw new IllegalArgumentException("A path key should have both a parent and a file name");
107+
}
108+
}
109+
110+
public @Nullable FileTime remove(Path parent, Path fileName) {
111+
var nested = lastModifiedTimes.get(parent);
112+
if (nested != null) {
113+
var removed = nested.remove(fileName);
114+
if (nested.isEmpty()) {
115+
lastModifiedTimes.remove(parent, nested);
116+
// Note: Between checking `nested` for non-emptiness and
117+
// removing it from `lastModifiedTimes`, other threads may
118+
// have put new entries in it. After the removal, these
119+
// entries are lost, so the index doesn't completely reflect
120+
// the file system anymore, and redundant events may be
121+
// issued. This doesn't break the contract with the client,
122+
// though (because this rescanner still provides an
123+
// over-approximation). Avoiding this race would be costly
124+
// in terms of synchronization.
125+
}
126+
return removed;
127+
} else {
128+
return null;
129+
}
130+
}
131+
}
132+
59133
private class Indexer extends BaseFileVisitor {
60134
public Indexer(Path path, WatchScope scope) {
61135
super(path, scope);
@@ -64,14 +138,14 @@ public Indexer(Path path, WatchScope scope) {
64138
@Override
65139
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
66140
if (!path.equals(dir)) {
67-
index.put(dir, attrs.lastModifiedTime());
141+
index.putLastModifiedTime(dir, attrs.lastModifiedTime());
68142
}
69143
return FileVisitResult.CONTINUE;
70144
}
71145

72146
@Override
73147
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
74-
index.put(file, attrs.lastModifiedTime());
148+
index.putLastModifiedTime(file, attrs.lastModifiedTime());
75149
return FileVisitResult.CONTINUE;
76150
}
77151
}
@@ -84,30 +158,32 @@ protected MemorylessRescanner.Generator newGenerator(Path path, WatchScope scope
84158
}
85159

86160
protected class Generator extends MemorylessRescanner.Generator {
87-
// Field to keep track of (a stack of) the paths that are visited during
88-
// the current rescan (one frame for each nested subdirectory), to
89-
// approximate `DELETED` events that happened since the previous rescan.
90-
// Instances of this class are supposed to be used non-concurrently, so
91-
// no synchronization to access this field is needed.
161+
// Field to keep track of (a stack of sets, of file names, of) the paths
162+
// that are visited during the current rescan (one frame for each nested
163+
// subdirectory), to approximate `DELETED` events that happened since
164+
// the previous rescan. Instances of this class are supposed to be used
165+
// non-concurrently, so no synchronization to access this field is
166+
// needed.
92167
private final Deque<Set<Path>> visited = new ArrayDeque<>();
93168

94169
public Generator(Path path, WatchScope scope) {
95170
super(path, scope);
96171
this.visited.push(new HashSet<>()); // Initial set for content of `path`
97172
}
98173

99-
private <T> void addToPeeked(Deque<Set<T>> deque, T t) {
174+
private void addToPeeked(Deque<Set<Path>> deque, Path p) {
100175
var peeked = deque.peek();
101-
if (peeked != null) {
102-
peeked.add(t);
176+
var fileName = p.getFileName();
177+
if (peeked != null && fileName != null) {
178+
peeked.add(fileName);
103179
}
104180
}
105181

106182
// -- MemorylessRescanner.Generator --
107183

108184
@Override
109185
protected void generateEvents(Path path, BasicFileAttributes attrs) {
110-
var lastModifiedTimeOld = index.get(path);
186+
var lastModifiedTimeOld = index.getLastModifiedTime(path);
111187
var lastModifiedTimeNew = attrs.lastModifiedTime();
112188

113189
// The path isn't indexed yet
@@ -138,14 +214,18 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
138214
@Override
139215
public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
140216
// Issue `DELETED` events based on the set of paths visited in `dir`
217+
var indexedInDir = index.getFileNames(dir);
141218
var visitedInDir = visited.pop();
142-
if (visitedInDir != null) {
143-
for (var p : index.keySet()) {
144-
if (dir.equals(p.getParent()) && !visitedInDir.contains(p) && !Files.exists(p)) {
145-
// Note: The third subcondition is needed because the
146-
// index may have been updated during the visit. In that
147-
// case, `p` might not be in `visitedInDir`, but exist.
148-
events.add(new WatchEvent(WatchEvent.Kind.DELETED, p));
219+
if (indexedInDir != null && visitedInDir != null) {
220+
for (var p : indexedInDir) {
221+
if (!visitedInDir.contains(p)) {
222+
var fullPath = dir.resolve(p);
223+
// The index may have been updated during the visit, so
224+
// even if `p` isn't contained in `visitedInDir`, by
225+
// now, it might have come into existance.
226+
if (!Files.exists(fullPath)) {
227+
events.add(new WatchEvent(WatchEvent.Kind.DELETED, fullPath));
228+
}
149229
}
150230
}
151231
}
@@ -169,7 +249,7 @@ public void accept(EventHandlingWatch watch, WatchEvent event) {
169249
case MODIFIED:
170250
try {
171251
var lastModifiedTimeNew = Files.getLastModifiedTime(fullPath);
172-
var lastModifiedTimeOld = index.put(fullPath, lastModifiedTimeNew);
252+
var lastModifiedTimeOld = index.putLastModifiedTime(fullPath, lastModifiedTimeNew);
173253

174254
// If a `MODIFIED` event happens for a path that wasn't in
175255
// the index yet, then a `CREATED` event has somehow been

0 commit comments

Comments
 (0)