Skip to content

Commit f438c9e

Browse files
committed
Add support for timeout to cleanup task
1 parent ff6ff64 commit f438c9e

File tree

2 files changed

+173
-20
lines changed

2 files changed

+173
-20
lines changed

dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java

Lines changed: 105 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
import java.time.temporal.ChronoUnit;
1717
import java.util.Set;
1818
import java.util.concurrent.CompletableFuture;
19+
import java.util.concurrent.ExecutionException;
20+
import java.util.concurrent.TimeUnit;
21+
import java.util.concurrent.TimeoutException;
1922
import org.slf4j.Logger;
2023
import org.slf4j.LoggerFactory;
2124

@@ -56,7 +59,7 @@ default FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOE
5659
return null;
5760
}
5861

59-
default void onCleanupStart() {}
62+
default void onCleanupStart(boolean selfCleanup, long timeout, TimeUnit unit) {}
6063
}
6164

6265
private class CleanupVisitor implements FileVisitor<Path> {
@@ -66,15 +69,35 @@ private class CleanupVisitor implements FileVisitor<Path> {
6669

6770
private final boolean cleanSelf;
6871
private final Instant cutoff;
72+
private final Instant timeoutTarget;
6973

70-
CleanupVisitor(boolean cleanSelf) {
74+
private boolean terminated = false;
75+
76+
CleanupVisitor(boolean cleanSelf, long timeout, TimeUnit unit) {
7177
this.cleanSelf = cleanSelf;
7278
this.cutoff = Instant.now().minus(cutoffSeconds, ChronoUnit.SECONDS);
79+
this.timeoutTarget =
80+
timeout > -1
81+
? Instant.now().plus(TimeUnit.MILLISECONDS.convert(timeout, unit), ChronoUnit.MILLIS)
82+
: null;
83+
}
84+
85+
boolean isTerminated() {
86+
return terminated;
87+
}
88+
89+
private boolean isTimedOut() {
90+
return timeoutTarget != null && Instant.now().isAfter(timeoutTarget);
7391
}
7492

7593
@Override
7694
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
7795
throws IOException {
96+
if (isTimedOut()) {
97+
log.debug("Cleaning task timed out");
98+
terminated = true;
99+
return FileVisitResult.TERMINATE;
100+
}
78101
cleanupTestHook.preVisitDirectory(dir, attrs);
79102

80103
if (dir.equals(baseTempDir)) {
@@ -93,6 +116,11 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
93116

94117
@Override
95118
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
119+
if (isTimedOut()) {
120+
log.debug("Cleaning task timed out");
121+
terminated = true;
122+
return FileVisitResult.TERMINATE;
123+
}
96124
cleanupTestHook.visitFile(file, attrs);
97125
try {
98126
if (Files.getLastModifiedTime(file).toInstant().isAfter(cutoff)) {
@@ -107,6 +135,11 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
107135

108136
@Override
109137
public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException {
138+
if (isTimedOut()) {
139+
log.debug("Cleaning task timed out");
140+
terminated = true;
141+
return FileVisitResult.TERMINATE;
142+
}
110143
cleanupTestHook.visitFileFailed(file, exc);
111144
// do not log files/directories removed by another process running concurrently
112145
if (!(exc instanceof NoSuchFileException) && log.isDebugEnabled()) {
@@ -117,6 +150,11 @@ public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOExce
117150

118151
@Override
119152
public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
153+
if (isTimedOut()) {
154+
log.debug("Cleaning task timed out");
155+
terminated = true;
156+
return FileVisitResult.TERMINATE;
157+
}
120158
cleanupTestHook.postVisitDirectory(dir, exc);
121159
if (exc instanceof NoSuchFileException) {
122160
return FileVisitResult.CONTINUE;
@@ -162,7 +200,11 @@ public static TempLocationManager getInstance() {
162200
static TempLocationManager getInstance(boolean waitForCleanup) {
163201
TempLocationManager instance = SingletonHolder.INSTANCE;
164202
if (waitForCleanup) {
165-
instance.waitForCleanup();
203+
try {
204+
instance.waitForCleanup(5, TimeUnit.SECONDS);
205+
} catch (TimeoutException ignored) {
206+
207+
}
166208
}
167209
return instance;
168210
}
@@ -216,17 +258,21 @@ private TempLocationManager() {
216258
tempDir = baseTempDir.resolve("pid_" + pid);
217259
cleanupTask = CompletableFuture.runAsync(() -> cleanup(false));
218260

219-
Runtime.getRuntime()
220-
.addShutdownHook(
221-
new Thread(
222-
() -> {
223-
try {
224-
cleanupTask.join();
225-
} finally {
226-
cleanup(true);
227-
}
228-
},
229-
"Temp Location Manager Cleanup"));
261+
Thread selfCleanup =
262+
new Thread(
263+
() -> {
264+
try {
265+
waitForCleanup(1, TimeUnit.SECONDS);
266+
} catch (TimeoutException e) {
267+
log.info(
268+
"Cleanup task timed out. {} temp directory might not have been cleaned up properly",
269+
tempDir);
270+
} finally {
271+
cleanup(true);
272+
}
273+
},
274+
"Temp Location Manager Cleanup");
275+
Runtime.getRuntime().addShutdownHook(selfCleanup);
230276
}
231277

232278
/**
@@ -273,21 +319,61 @@ public Path getTempDir(Path subPath, boolean create) {
273319
return rslt;
274320
}
275321

276-
void cleanup(boolean cleanSelf) {
322+
/**
323+
* Walk the base temp directory recursively and remove all inactive per-process entries. No
324+
* timeout is applied.
325+
*
326+
* @param cleanSelf {@literal true} will call only this process' temp directory, {@literal false}
327+
* only the other processes will be cleaned up
328+
* @return {@literal true} if cleanup fully succeeded or {@literal false} otherwise (eg.
329+
* interruption etc.)
330+
*/
331+
boolean cleanup(boolean cleanSelf) {
332+
return cleanup(cleanSelf, -1, TimeUnit.SECONDS);
333+
}
334+
335+
/**
336+
* Walk the base temp directory recursively and remove all inactive per-process entries
337+
*
338+
* @param cleanSelf {@literal true} will call only this process' temp directory, {@literal false}
339+
* only the other processes will be cleaned up
340+
* @param timeout the task timeout; may be {@literal -1} to signal no timeout
341+
* @param unit the task timeout unit
342+
* @return {@literal true} if cleanup fully succeeded or {@literal false} otherwise (timeout,
343+
* interruption etc.)
344+
*/
345+
boolean cleanup(boolean cleanSelf, long timeout, TimeUnit unit) {
277346
try {
278-
cleanupTestHook.onCleanupStart();
279-
Files.walkFileTree(baseTempDir, new CleanupVisitor(cleanSelf));
347+
cleanupTestHook.onCleanupStart(cleanSelf, timeout, unit);
348+
CleanupVisitor visitor = new CleanupVisitor(cleanSelf, timeout, unit);
349+
Files.walkFileTree(baseTempDir, visitor);
350+
return !visitor.isTerminated();
280351
} catch (IOException e) {
281352
if (log.isDebugEnabled()) {
282353
log.warn("Unable to cleanup temp location {}", baseTempDir, e);
283354
} else {
284355
log.warn("Unable to cleanup temp location {}", baseTempDir);
285356
}
286357
}
358+
return false;
287359
}
288360

289361
// accessible for tests
290-
void waitForCleanup() {
291-
cleanupTask.join();
362+
void waitForCleanup(long timeout, TimeUnit unit) throws TimeoutException {
363+
try {
364+
cleanupTask.get(timeout, unit);
365+
} catch (InterruptedException e) {
366+
cleanupTask.cancel(true);
367+
Thread.currentThread().interrupt();
368+
} catch (TimeoutException e) {
369+
cleanupTask.cancel(true);
370+
throw e;
371+
} catch (ExecutionException e) {
372+
if (log.isDebugEnabled()) {
373+
log.debug("Failed to cleanup temp directory: {}", tempDir, e);
374+
} else {
375+
log.debug("Failed to cleanup temp directory: {}", tempDir);
376+
}
377+
}
292378
}
293379
}

dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,18 @@
1212
import java.nio.file.Paths;
1313
import java.nio.file.attribute.BasicFileAttributes;
1414
import java.nio.file.attribute.PosixFilePermissions;
15+
import java.util.ArrayList;
16+
import java.util.List;
1517
import java.util.Properties;
1618
import java.util.UUID;
1719
import java.util.concurrent.Phaser;
20+
import java.util.concurrent.TimeUnit;
21+
import java.util.concurrent.locks.LockSupport;
22+
import java.util.stream.Stream;
1823
import org.junit.jupiter.api.Test;
1924
import org.junit.jupiter.params.ParameterizedTest;
25+
import org.junit.jupiter.params.provider.Arguments;
26+
import org.junit.jupiter.params.provider.MethodSource;
2027
import org.junit.jupiter.params.provider.ValueSource;
2128

2229
public class TempLocationManagerTest {
@@ -172,12 +179,72 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx
172179
phaser.arriveAndAwaitAdvance();
173180
Files.deleteIfExists(fakeTempFile);
174181
phaser.arriveAndAwaitAdvance();
175-
mgr.waitForCleanup();
182+
mgr.waitForCleanup(30, TimeUnit.SECONDS);
176183

177184
assertFalse(Files.exists(fakeTempFile));
178185
assertFalse(Files.exists(fakeTempDir));
179186
}
180187

188+
@ParameterizedTest
189+
@MethodSource("timeoutTestArguments")
190+
void testCleanupWithTimeout(boolean selfCleanup, String section) throws Exception {
191+
long timeoutMs = 500;
192+
TempLocationManager.CleanupHook delayer =
193+
new TempLocationManager.CleanupHook() {
194+
@Override
195+
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
196+
throws IOException {
197+
if (section.equals("preVisitDirectory")) {
198+
LockSupport.parkNanos(timeoutMs * 1_000_000);
199+
}
200+
return TempLocationManager.CleanupHook.super.preVisitDirectory(dir, attrs);
201+
}
202+
203+
@Override
204+
public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException {
205+
if (section.equals("visitFileFailed")) {
206+
LockSupport.parkNanos(timeoutMs * 1_000_000);
207+
}
208+
return TempLocationManager.CleanupHook.super.visitFileFailed(file, exc);
209+
}
210+
211+
@Override
212+
public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
213+
if (section.equals("postVisitDirectory")) {
214+
LockSupport.parkNanos(timeoutMs * 1_000_000);
215+
}
216+
return TempLocationManager.CleanupHook.super.postVisitDirectory(dir, exc);
217+
}
218+
219+
@Override
220+
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
221+
throws IOException {
222+
if (section.equals("visitFile")) {
223+
LockSupport.parkNanos(timeoutMs * 1_000_000);
224+
}
225+
return TempLocationManager.CleanupHook.super.visitFile(file, attrs);
226+
}
227+
};
228+
Path baseDir =
229+
Files.createTempDirectory(
230+
"ddprof-test-",
231+
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
232+
TempLocationManager instance = instance(baseDir, delayer);
233+
boolean rslt = instance.cleanup(selfCleanup, timeoutMs, TimeUnit.MILLISECONDS);
234+
assertTrue(rslt);
235+
}
236+
237+
private static Stream<Arguments> timeoutTestArguments() {
238+
List<Arguments> argumentsList = new ArrayList<>();
239+
for (boolean selfCleanup : new boolean[] {true, false}) {
240+
for (String intercepted :
241+
new String[] {"preVisitDirectory", "visitFile", "postVisitDirectory"}) {
242+
argumentsList.add(Arguments.of(selfCleanup, intercepted));
243+
}
244+
}
245+
return argumentsList.stream();
246+
}
247+
181248
private TempLocationManager instance(Path baseDir, TempLocationManager.CleanupHook cleanupHook)
182249
throws IOException {
183250
Properties props = new Properties();

0 commit comments

Comments
 (0)