Skip to content

Commit 608aadb

Browse files
Remove temp repositories (#494)
This commit prevents `/tmp` folder from growing indefinitely large by implementing two fixes to the git repo conflict management logic: - When a conflict is detected, the git repository is cloned on a temporary folder directly on the `gitStorage` fs (not in the system `/tmp` directory). This prevents using Docker's ephemeral fs under the hood. - A temporary repository is deleted as soon as the invoking method releases the git semaphore. This makes sense because the primary copy of each repository will still be preserved in cache.
1 parent ebe253c commit 608aadb

File tree

6 files changed

+86
-7
lines changed

6 files changed

+86
-7
lines changed

src/main/java/org/commonwl/view/cwl/CWLToolRunner.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ public void createWorkflowFromQueued(QueuedWorkflow queuedWorkflow)
171171
FileUtils.deleteGitRepository(repo);
172172
} finally {
173173
gitSemaphore.release(repoUrl);
174+
FileUtils.deleteTemporaryGitRepository(repo);
174175
queuedWorkflowRepository.save(queuedWorkflow);
175176
}
176177
}

src/main/java/org/commonwl/view/git/GitService.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919

2020
package org.commonwl.view.git;
2121

22-
import static org.apache.jena.ext.com.google.common.io.Files.createTempDir;
23-
2422
import java.io.File;
2523
import java.io.IOException;
2624
import java.net.URI;
@@ -29,6 +27,7 @@
2927
import java.nio.file.Path;
3028
import java.util.HashSet;
3129
import java.util.Set;
30+
import java.util.UUID;
3231
import org.apache.commons.codec.digest.DigestUtils;
3332
import org.commonwl.view.researchobject.HashableAgent;
3433
import org.eclipse.jgit.api.Git;
@@ -52,10 +51,10 @@ public class GitService {
5251
private final Logger logger = LoggerFactory.getLogger(this.getClass());
5352

5453
// Location to check out git repositories into
55-
private Path gitStorage;
54+
private final Path gitStorage;
5655

5756
// Whether submodules are also cloned
58-
private boolean cloneSubmodules;
57+
private final boolean cloneSubmodules;
5958

6059
@Autowired
6160
public GitService(
@@ -218,4 +217,10 @@ protected Git cloneRepo(String repoUrl, File directory) throws GitAPIException {
218217
.setCloneAllBranches(true)
219218
.call();
220219
}
220+
221+
protected File createTempDir() throws IOException {
222+
Path repoDir = gitStorage.resolve(String.valueOf(UUID.randomUUID()));
223+
Files.createDirectory(repoDir);
224+
return repoDir.toFile();
225+
}
221226
}

src/main/java/org/commonwl/view/util/FileUtils.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import java.io.File;
44
import java.io.IOException;
55
import java.nio.file.Path;
6+
import java.util.regex.Pattern;
7+
68
import org.apache.taverna.robundle.Bundle;
79
import org.apache.taverna.robundle.fs.BundleFileSystem;
810
import org.eclipse.jgit.api.Git;
@@ -17,6 +19,9 @@
1719
*/
1820
public class FileUtils {
1921

22+
private static final Pattern UUID_REGEX_PATTERN =
23+
Pattern.compile("^[{]?[0-9a-fA-F]{8}-([0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}[}]?$");
24+
2025
private FileUtils() {}
2126

2227
/**
@@ -47,6 +52,36 @@ public static void deleteGitRepository(Git repo) throws IOException {
4752
}
4853
}
4954

55+
/**
56+
* Deletes the directory of a temporary git repository. Note that the <code>Git</code> object
57+
* contains a repository with a directory, but this directory points to the
58+
*
59+
* <pre>.git</pre>
60+
*
61+
* directory. This method will delete the parent of the
62+
*
63+
* <pre>.git</pre>
64+
*
65+
* directory, which corresponds to the cloned folder with the source code from git. Since
66+
* temporary folders are generated using <code>UUID.randomUUID()</code> instead of the commit hex
67+
* digest, if the folder name is a valid UUID it is identified as temporary.
68+
*
69+
* @param repo Git repository object
70+
* @throws IOException if it fails to delete the Git repository directory
71+
* @since 1.4.6
72+
*/
73+
public static void deleteTemporaryGitRepository(Git repo) throws IOException {
74+
if (repo != null
75+
&& repo.getRepository() != null
76+
&& repo.getRepository().getDirectory() != null
77+
&& repo.getRepository().getDirectory().getParentFile() != null) {
78+
if (UUID_REGEX_PATTERN.matcher(
79+
repo.getRepository().getDirectory().getParentFile().getName()).matches()) {
80+
deleteGitRepository(repo);
81+
}
82+
}
83+
}
84+
5085
/**
5186
* Deletes the bundle temporary directory.
5287
*

src/main/java/org/commonwl/view/workflow/WorkflowService.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,9 +216,9 @@ public Workflow getWorkflow(GitDetails gitInfo) {
216216
public List<WorkflowOverview> getWorkflowsFromDirectory(GitDetails gitInfo)
217217
throws IOException, GitAPIException {
218218
List<WorkflowOverview> workflowsInDir = new ArrayList<>();
219+
Git repo = null;
219220
try {
220221
boolean safeToAccess = gitSemaphore.acquire(gitInfo.getRepoUrl());
221-
Git repo = null;
222222
while (repo == null) {
223223
try {
224224
repo = gitService.getRepository(gitInfo, safeToAccess);
@@ -260,6 +260,7 @@ public List<WorkflowOverview> getWorkflowsFromDirectory(GitDetails gitInfo)
260260
}
261261
} finally {
262262
gitSemaphore.release(gitInfo.getRepoUrl());
263+
FileUtils.deleteTemporaryGitRepository(repo);
263264
}
264265
return workflowsInDir;
265266
}
@@ -389,6 +390,7 @@ public QueuedWorkflow createQueuedWorkflow(GitDetails gitInfo)
389390
throw e;
390391
} finally {
391392
gitSemaphore.release(gitInfo.getRepoUrl());
393+
FileUtils.deleteTemporaryGitRepository(repo);
392394
}
393395

394396
// Return this model to be displayed
@@ -563,12 +565,14 @@ private boolean cacheExpired(Workflow workflow) {
563565
logger.info(
564566
"Time has expired for caching, checking commits for workflow " + workflow.getID());
565567
String currentHead;
568+
Git repo = null;
566569
boolean safeToAccess = gitSemaphore.acquire(workflow.getRetrievedFrom().getRepoUrl());
567570
try {
568-
Git repo = gitService.getRepository(workflow.getRetrievedFrom(), safeToAccess);
571+
repo = gitService.getRepository(workflow.getRetrievedFrom(), safeToAccess);
569572
currentHead = gitService.getCurrentCommitID(repo);
570573
} finally {
571574
gitSemaphore.release(workflow.getRetrievedFrom().getRepoUrl());
575+
FileUtils.deleteTemporaryGitRepository(repo);
572576
}
573577
logger.info(
574578
"Current: "

src/test/java/org/commonwl/view/git/GitServiceTest.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
import java.io.File;
3131
import java.io.IOException;
32+
import java.nio.file.Path;
3233
import org.eclipse.jgit.api.CheckoutCommand;
3334
import org.eclipse.jgit.api.Git;
3435
import org.eclipse.jgit.api.errors.GitAPIException;
@@ -53,7 +54,7 @@ public class GitServiceTest {
5354

5455
@BeforeEach
5556
public void setup() throws GitAPIException {
56-
GitService gitService = new GitService(null, false);
57+
GitService gitService = new GitService(Path.of(System.getProperty("java.io.tmpdir")), false);
5758
this.spyGitService = spy(gitService);
5859
this.mockGit = mock(Git.class);
5960
this.mockGoodCheckoutCommand = mock(CheckoutCommand.class);

src/test/java/org/commonwl/view/util/FileUtilsTest.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,13 @@
88

99
import java.io.File;
1010
import java.io.IOException;
11+
import java.nio.file.Files;
1112
import java.nio.file.Path;
13+
import java.util.UUID;
14+
import org.apache.commons.codec.digest.DigestUtils;
1215
import org.apache.taverna.robundle.Bundle;
1316
import org.apache.taverna.robundle.fs.BundleFileSystem;
17+
import org.commonwl.view.git.GitDetails;
1418
import org.eclipse.jgit.api.Git;
1519
import org.eclipse.jgit.lib.Repository;
1620
import org.junit.Rule;
@@ -250,4 +254,33 @@ public void testBundleParentDirectory() throws IOException {
250254
FileUtils.deleteBundleParentDirectory(bundle);
251255
assertFalse(bundleTemporaryDirectory.exists());
252256
}
257+
258+
@Test
259+
public void testRemoveTemporaryRepository() throws IOException {
260+
Git tempRepository = mock(Git.class);
261+
Repository tempRepository1 = mock(Repository.class);
262+
when(tempRepository.getRepository()).thenReturn(tempRepository1);
263+
File tempGitRepositoryParent = temporaryFolder.newFolder(String.valueOf(UUID.randomUUID()));
264+
File tempGitRepository = tempGitRepositoryParent.toPath().resolve(".git").toFile();
265+
Files.createDirectory(tempGitRepository.toPath());
266+
when(tempRepository1.getDirectory()).thenReturn(tempGitRepository);
267+
assertTrue(tempGitRepository.exists());
268+
FileUtils.deleteTemporaryGitRepository(tempRepository);
269+
assertFalse(tempGitRepository.exists());
270+
271+
Git notTempRepository = mock(Git.class);
272+
Repository notTempRepository1 = mock(Repository.class);
273+
when(notTempRepository.getRepository()).thenReturn(notTempRepository1);
274+
File notTempGitRepositoryParent =
275+
temporaryFolder.newFolder(
276+
DigestUtils.sha1Hex(
277+
GitDetails.normaliseUrl(
278+
"https://github.com/common-workflow-language/cwlviewer.git")));
279+
File notTempGitRepository = notTempGitRepositoryParent.toPath().resolve(".git").toFile();
280+
Files.createDirectory(notTempGitRepository.toPath());
281+
when(notTempRepository1.getDirectory()).thenReturn(notTempGitRepository);
282+
assertTrue(notTempGitRepository.exists());
283+
FileUtils.deleteTemporaryGitRepository(notTempRepository);
284+
assertTrue(notTempGitRepository.exists());
285+
}
253286
}

0 commit comments

Comments
 (0)