Skip to content

Commit 49f3f37

Browse files
committed
Investigation into fetching from remote repo with JGit
As a solution to concurrency issues
1 parent c49e60d commit 49f3f37

File tree

5 files changed

+96
-40
lines changed

5 files changed

+96
-40
lines changed

src/main/java/org/commonwl/view/cwl/CWLService.java

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import com.fasterxml.jackson.databind.node.ArrayNode;
2525
import com.fasterxml.jackson.databind.node.ObjectNode;
2626
import com.fasterxml.jackson.databind.node.TextNode;
27-
import org.apache.commons.io.FileUtils;
2827
import org.apache.commons.io.FilenameUtils;
2928
import org.apache.jena.ontology.OntModelSpec;
3029
import org.apache.jena.query.QuerySolution;
@@ -36,26 +35,24 @@
3635
import org.apache.jena.riot.RiotException;
3736
import org.commonwl.view.docker.DockerService;
3837
import org.commonwl.view.git.GitDetails;
38+
import org.commonwl.view.git.GitService;
3939
import org.commonwl.view.graphviz.ModelDotWriter;
4040
import org.commonwl.view.graphviz.RDFDotWriter;
4141
import org.commonwl.view.workflow.Workflow;
42+
import org.eclipse.jgit.errors.LargeObjectException;
4243
import org.slf4j.Logger;
4344
import org.slf4j.LoggerFactory;
4445
import org.springframework.beans.factory.annotation.Autowired;
45-
import org.springframework.beans.factory.annotation.Value;
4646
import org.springframework.stereotype.Service;
4747
import org.yaml.snakeyaml.Yaml;
4848

4949
import java.io.ByteArrayInputStream;
50-
import java.io.File;
5150
import java.io.IOException;
5251
import java.io.StringWriter;
5352
import java.nio.file.Path;
5453
import java.nio.file.Paths;
5554
import java.util.*;
5655

57-
import static org.apache.commons.io.FileUtils.readFileToString;
58-
5956
/**
6057
* Provides CWL parsing for workflows to gather an overview
6158
* for display and visualisation
@@ -68,7 +65,7 @@ public class CWLService {
6865
// Autowired properties/services
6966
private final RDFService rdfService;
7067
private final CWLTool cwlTool;
71-
private final int singleFileSizeLimit;
68+
private final GitService gitService;
7269

7370
// CWL specific strings
7471
private final String DOC_GRAPH = "$graph";
@@ -98,30 +95,27 @@ public class CWLService {
9895
* Constructor for the Common Workflow Language service
9996
* @param rdfService A service for handling RDF queries
10097
* @param cwlTool Handles cwltool integration
101-
* @param singleFileSizeLimit The file size limit for single files
98+
* @param gitService Handles Git repository functionality
10299
*/
103100
@Autowired
104101
public CWLService(RDFService rdfService,
105102
CWLTool cwlTool,
106-
@Value("${singleFileSizeLimit}") int singleFileSizeLimit) {
103+
GitService gitService) {
107104
this.rdfService = rdfService;
108105
this.cwlTool = cwlTool;
109-
this.singleFileSizeLimit = singleFileSizeLimit;
106+
this.gitService = gitService;
110107
}
111108

112109
/**
113110
* Gets the Workflow object from internal parsing
114-
* @param workflowFile The workflow file to be parsed
111+
* @param gitDetails The details for the workflow in the git repository
115112
* @return The constructed workflow object
116113
*/
117-
public Workflow parseWorkflowNative(File workflowFile) throws IOException {
118-
119-
// Check file size limit before parsing
120-
long fileSizeBytes = workflowFile.length();
121-
if (fileSizeBytes <= singleFileSizeLimit) {
122-
114+
public Workflow parseWorkflowNative(GitDetails gitDetails) throws IOException {
115+
try {
123116
// Parse file as yaml
124-
JsonNode cwlFile = yamlStringToJson(readFileToString(workflowFile));
117+
JsonNode cwlFile = yamlStringToJson(gitService.getFile(gitService
118+
.getRepository(gitDetails).getRepository(), gitDetails.getPath()));
125119

126120
// If the CWL file is packed there can be multiple workflows in a file
127121
Map<String, JsonNode> packedFiles = new HashMap<>();
@@ -138,7 +132,7 @@ public Workflow parseWorkflowNative(File workflowFile) throws IOException {
138132
// Use filename for label if there is no defined one
139133
String label = extractLabel(cwlFile);
140134
if (label == null) {
141-
label = FilenameUtils.getName(workflowFile.getPath());
135+
label = FilenameUtils.getName(gitDetails.getPath());
142136
}
143137

144138
// Construct the rest of the workflow model
@@ -163,22 +157,19 @@ public Workflow parseWorkflowNative(File workflowFile) throws IOException {
163157

164158
return workflowModel;
165159

166-
} else {
167-
throw new IOException("File '" + workflowFile.getName() + "' is over singleFileSizeLimit - " +
168-
FileUtils.byteCountToDisplaySize(fileSizeBytes) + "/" +
169-
FileUtils.byteCountToDisplaySize(singleFileSizeLimit));
160+
} catch (LargeObjectException ex) {
161+
throw new IOException("File '" + FilenameUtils.getName(gitDetails.getPath()) +
162+
"' is over singleFileSizeLimit";
170163
}
171164

172165
}
173166

174167
/**
175168
* Create a workflow model using cwltool rdf output
176169
* @param basicModel The basic workflow object created thus far
177-
* @param workflowFile The workflow file to run cwltool on
178170
* @return The constructed workflow object
179171
*/
180-
public Workflow parseWorkflowWithCwltool(Workflow basicModel,
181-
File workflowFile) throws CWLValidationException {
172+
public Workflow parseWorkflowWithCwltool(Workflow basicModel) throws CWLValidationException {
182173
GitDetails gitDetails = basicModel.getRetrievedFrom();
183174
String latestCommit = basicModel.getLastCommit();
184175
String packedWorkflowID = basicModel.getPackedWorkflowID();

src/main/java/org/commonwl/view/cwl/CWLToolRunner.java

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
import org.springframework.scheduling.annotation.EnableAsync;
3232
import org.springframework.stereotype.Component;
3333

34-
import java.io.File;
3534
import java.io.IOException;
3635
import java.util.Date;
3736

@@ -64,16 +63,14 @@ public CWLToolRunner(WorkflowRepository workflowRepository,
6463
}
6564

6665
@Async
67-
public void createWorkflowFromQueued(QueuedWorkflow queuedWorkflow, File workflowFile)
66+
public void createWorkflowFromQueued(QueuedWorkflow queuedWorkflow)
6867
throws IOException, InterruptedException {
6968

7069
Workflow tempWorkflow = queuedWorkflow.getTempRepresentation();
7170

7271
// Parse using cwltool and replace in database
7372
try {
74-
Workflow newWorkflow = cwlService.parseWorkflowWithCwltool(
75-
tempWorkflow,
76-
workflowFile);
73+
Workflow newWorkflow = cwlService.parseWorkflowWithCwltool(tempWorkflow);
7774

7875
// Success
7976
newWorkflow.setRetrievedFrom(tempWorkflow.getRetrievedFrom());

src/main/java/org/commonwl/view/git/GitService.java

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,15 @@
2323
import org.commonwl.view.researchobject.HashableAgent;
2424
import org.eclipse.jgit.api.Git;
2525
import org.eclipse.jgit.api.errors.GitAPIException;
26+
import org.eclipse.jgit.lib.ObjectId;
27+
import org.eclipse.jgit.lib.ObjectLoader;
2628
import org.eclipse.jgit.lib.PersonIdent;
29+
import org.eclipse.jgit.lib.Repository;
2730
import org.eclipse.jgit.revwalk.RevCommit;
31+
import org.eclipse.jgit.revwalk.RevTree;
32+
import org.eclipse.jgit.revwalk.RevWalk;
33+
import org.eclipse.jgit.treewalk.TreeWalk;
34+
import org.eclipse.jgit.treewalk.filter.PathFilter;
2835
import org.slf4j.Logger;
2936
import org.slf4j.LoggerFactory;
3037
import org.springframework.beans.factory.annotation.Autowired;
@@ -53,11 +60,16 @@ public class GitService {
5360
// Whether submodules are also cloned
5461
private boolean cloneSubmodules;
5562

63+
// File size limit for loading in bytes
64+
private int singleFileSizeLimit;
65+
5666
@Autowired
5767
public GitService(@Value("${gitStorage}") Path gitStorage,
58-
@Value("${gitAPI.cloneSubmodules}") boolean cloneSubmodules) {
68+
@Value("${gitAPI.cloneSubmodules}") boolean cloneSubmodules,
69+
@Value("${singleFileSizeLimit}") int singleFileSizeLimit) {
5970
this.gitStorage = gitStorage;
6071
this.cloneSubmodules = cloneSubmodules;
72+
this.singleFileSizeLimit = singleFileSizeLimit;
6173
}
6274

6375
/**
@@ -108,6 +120,46 @@ public Git getRepository(GitDetails gitDetails)
108120
return repo;
109121
}
110122

123+
/**
124+
* Get the contents of a file from the Git repository
125+
* @param repository The Git repository
126+
* @param refOrCommitId The branch name or commit ID as a string
127+
* @return The contents of the file as a string
128+
* @throws IOException Any errors in retrieving the file
129+
*/
130+
public String getFile(Repository repository, String refOrCommitId) throws IOException {
131+
String content;
132+
133+
// Get the ObjectID from the ref or commit ID string
134+
if (!ObjectId.isId(refOrCommitId)) {
135+
refOrCommitId = "refs/remotes/origin/" + refOrCommitId;
136+
}
137+
ObjectId commitId = repository.resolve(refOrCommitId);
138+
139+
// Walk over commits using defined filtering
140+
try (RevWalk revWalk = new RevWalk(repository)) {
141+
RevCommit commit = revWalk.parseCommit(commitId);
142+
RevTree tree = commit.getTree();
143+
144+
// Try to find specific file in the repository
145+
try (TreeWalk treeWalk = new TreeWalk(repository)) {
146+
treeWalk.addTree(tree);
147+
treeWalk.setRecursive(true);
148+
treeWalk.setFilter(PathFilter.create("README.md"));
149+
if (!treeWalk.next()) {
150+
throw new IllegalStateException("Did not find expected file");
151+
}
152+
153+
ObjectId objectId = treeWalk.getObjectId(0);
154+
ObjectLoader loader = repository.open(objectId);
155+
content = new String(loader.getCachedBytes(singleFileSizeLimit));
156+
}
157+
revWalk.dispose();
158+
}
159+
160+
return content;
161+
}
162+
111163
/**
112164
* Gets the commit ID of the HEAD for the given repository
113165
* @param repo The Git repository

src/main/java/org/commonwl/view/workflow/WorkflowService.java

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -236,14 +236,7 @@ public QueuedWorkflow createQueuedWorkflow(GitDetails gitInfo)
236236
File localPath = repo.getRepository().getWorkTree();
237237
String latestCommit = gitService.getCurrentCommitID(repo);
238238

239-
Path pathToWorkflowFile = localPath.toPath().resolve(gitInfo.getPath()).normalize().toAbsolutePath();
240-
// Prevent path traversal attacks
241-
if (!pathToWorkflowFile.startsWith(localPath.toPath().normalize().toAbsolutePath())) {
242-
throw new WorkflowNotFoundException();
243-
}
244-
245-
File workflowFile = new File(pathToWorkflowFile.toString());
246-
Workflow basicModel = cwlService.parseWorkflowNative(workflowFile);
239+
Workflow basicModel = cwlService.parseWorkflowNative(gitInfo);
247240

248241
// Set origin details
249242
basicModel.setRetrievedOn(new Date());
@@ -258,7 +251,7 @@ public QueuedWorkflow createQueuedWorkflow(GitDetails gitInfo)
258251
// ASYNC OPERATIONS
259252
// Parse with cwltool and update model
260253
try {
261-
cwlToolRunner.createWorkflowFromQueued(queuedWorkflow, workflowFile);
254+
cwlToolRunner.createWorkflowFromQueued(queuedWorkflow);
262255
} catch (Exception e) {
263256
logger.error("Could not update workflow with cwltool", e);
264257
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package org.commonwl.view.git;
2+
3+
import org.eclipse.jgit.api.Git;
4+
import org.junit.Test;
5+
6+
import java.io.File;
7+
8+
/**
9+
* Created by mark on 08/08/17.
10+
*/
11+
public class GitServiceTest {
12+
13+
@Test
14+
public void name() throws Exception {
15+
GitService gitService = new GitService(new File("/tmp").toPath(), true);
16+
17+
Git test = Git.open(new File("/home/mark/workflows/.git"));
18+
String content = gitService.getFile(test.getRepository(), "aab378267a528c67f634e421be40ab6f19f3b078");
19+
20+
System.out.println(content);
21+
}
22+
23+
}

0 commit comments

Comments
 (0)