Skip to content

Commit 4d1093e

Browse files
author
Mark Robinson
committed
Add support for single file size limits
CWL files are required to be no larger than the limit. Non-CWL files may be larger and will be externally linked in the RO manifest. Contributes to #13
1 parent f3854ff commit 4d1093e

File tree

5 files changed

+74
-61
lines changed

5 files changed

+74
-61
lines changed

src/main/java/org/commonwl/viewer/domain/CWLCollection.java

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import com.fasterxml.jackson.databind.node.ArrayNode;
2525
import com.fasterxml.jackson.databind.node.ObjectNode;
2626
import com.fasterxml.jackson.databind.node.TextNode;
27+
import org.apache.commons.io.FileUtils;
2728
import org.apache.commons.io.FilenameUtils;
2829
import org.commonwl.viewer.services.DockerService;
2930
import org.eclipse.egit.github.core.RepositoryContents;
@@ -43,6 +44,8 @@ public class CWLCollection {
4344
private GithubDetails githubInfo;
4445
private String commitSha;
4546

47+
private int singleFileSizeLimit;
48+
4649
// Maps of ID to associated JSON
4750
private Map<String, JsonNode> workflows = new HashMap<>();
4851

@@ -90,10 +93,11 @@ public class CWLCollection {
9093
* @throws IOException Any API errors which may have occurred
9194
*/
9295
public CWLCollection(GitHubService githubService, GithubDetails githubInfo,
93-
String commitSha) throws IOException {
96+
String commitSha, int singleFileSizeLimit) throws IOException {
9497
this.githubInfo = githubInfo;
9598
this.githubService = githubService;
9699
this.commitSha = commitSha;
100+
this.singleFileSizeLimit = singleFileSizeLimit;
97101

98102
// Add any CWL files from the Github repo to this collection
99103
List<RepositoryContents> repoContents = githubService.getContents(githubInfo);
@@ -129,19 +133,24 @@ private void addDocs(List<RepositoryContents> repoContents) throws IOException {
129133

130134
// If this is a cwl file which needs to be parsed
131135
if (extension.equals(CWL_EXTENSION)) {
132-
133-
// Get the content of this file from Github
134-
GithubDetails githubFile = new GithubDetails(githubInfo.getOwner(),
135-
githubInfo.getRepoName(), githubInfo.getBranch(), repoContent.getPath());
136-
String fileContent = githubService.downloadFile(githubFile, commitSha);
137-
138-
// Parse yaml to JsonNode
139-
Yaml reader = new Yaml();
140-
ObjectMapper mapper = new ObjectMapper();
141-
JsonNode cwlFile = mapper.valueToTree(reader.load(fileContent));
142-
143-
// Add document to those being considered
144-
addDoc(cwlFile, repoContent.getName());
136+
if (repoContent.getSize() <= singleFileSizeLimit) {
137+
// Get the content of this file from Github
138+
GithubDetails githubFile = new GithubDetails(githubInfo.getOwner(),
139+
githubInfo.getRepoName(), githubInfo.getBranch(), repoContent.getPath());
140+
String fileContent = githubService.downloadFile(githubFile, commitSha);
141+
142+
// Parse yaml to JsonNode
143+
Yaml reader = new Yaml();
144+
ObjectMapper mapper = new ObjectMapper();
145+
JsonNode cwlFile = mapper.valueToTree(reader.load(fileContent));
146+
147+
// Add document to those being considered
148+
addDoc(cwlFile, repoContent.getName());
149+
} else {
150+
throw new IOException("File '" + repoContent.getName() + "' is over singleFileSizeLimit - " +
151+
FileUtils.byteCountToDisplaySize(repoContent.getSize()) + "/" +
152+
FileUtils.byteCountToDisplaySize(singleFileSizeLimit));
153+
}
145154
}
146155
}
147156

src/main/java/org/commonwl/viewer/domain/ROBundle.java

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
package org.commonwl.viewer.domain;
2121

22+
import org.apache.commons.io.FileUtils;
2223
import org.apache.commons.io.FilenameUtils;
2324
import org.apache.taverna.robundle.Bundle;
2425
import org.apache.taverna.robundle.Bundles;
@@ -30,7 +31,6 @@
3031
import org.slf4j.Logger;
3132
import org.slf4j.LoggerFactory;
3233

33-
import java.io.File;
3434
import java.io.IOException;
3535
import java.net.URI;
3636
import java.net.URISyntaxException;
@@ -56,6 +56,7 @@ public class ROBundle {
5656
private GithubDetails githubInfo;
5757
private String commitSha;
5858
private Agent thisApp;
59+
private int singleFileSizeLimit;
5960
private Set<HashableAgent> authors = new HashSet<HashableAgent>();
6061

6162
// Pattern for extracting version from a cwl file
@@ -68,8 +69,9 @@ public class ROBundle {
6869
* @throws IOException Any API errors which may have occurred
6970
*/
7071
public ROBundle(GitHubService githubService, GithubDetails githubInfo, String commitSha,
71-
String appName, String appURL) throws IOException {
72-
// TODO: Add back file size checking on individual files as well as whole bundle
72+
String appName, String appURL, int singleFileSizeLimit) throws IOException {
73+
// File size limits
74+
this.singleFileSizeLimit = singleFileSizeLimit;
7375

7476
// Create a new RO bundle
7577
this.bundle = Bundles.createBundle();
@@ -137,29 +139,48 @@ private void addFiles(List<RepositoryContents> repoContents, Path path) throws I
137139
// Otherwise this is a file so add to the bundle
138140
} else if (repoContent.getType().equals("file")) {
139141

140-
// Get the content of this file from Github
141-
GithubDetails githubFile = new GithubDetails(githubInfo.getOwner(),
142-
githubInfo.getRepoName(), githubInfo.getBranch(), repoContent.getPath());
143-
String fileContent = githubService.downloadFile(githubFile, commitSha);
144-
145-
// Save file to research object bundle
146-
Path newFilePort = path.resolve(repoContent.getName());
147-
Bundles.setStringValue(newFilePort, fileContent);
142+
try {
143+
// Where to store the new file in bundle
144+
Path bundleFilePath = path.resolve(repoContent.getName());
145+
146+
// Raw GitHub URI of this file at the given commit
147+
GithubDetails githubFile = new GithubDetails(githubInfo.getOwner(),
148+
githubInfo.getRepoName(), githubInfo.getBranch(), repoContent.getPath());
149+
URI rawURI = new URI("https://raw.githubusercontent.com/" + githubFile.getOwner() + "/" +
150+
githubFile.getRepoName() + "/" + commitSha + "/" + githubFile.getPath());
151+
152+
// Variable to store file contents
153+
String fileContent = null;
154+
155+
// Download or externally link if oversized
156+
if (repoContent.getSize() <= singleFileSizeLimit) {
157+
// Get the content of this file from Github
158+
fileContent = githubService.downloadFile(githubFile, commitSha);
159+
160+
// Save file to research object bundle
161+
Bundles.setStringValue(bundleFilePath, fileContent);
162+
} else {
163+
logger.info("File " + repoContent.getName() + " is too large to download -" +
164+
FileUtils.byteCountToDisplaySize(repoContent.getSize()) + "/" +
165+
FileUtils.byteCountToDisplaySize(singleFileSizeLimit) +
166+
" + linking externally to RO bundle");
167+
bundleFilePath = Bundles.setReference(bundleFilePath, rawURI);
168+
}
148169

149-
// Manifest aggregation
150-
PathMetadata aggregation = bundle.getManifest().getAggregation(newFilePort);
170+
// Manifest aggregation
171+
PathMetadata aggregation = bundle.getManifest().getAggregation(bundleFilePath);
151172

152-
try {
153173
// Special handling for cwl files
154174
if (FilenameUtils.getExtension(repoContent.getName()).equals("cwl")) {
155175
// Correct mime type (no official standard for yaml)
156176
aggregation.setMediatype("text/x-yaml");
157177

158178
// Add conformsTo for version extracted from regex
159-
// Lower overhead vs parsing entire file
160-
Matcher m = cwlVersionPattern.matcher(fileContent);
161-
if (m.find()) {
162-
aggregation.setConformsTo(new URI("https://w3id.org/cwl/" + m.group(1)));
179+
if (fileContent != null) {
180+
Matcher m = cwlVersionPattern.matcher(fileContent);
181+
if (m.find()) {
182+
aggregation.setConformsTo(new URI("https://w3id.org/cwl/" + m.group(1)));
183+
}
163184
}
164185
}
165186

@@ -169,15 +190,13 @@ private void addFiles(List<RepositoryContents> repoContents, Path path) throws I
169190
aggregation.setAuthoredBy(new ArrayList<Agent>(fileAuthors));
170191

171192
// Set retrieved information for this file in the manifest
172-
aggregation.setRetrievedFrom(new URI("https://raw.githubusercontent.com/" + githubFile.getOwner() + "/" +
173-
githubFile.getRepoName() + "/" + commitSha + "/" + githubFile.getPath()));
193+
aggregation.setRetrievedFrom(rawURI);
174194
aggregation.setRetrievedBy(thisApp);
175195
aggregation.setRetrievedOn(aggregation.getCreatedOn());
176196

177197
} catch (URISyntaxException ex) {
178198
logger.error("Error creating URI for RO Bundle", ex);
179199
}
180-
181200
}
182201
}
183202
}

src/main/java/org/commonwl/viewer/services/ROBundleFactory.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,18 +47,21 @@ public class ROBundleFactory {
4747

4848
private final String applicationName;
4949
private final String applicationURL;
50+
private final int singleFileSizeLimit;
5051
private final Path storageLocation;
5152
private final WorkflowRepository workflowRepository;
5253

5354
@Autowired
5455
public ROBundleFactory(@Value("${applicationName}") String applicationName,
5556
@Value("${applicationURL}") String applicationURL,
5657
@Value("${graphvizStorage}") Path graphvizStorage,
58+
@Value("${singleFileSizeLimit}") int singleFileSizeLimit,
5759
WorkflowRepository workflowRepository) {
5860
this.applicationName = applicationName;
5961
this.applicationURL = applicationURL;
6062
this.storageLocation = graphvizStorage;
6163
this.workflowRepository = workflowRepository;
64+
this.singleFileSizeLimit = singleFileSizeLimit;
6265
}
6366

6467
/**
@@ -75,7 +78,7 @@ void workflowROFromGithub(GitHubService githubService, GithubDetails githubInfo,
7578

7679
// Create a new Research Object Bundle with Github contents
7780
ROBundle bundle = new ROBundle(githubService, githubInfo, commitSha,
78-
applicationName, applicationURL);
81+
applicationName, applicationURL, singleFileSizeLimit);
7982

8083
// Save the bundle to the storage location in properties
8184
Path bundleLocation = bundle.saveToFile(storageLocation);

src/main/java/org/commonwl/viewer/services/WorkflowService.java

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import org.springframework.stereotype.Service;
3030

3131
import java.io.File;
32-
import java.io.IOException;
3332
import java.util.Calendar;
3433
import java.util.Date;
3534

@@ -43,18 +42,21 @@ public class WorkflowService {
4342
private final ROBundleFactory ROBundleFactory;
4443
private final int cacheDays;
4544
private final String graphvizStorage;
45+
private final int singleFileSizeLimit;
4646

4747
@Autowired
4848
public WorkflowService(GitHubService githubService,
4949
WorkflowRepository workflowRepository,
5050
ROBundleFactory ROBundleFactory,
5151
@Value("${cacheDays}") int cacheDays,
52-
@Value("${graphvizStorage}") String graphvizStorage) {
52+
@Value("${graphvizStorage}") String graphvizStorage,
53+
@Value("${singleFileSizeLimit}") int singleFileSizeLimit) {
5354
this.githubService = githubService;
5455
this.workflowRepository = workflowRepository;
5556
this.ROBundleFactory = ROBundleFactory;
5657
this.cacheDays = cacheDays;
5758
this.graphvizStorage = graphvizStorage;
59+
this.singleFileSizeLimit = singleFileSizeLimit;
5860
}
5961

6062
/**
@@ -69,7 +71,7 @@ public Workflow newWorkflowFromGithub(GithubDetails githubInfo) {
6971
String latestCommit = githubService.getCommitSha(githubInfo);
7072

7173
// Set up CWL utility to collect the documents
72-
CWLCollection cwlFiles = new CWLCollection(githubService, githubInfo, latestCommit);
74+
CWLCollection cwlFiles = new CWLCollection(githubService, githubInfo, latestCommit, singleFileSizeLimit);
7375

7476
// Get the workflow model
7577
Workflow workflowModel = cwlFiles.getWorkflow();
@@ -170,24 +172,4 @@ public boolean cacheExpired(Workflow workflow) {
170172
return false;
171173
}
172174
}
173-
174-
/*
175-
// Check total file size
176-
int totalFileSize = 0;
177-
for (RepositoryContents repoContent : repoContents) {
178-
totalFileSize += repoContent.getSize();
179-
}
180-
if (totalFileSize > totalFileSizeLimit) {
181-
throw new IOException("Files within the Github directory can not be above "
182-
+ totalFileSizeLimit + "B in size");
183-
}
184-
*/
185-
186-
187-
/*
188-
// Check file size before downloading
189-
if (file.getSize() > singleFileSizeLimit) {
190-
throw new IOException("Files within the Github directory can not be above " + singleFileSizeLimit + "B in size");
191-
}
192-
*/
193175
}

src/main/resources/application.properties

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@ graphvizStorage = /tmp
1616
cacheDays = 1
1717

1818
# File size limit for individual files in bytes
19-
singleFileSizeLimit = 5000000
19+
singleFileSizeLimit = 5242880
2020

2121
# File size limit for the contents of the entire Research Object Bundle in bytes
22-
totalFileSizeLimit = 10000000
22+
totalFileSizeLimit = 1073741824
2323

2424
#=======================
2525
# Github API settings

0 commit comments

Comments
 (0)