Skip to content

Commit 026229d

Browse files
authored
Merge pull request #68 from common-workflow-language/main-workflow-discovery
Add workflow discovery based on indegree of run params
2 parents a60a22d + 1ea044b commit 026229d

File tree

5 files changed

+111
-30
lines changed

5 files changed

+111
-30
lines changed

src/main/java/org/commonwl/viewer/domain/CWLCollection.java

Lines changed: 91 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,14 @@
2424
import com.fasterxml.jackson.databind.node.ArrayNode;
2525
import com.fasterxml.jackson.databind.node.ObjectNode;
2626
import com.fasterxml.jackson.databind.node.TextNode;
27+
import org.apache.commons.io.FilenameUtils;
2728
import org.commonwl.viewer.services.DockerService;
2829
import org.eclipse.egit.github.core.RepositoryContents;
2930
import org.commonwl.viewer.services.GitHubService;
3031
import org.yaml.snakeyaml.Yaml;
3132

3233
import java.io.IOException;
3334
import java.util.*;
34-
import java.util.regex.Matcher;
35-
import java.util.regex.Pattern;
3635

3736
/**
3837
* Provides CWL parsing for workflows to gather an overview
@@ -45,7 +44,7 @@ public class CWLCollection {
4544
private String commitSha;
4645

4746
// Maps of ID to associated JSON
48-
private Map<String, JsonNode> cwlDocs = new HashMap<>();
47+
private Map<String, JsonNode> workflows = new HashMap<>();
4948

5049
// The main workflow
5150
private String mainWorkflowKey;
@@ -77,15 +76,12 @@ public class CWLCollection {
7776
private final String ARRAY = "array";
7877
private final String ARRAY_ITEMS = "items";
7978
private final String LOCATION = "location";
79+
private final String RUN = "run";
8080
private final String REQUIREMENTS = "requirements";
8181
private final String HINTS = "hints";
8282
private final String DOCKER_REQUIREMENT = "DockerRequirement";
8383
private final String DOCKER_PULL = "dockerPull";
8484

85-
// URL validation for docker pull id
86-
private final String DOCKERHUB_ID_REGEX = "^([0-9a-z]{4,30})(?:\\/([a-zA-Z0-9_-]+))?(?:\\:[a-zA-Z0-9_-]+)?$";
87-
private final Pattern dockerhubPattern = Pattern.compile(DOCKERHUB_ID_REGEX);
88-
8985
/**
9086
* Creates a new collection of CWL files from a Github repository
9187
* @param githubService Service to provide the Github API functionality
@@ -163,26 +159,78 @@ private void addDoc(JsonNode newDoc, String fileName) {
163159
if (newDoc.has(DOC_GRAPH)) {
164160
// Add each of the sub documents
165161
for (JsonNode jsonNode : newDoc.get(DOC_GRAPH)) {
166-
cwlDocs.put(extractID(jsonNode), jsonNode);
162+
if (isWorkflow(jsonNode)) {
163+
workflows.put(extractID(jsonNode), jsonNode);
164+
}
167165
}
168166
} else {
169167
// Otherwise just add the document itself with ID of document name
170-
cwlDocs.put(fileName, newDoc);
168+
if (isWorkflow(newDoc)) {
169+
workflows.put(fileName, newDoc);
170+
}
171171
}
172172
}
173173

174174
/**
175175
* Find the main workflow object in the group of files being considered
176+
* by finding the minimal inDegree in a graph of run: parameters within steps
177+
* @return The file name/key of the workflow
176178
*/
177-
private void findMainWorkflow() {
178-
// Find the first workflow we come across
179-
// TODO: Consider relationship between run: parameters to better discover this
180-
for (Map.Entry<String, JsonNode> doc : cwlDocs.entrySet()) {
181-
if (doc.getValue().get(CLASS).asText().equals(WORKFLOW)) {
182-
mainWorkflowKey = doc.getKey();
183-
return;
179+
private String findMainWorkflow() {
180+
// TODO: make this path dependant so it doesn't get messed up by duplicate filenames or graphs
181+
// Currently this strategy fails gracefully and returns the first workflow in the case of a graph
182+
183+
// Store the in degree of each workflow
184+
Map<String, Integer> inDegrees = new HashMap<String, Integer>();
185+
for (String key : workflows.keySet()) {
186+
inDegrees.put(key, 0);
187+
}
188+
189+
// Loop through documents and calculate in degrees
190+
for (Map.Entry<String, JsonNode> doc : workflows.entrySet()) {
191+
JsonNode content = doc.getValue();
192+
if (content.get(CLASS).asText().equals(WORKFLOW)) {
193+
// Parse workflow steps and see whether other workflows are run
194+
JsonNode steps = content.get(STEPS);
195+
if (steps.getClass() == ArrayNode.class) {
196+
// Explicit ID and other fields within each input list
197+
for (JsonNode step : steps) {
198+
String run = FilenameUtils.getName(extractRun(step));
199+
if (run != null && inDegrees.containsKey(run)) {
200+
inDegrees.put(run, inDegrees.get(run) + 1);
201+
}
202+
}
203+
} else if (steps.getClass() == ObjectNode.class) {
204+
// ID is the key of each object
205+
Iterator<Map.Entry<String, JsonNode>> iterator = steps.fields();
206+
while (iterator.hasNext()) {
207+
Map.Entry<String, JsonNode> stepNode = iterator.next();
208+
JsonNode stepJson = stepNode.getValue();
209+
String run = FilenameUtils.getName(extractRun(stepJson));
210+
if (run != null && inDegrees.containsKey(run)) {
211+
inDegrees.put(run, inDegrees.get(run) + 1);
212+
}
213+
}
214+
}
184215
}
185216
}
217+
218+
// Find a workflow with minimum inDegree and return
219+
int minVal = Integer.MAX_VALUE;
220+
String minKey = null;
221+
for (Map.Entry<String, Integer> inDegree : inDegrees.entrySet()) {
222+
if (inDegree.getValue() < minVal) {
223+
minKey = inDegree.getKey();
224+
minVal = inDegree.getValue();
225+
}
226+
227+
// Early escape if minVal is already minimal
228+
if (minVal == 0) {
229+
return minKey;
230+
}
231+
}
232+
233+
return minKey;
186234
}
187235

188236
/**
@@ -192,14 +240,14 @@ private void findMainWorkflow() {
192240
public Workflow getWorkflow() {
193241
// Get the main workflow
194242
if (mainWorkflowKey == null) {
195-
findMainWorkflow();
243+
mainWorkflowKey = findMainWorkflow();
196244

197-
// If it is still less than 0 there is no workflow to be found
245+
// If it is still null there is no workflow to be found
198246
if (mainWorkflowKey == null) {
199247
return null;
200248
}
201249
}
202-
JsonNode mainWorkflow = cwlDocs.get(mainWorkflowKey);
250+
JsonNode mainWorkflow = workflows.get(mainWorkflowKey);
203251

204252
// Use ID/filename for label if there is no defined one
205253
String label = extractLabel(mainWorkflow);
@@ -624,4 +672,28 @@ private String extractTypes(JsonNode typeNode) {
624672
}
625673
return null;
626674
}
675+
676+
/**
677+
* Extract the run parameter from a node representing a step
678+
* @param stepNode The root node of a step
679+
* @return A string with the run parameter if it exists
680+
*/
681+
private String extractRun(JsonNode stepNode) {
682+
if (stepNode != null) {
683+
if (stepNode.has(RUN)) {
684+
return stepNode.get(RUN).asText();
685+
}
686+
}
687+
return null;
688+
}
689+
690+
/**
691+
* Identify a JsonNode as a workflow
692+
* @param rootNode The root node
693+
* @return Whether or not the node is a workflow
694+
*/
695+
private boolean isWorkflow(JsonNode rootNode) {
696+
return (rootNode.has(CLASS)
697+
&& rootNode.get(CLASS).asText().equals(WORKFLOW));
698+
}
627699
}

src/main/java/org/commonwl/viewer/domain/ROBundle.java

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ public class ROBundle {
5454
private Bundle bundle;
5555
private GithubDetails githubInfo;
5656
private String commitSha;
57+
private Agent thisApp;
5758
private Set<HashableAgent> authors = new HashSet<HashableAgent>();
5859

5960
// Pattern for extracting version from a cwl file
@@ -80,12 +81,13 @@ public ROBundle(GitHubService githubService, GithubDetails githubInfo, String co
8081
// Simplified attribution for RO bundle
8182
try {
8283
// Tool attribution in createdBy
83-
Agent cwlViewer = new Agent(appName);
84-
cwlViewer.setUri(new URI(appURL));
85-
manifest.setCreatedBy(cwlViewer);
84+
thisApp = new Agent(appName);
85+
thisApp.setUri(new URI(appURL));
86+
manifest.setCreatedBy(thisApp);
8687

8788
// Retrieval Info
88-
manifest.setRetrievedBy(cwlViewer);
89+
// TODO: Make this importedBy/On/From
90+
manifest.setRetrievedBy(thisApp);
8991
manifest.setRetrievedOn(manifest.getCreatedOn());
9092
manifest.setRetrievedFrom(new URI("https://github.com/" + githubInfo.getOwner() + "/"
9193
+ githubInfo.getRepoName() + "/tree/" + commitSha + "/" + githubInfo.getPath()));
@@ -165,9 +167,12 @@ private void addFiles(List<RepositoryContents> repoContents, Path path) throws I
165167
authors.addAll(fileAuthors);
166168
aggregation.setAuthoredBy(new ArrayList<Agent>(fileAuthors));
167169

168-
// Set retrievedFrom information for this file in the manifest
169-
aggregation.setRetrievedFrom(new URI("https://github.com/" + githubFile.getOwner() + "/" +
170-
githubFile.getRepoName() + "/blob/" + commitSha + "/" + githubFile.getPath()));
170+
// Set retrieved information for this file in the manifest
171+
aggregation.setRetrievedFrom(new URI("https://raw.githubusercontent.com/" + githubFile.getOwner() + "/" +
172+
githubFile.getRepoName() + "/" + commitSha + "/" + githubFile.getPath()));
173+
aggregation.setRetrievedBy(thisApp);
174+
aggregation.setRetrievedOn(aggregation.getCreatedOn());
175+
171176
} catch (URISyntaxException ex) {
172177
logger.error("Error creating URI for RO Bundle", ex);
173178
}

src/main/resources/application.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# Application details for Research Object Bundle attribution
66
applicationName = Common Workflow Language Viewer
7-
applicationURL = http://view.commonwl.org
7+
applicationURL = https://view.commonwl.org
88

99
# Path to a directory in which the RO Bundles will be stored
1010
bundleStorage = /tmp

src/main/resources/templates/fragments/footer.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
distributed under <a href="https://www.apache.org/licenses/LICENSE-2.0" rel="noopener" target="_blank">Apache license, version 2.0</a>
3535
<br />
3636
<span th:if="${workflow != null}">
37-
<a th:href="@{'https://github.com/' + ${workflow.retrievedFrom.owner} + '/' + ${workflow.retrievedFrom.repoName}}" href="#" rel="noopener" target="_blank">Shown workflow</a> has separate copyright and license
37+
<a th:href="@{'https://github.com/' + ${workflow.retrievedFrom.owner} + '/' + ${workflow.retrievedFrom.repoName} + '/tree/' + ${workflow.lastCommit} + '/' + ${workflow.retrievedFrom.path}}" href="#" rel="noopener" target="_blank">Shown workflow</a> has separate copyright and license
3838
</span>
3939
</div>
4040
</div>

src/main/resources/templates/workflows.html

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,16 @@ <h1>Explore Workflows</h1>
5252
<img class="workflow-thumb" th:src="@{'/workflows/' + ${workflow.id} + '/graph/svg'}" alt="workflow graph" />
5353
</a>
5454
</td>
55-
<td th:text="${workflow.label}"></td>
55+
<td>
56+
<span th:text="${workflow.label}">Name</span>
57+
<p><i th:text="${workflow.doc}">Description</i></p>
58+
</td>
5659
<td>
5760
<a th:href="@{'https://github.com/' + ${workflow.retrievedFrom.owner} + '/' + ${workflow.retrievedFrom.repoName} + '/tree/' + ${workflow.lastCommit} + '/' + ${workflow.retrievedFrom.path}}" rel="noopener" target="_blank">
5861
<img id="githubLogo" src="../static/img/GitHub-Mark-32px.png" th:src="@{/img/GitHub-Mark-32px.png}" width="20" height="20" />
59-
<span th:text="@{${workflow.retrievedFrom.owner} + '/' + ${workflow.retrievedFrom.repoName} + '/' + ${workflow.retrievedFrom.path}}"></span>
62+
<span th:text="@{${workflow.retrievedFrom.owner} + '/' + ${workflow.retrievedFrom.repoName} + '/' + ${workflow.retrievedFrom.path}}">https://github.com</span>
6063
</a>
64+
<p>Branch/Commit ID: <i th:text="${workflow.retrievedFrom.branch}">master</i></p>
6165
</td>
6266
<td><a th:href="${workflowURL}"><span class="icon-view glyphicon glyphicon-chevron-right"></span></a></td>
6367
</tr>

0 commit comments

Comments
 (0)