24
24
import com .fasterxml .jackson .databind .node .ArrayNode ;
25
25
import com .fasterxml .jackson .databind .node .ObjectNode ;
26
26
import com .fasterxml .jackson .databind .node .TextNode ;
27
+ import org .apache .commons .io .FilenameUtils ;
27
28
import org .commonwl .viewer .services .DockerService ;
28
29
import org .eclipse .egit .github .core .RepositoryContents ;
29
30
import org .commonwl .viewer .services .GitHubService ;
30
31
import org .yaml .snakeyaml .Yaml ;
31
32
32
33
import java .io .IOException ;
33
34
import java .util .*;
34
- import java .util .regex .Matcher ;
35
- import java .util .regex .Pattern ;
36
35
37
36
/**
38
37
* Provides CWL parsing for workflows to gather an overview
@@ -45,7 +44,7 @@ public class CWLCollection {
45
44
private String commitSha ;
46
45
47
46
// Maps of ID to associated JSON
48
- private Map <String , JsonNode > cwlDocs = new HashMap <>();
47
+ private Map <String , JsonNode > workflows = new HashMap <>();
49
48
50
49
// The main workflow
51
50
private String mainWorkflowKey ;
@@ -77,15 +76,12 @@ public class CWLCollection {
77
76
private final String ARRAY = "array" ;
78
77
private final String ARRAY_ITEMS = "items" ;
79
78
private final String LOCATION = "location" ;
79
+ private final String RUN = "run" ;
80
80
private final String REQUIREMENTS = "requirements" ;
81
81
private final String HINTS = "hints" ;
82
82
private final String DOCKER_REQUIREMENT = "DockerRequirement" ;
83
83
private final String DOCKER_PULL = "dockerPull" ;
84
84
85
- // URL validation for docker pull id
86
- private final String DOCKERHUB_ID_REGEX = "^([0-9a-z]{4,30})(?:\\ /([a-zA-Z0-9_-]+))?(?:\\ :[a-zA-Z0-9_-]+)?$" ;
87
- private final Pattern dockerhubPattern = Pattern .compile (DOCKERHUB_ID_REGEX );
88
-
89
85
/**
90
86
* Creates a new collection of CWL files from a Github repository
91
87
* @param githubService Service to provide the Github API functionality
@@ -163,26 +159,78 @@ private void addDoc(JsonNode newDoc, String fileName) {
163
159
if (newDoc .has (DOC_GRAPH )) {
164
160
// Add each of the sub documents
165
161
for (JsonNode jsonNode : newDoc .get (DOC_GRAPH )) {
166
- cwlDocs .put (extractID (jsonNode ), jsonNode );
162
+ if (isWorkflow (jsonNode )) {
163
+ workflows .put (extractID (jsonNode ), jsonNode );
164
+ }
167
165
}
168
166
} else {
169
167
// Otherwise just add the document itself with ID of document name
170
- cwlDocs .put (fileName , newDoc );
168
+ if (isWorkflow (newDoc )) {
169
+ workflows .put (fileName , newDoc );
170
+ }
171
171
}
172
172
}
173
173
174
174
/**
175
175
* Find the main workflow object in the group of files being considered
176
+ * by finding the minimal inDegree in a graph of run: parameters within steps
177
+ * @return The file name/key of the workflow
176
178
*/
177
- private void findMainWorkflow () {
178
- // Find the first workflow we come across
179
- // TODO: Consider relationship between run: parameters to better discover this
180
- for (Map .Entry <String , JsonNode > doc : cwlDocs .entrySet ()) {
181
- if (doc .getValue ().get (CLASS ).asText ().equals (WORKFLOW )) {
182
- mainWorkflowKey = doc .getKey ();
183
- return ;
179
+ private String findMainWorkflow () {
180
+ // TODO: make this path dependant so it doesn't get messed up by duplicate filenames or graphs
181
+ // Currently this strategy fails gracefully and returns the first workflow in the case of a graph
182
+
183
+ // Store the in degree of each workflow
184
+ Map <String , Integer > inDegrees = new HashMap <String , Integer >();
185
+ for (String key : workflows .keySet ()) {
186
+ inDegrees .put (key , 0 );
187
+ }
188
+
189
+ // Loop through documents and calculate in degrees
190
+ for (Map .Entry <String , JsonNode > doc : workflows .entrySet ()) {
191
+ JsonNode content = doc .getValue ();
192
+ if (content .get (CLASS ).asText ().equals (WORKFLOW )) {
193
+ // Parse workflow steps and see whether other workflows are run
194
+ JsonNode steps = content .get (STEPS );
195
+ if (steps .getClass () == ArrayNode .class ) {
196
+ // Explicit ID and other fields within each input list
197
+ for (JsonNode step : steps ) {
198
+ String run = FilenameUtils .getName (extractRun (step ));
199
+ if (run != null && inDegrees .containsKey (run )) {
200
+ inDegrees .put (run , inDegrees .get (run ) + 1 );
201
+ }
202
+ }
203
+ } else if (steps .getClass () == ObjectNode .class ) {
204
+ // ID is the key of each object
205
+ Iterator <Map .Entry <String , JsonNode >> iterator = steps .fields ();
206
+ while (iterator .hasNext ()) {
207
+ Map .Entry <String , JsonNode > stepNode = iterator .next ();
208
+ JsonNode stepJson = stepNode .getValue ();
209
+ String run = FilenameUtils .getName (extractRun (stepJson ));
210
+ if (run != null && inDegrees .containsKey (run )) {
211
+ inDegrees .put (run , inDegrees .get (run ) + 1 );
212
+ }
213
+ }
214
+ }
184
215
}
185
216
}
217
+
218
+ // Find a workflow with minimum inDegree and return
219
+ int minVal = Integer .MAX_VALUE ;
220
+ String minKey = null ;
221
+ for (Map .Entry <String , Integer > inDegree : inDegrees .entrySet ()) {
222
+ if (inDegree .getValue () < minVal ) {
223
+ minKey = inDegree .getKey ();
224
+ minVal = inDegree .getValue ();
225
+ }
226
+
227
+ // Early escape if minVal is already minimal
228
+ if (minVal == 0 ) {
229
+ return minKey ;
230
+ }
231
+ }
232
+
233
+ return minKey ;
186
234
}
187
235
188
236
/**
@@ -192,14 +240,14 @@ private void findMainWorkflow() {
192
240
public Workflow getWorkflow () {
193
241
// Get the main workflow
194
242
if (mainWorkflowKey == null ) {
195
- findMainWorkflow ();
243
+ mainWorkflowKey = findMainWorkflow ();
196
244
197
- // If it is still less than 0 there is no workflow to be found
245
+ // If it is still null there is no workflow to be found
198
246
if (mainWorkflowKey == null ) {
199
247
return null ;
200
248
}
201
249
}
202
- JsonNode mainWorkflow = cwlDocs .get (mainWorkflowKey );
250
+ JsonNode mainWorkflow = workflows .get (mainWorkflowKey );
203
251
204
252
// Use ID/filename for label if there is no defined one
205
253
String label = extractLabel (mainWorkflow );
@@ -624,4 +672,28 @@ private String extractTypes(JsonNode typeNode) {
624
672
}
625
673
return null ;
626
674
}
675
+
676
+ /**
677
+ * Extract the run parameter from a node representing a step
678
+ * @param stepNode The root node of a step
679
+ * @return A string with the run parameter if it exists
680
+ */
681
+ private String extractRun (JsonNode stepNode ) {
682
+ if (stepNode != null ) {
683
+ if (stepNode .has (RUN )) {
684
+ return stepNode .get (RUN ).asText ();
685
+ }
686
+ }
687
+ return null ;
688
+ }
689
+
690
+ /**
691
+ * Identify a JsonNode as a workflow
692
+ * @param rootNode The root node
693
+ * @return Whether or not the node is a workflow
694
+ */
695
+ private boolean isWorkflow (JsonNode rootNode ) {
696
+ return (rootNode .has (CLASS )
697
+ && rootNode .get (CLASS ).asText ().equals (WORKFLOW ));
698
+ }
627
699
}
0 commit comments