Skip to content

Commit e9dcb3f

Browse files
authored
Merge pull request #133 from common-workflow-language/ro-workflow-description
Schema.org parsing for author information in RO manifest
2 parents df8fb7f + dd36123 commit e9dcb3f

File tree

10 files changed

+243
-28
lines changed

10 files changed

+243
-28
lines changed

src/main/java/org/commonwl/view/cwl/RDFService.java

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ public class RDFService {
3737
"PREFIX sld: <https://w3id.org/cwl/salad#>\n" +
3838
"PREFIX Workflow: <https://w3id.org/cwl/cwl#Workflow/>\n" +
3939
"PREFIX DockerRequirement: <https://w3id.org/cwl/cwl#DockerRequirement/>\n" +
40-
"PREFIX rdfs: <rdfs:>";
40+
"PREFIX rdfs: <rdfs:>\n" +
41+
"PREFIX s: <http://schema.org/>";
4142

4243
private String rdfService;
4344

@@ -103,6 +104,7 @@ public boolean ontPropertyExists(String ontUri) {
103104

104105
/**
105106
* Get the label and doc strings for a workflow resource
107+
* @param path The path within the Git repository to the workflow
106108
* @param workflowURI The URI of the workflow
107109
* @return Result set with label and doc strings
108110
*/
@@ -112,6 +114,7 @@ public ResultSet getLabelAndDoc(String path, String workflowURI) {
112114
"SELECT ?label ?doc\n" +
113115
"WHERE {\n" +
114116
" GRAPH ?graphName {" +
117+
" ?wf rdf:type ?type .\n" +
115118
" OPTIONAL { ?wf sld:label|rdfs:label ?label }\n" +
116119
" OPTIONAL { ?wf sld:doc|rdfs:comment ?doc }\n" +
117120
" FILTER(regex(str(?wf), ?wfFilter, \"i\" ))" +
@@ -148,6 +151,7 @@ public String getOntLabel(String ontologyURI) {
148151

149152
/**
150153
* Get the inputs for the workflow in the model
154+
* @param path The path within the Git repository to the workflow
151155
* @param workflowURI URI of the workflow
152156
* @return The result set of inputs
153157
*/
@@ -173,6 +177,7 @@ public ResultSet getInputs(String path, String workflowURI) {
173177

174178
/**
175179
* Get the outputs for the workflow in the model
180+
* @param path The path within the Git repository to the workflow
176181
* @param workflowURI URI of the workflow
177182
* @return The result set of outputs
178183
*/
@@ -198,6 +203,7 @@ public ResultSet getOutputs(String path, String workflowURI) {
198203

199204
/**
200205
* Get the steps for the workflow in the model
206+
* @param path The path within the Git repository to the workflow
201207
* @param workflowURI URI of the workflow
202208
* @return The result set of steps
203209
*/
@@ -270,6 +276,7 @@ public ResultSet getOutputLinks(String path, String workflowURI) {
270276

271277
/**
272278
* Gets the docker requirement and pull link for a workflow
279+
* @param path The path within the Git repository to the workflow
273280
* @param workflowURI URI of the workflow
274281
* @return Result set of docker hint and pull link
275282
*/
@@ -291,6 +298,41 @@ public ResultSet getDockerLink(String path, String workflowURI) {
291298
return runQuery(dockerQuery);
292299
}
293300

301+
/**
302+
* Get authors from schema.org creator fields for a file
303+
* @param path The path within the Git repository to the file
304+
* @param fileUri URI of the file
305+
* @return The result set of step links
306+
*/
307+
public ResultSet getAuthors(String path, String fileUri) {
308+
ParameterizedSparqlString linkQuery = new ParameterizedSparqlString();
309+
linkQuery.setCommandText(queryCtx +
310+
"SELECT ?email ?name ?orcid\n" +
311+
"WHERE {\n" +
312+
" GRAPH ?graphName {" +
313+
" ?file s:author|s:contributor|s:creator ?author .\n" +
314+
" {\n" +
315+
" ?creator rdf:type s:Person .\n" +
316+
" OPTIONAL { ?author s:email ?email }\n" +
317+
" OPTIONAL { ?author s:name ?name }\n" +
318+
" OPTIONAL { ?author s:id|s:sameAs ?orcid }\n" +
319+
" } UNION {\n" +
320+
" ?author rdf:type s:Organization .\n" +
321+
" ?author s:department* ?dept .\n" +
322+
" ?dept s:member ?member\n" +
323+
" OPTIONAL { ?member s:email ?email }\n" +
324+
" OPTIONAL { ?member s:name ?name }\n" +
325+
" OPTIONAL { ?member s:id|s:sameAs ?orcid }\n" +
326+
" }\n" +
327+
" FILTER(regex(str(?orcid), \"^https?://orcid.org/\" ))\n" +
328+
" FILTER(regex(str(?file), ?wfFilter, \"i\" ))\n" +
329+
" }" +
330+
"}");
331+
linkQuery.setLiteral("wfFilter", path + "$");
332+
linkQuery.setIri("graphName", rdfService + fileUri);
333+
return runQuery(linkQuery);
334+
}
335+
294336
/**
295337
* Gets the step name from a full URI
296338
* @param baseUrl the URL of the workflow

src/main/java/org/commonwl/view/git/GitDetails.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,7 @@ public GitDetails(String repoUrl, String branch, String path) {
4747
}
4848

4949
// Default to root path
50-
if (path == null || path.isEmpty()) {
51-
this.path = "/";
52-
} else {
53-
this.path = path;
54-
}
50+
setPath(path);
5551
}
5652

5753

@@ -76,7 +72,13 @@ public String getPath() {
7672
}
7773

7874
public void setPath(String path) {
79-
this.path = path;
75+
if (path == null || path.isEmpty()) {
76+
this.path = "/";
77+
} else if (path.startsWith("/") && path.length() > 1) {
78+
this.path = path.substring(1);
79+
} else {
80+
this.path = path;
81+
}
8082
}
8183

8284
/**

src/main/java/org/commonwl/view/researchobject/HashableAgent.java

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,18 +78,26 @@ public boolean equals(Object o) {
7878

7979
HashableAgent that = (HashableAgent) o;
8080

81-
if (name != null ? !name.equals(that.name) : that.name != null) return false;
81+
// ORCID is a unique identifier so if matches, the objects are equal
82+
if (orcid != null && orcid.equals(that.orcid)) return true;
83+
84+
// If no ORCID is present but the URI (e.g. email) is the same, the objects are equal
85+
if (orcid == null && uri != null && uri.equals(that.uri)) return true;
86+
87+
// Default to checking all parameters
8288
if (orcid != null ? !orcid.equals(that.orcid) : that.orcid != null) return false;
89+
if (name != null ? !name.equals(that.name) : that.name != null) return false;
8390
return uri != null ? uri.equals(that.uri) : that.uri == null;
8491

8592
}
8693

94+
/**
95+
* ORCID is used as hashcode to fall back to comparison if missing
96+
* @return The hash code for this object
97+
*/
8798
@Override
8899
public int hashCode() {
89-
int result = name != null ? name.hashCode() : 0;
90-
result = 31 * result + (orcid != null ? orcid.hashCode() : 0);
91-
result = 31 * result + (uri != null ? uri.hashCode() : 0);
92-
return result;
100+
return orcid != null ? orcid.hashCode() : 0;
93101
}
94102

95103
}

src/main/java/org/commonwl/view/researchobject/ROBundleService.java

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,14 @@
2121

2222
import org.apache.commons.io.FileUtils;
2323
import org.apache.commons.io.FilenameUtils;
24+
import org.apache.jena.query.QuerySolution;
25+
import org.apache.jena.query.ResultSet;
2426
import org.apache.taverna.robundle.Bundle;
2527
import org.apache.taverna.robundle.Bundles;
2628
import org.apache.taverna.robundle.manifest.*;
2729
import org.commonwl.view.cwl.CWLTool;
2830
import org.commonwl.view.cwl.CWLValidationException;
31+
import org.commonwl.view.cwl.RDFService;
2932
import org.commonwl.view.git.GitDetails;
3033
import org.commonwl.view.git.GitSemaphore;
3134
import org.commonwl.view.git.GitService;
@@ -67,6 +70,7 @@ public class ROBundleService {
6770
// Services
6871
private GraphVizService graphVizService;
6972
private GitService gitService;
73+
private RDFService rdfService;
7074
private CWLTool cwlTool;
7175
private GitSemaphore gitSemaphore;
7276

@@ -94,6 +98,7 @@ public ROBundleService(@Value("${bundleStorage}") Path bundleStorage,
9498
@Value("${singleFileSizeLimit}") int singleFileSizeLimit,
9599
GraphVizService graphVizService,
96100
GitService gitService,
101+
RDFService rdfService,
97102
GitSemaphore gitSemaphore,
98103
CWLTool cwlTool) throws URISyntaxException {
99104
this.bundleStorage = bundleStorage;
@@ -102,6 +107,7 @@ public ROBundleService(@Value("${bundleStorage}") Path bundleStorage,
102107
this.singleFileSizeLimit = singleFileSizeLimit;
103108
this.graphVizService = graphVizService;
104109
this.gitService = gitService;
110+
this.rdfService = rdfService;
105111
this.gitSemaphore = gitSemaphore;
106112
this.cwlTool = cwlTool;
107113
}
@@ -135,12 +141,13 @@ public Bundle createBundle(Workflow workflow, GitDetails gitInfo) throws IOExcep
135141

136142
// Add the files from the repo to this workflow
137143
Set<HashableAgent> authors = new HashSet<>();
144+
138145
boolean safeToAccess = gitSemaphore.acquire(gitInfo.getRepoUrl());
139146
try {
140147
Git gitRepo = gitService.getRepository(workflow.getRetrievedFrom(), safeToAccess);
141148
Path relativePath = Paths.get(FilenameUtils.getPath(gitInfo.getPath()));
142149
Path gitPath = gitRepo.getRepository().getWorkTree().toPath().resolve(relativePath);
143-
addFilesToBundle(gitInfo, bundle, bundlePath, gitRepo, gitPath, authors);
150+
addFilesToBundle(gitInfo, bundle, bundlePath, gitRepo, gitPath, authors, workflow);
144151
} finally {
145152
gitSemaphore.release(gitInfo.getRepoUrl());
146153
}
@@ -217,7 +224,8 @@ public Bundle createBundle(Workflow workflow, GitDetails gitInfo) throws IOExcep
217224
* @param authors The combined set of authors for all the files
218225
*/
219226
private void addFilesToBundle(GitDetails gitDetails, Bundle bundle, Path bundlePath,
220-
Git gitRepo, Path repoPath, Set<HashableAgent> authors)
227+
Git gitRepo, Path repoPath, Set<HashableAgent> authors,
228+
Workflow workflow)
221229
throws IOException {
222230
File[] files = repoPath.toFile().listFiles();
223231
for (File file : files) {
@@ -234,7 +242,7 @@ private void addFilesToBundle(GitDetails gitDetails, Bundle bundle, Path bundleP
234242

235243
// Add all files in the subdirectory to this new folder
236244
addFilesToBundle(subfolderGitDetails, bundle, newBundlePath, gitRepo,
237-
repoPath.resolve(file.getName()), authors);
245+
repoPath.resolve(file.getName()), authors, workflow);
238246

239247
} else {
240248
try {
@@ -280,7 +288,8 @@ private void addFilesToBundle(GitDetails gitDetails, Bundle bundle, Path bundleP
280288
}
281289

282290
// Special handling for cwl files
283-
if (FilenameUtils.getExtension(file.getName()).equals("cwl")) {
291+
boolean cwl = FilenameUtils.getExtension(file.getName()).equals("cwl");
292+
if (cwl) {
284293
// Correct mime type (no official standard for yaml)
285294
aggregation.setMediatype("text/x-yaml");
286295

@@ -293,10 +302,36 @@ private void addFilesToBundle(GitDetails gitDetails, Bundle bundle, Path bundleP
293302
}
294303
}
295304

296-
// Add authors from git commits to the file
297305
try {
306+
Path gitPath = Paths.get(gitDetails.getPath()).resolve(file.getName());
307+
String url = workflow.getRetrievedFrom()
308+
.getUrl(workflow.getLastCommit()).replace("https://", "");
309+
310+
// Add authors from git commits to the file
298311
Set<HashableAgent> fileAuthors = gitService.getAuthors(gitRepo,
299-
Paths.get(gitDetails.getPath()).resolve(file.getName()).toString());
312+
gitPath.toString());
313+
314+
if (cwl) {
315+
// Attempt to get authors from cwl description - takes priority
316+
ResultSet descAuthors = rdfService.getAuthors(bundlePath
317+
.resolve(file.getName()).toString().substring(10), url);
318+
if (descAuthors.hasNext()) {
319+
QuerySolution authorSoln = descAuthors.nextSolution();
320+
HashableAgent newAuthor = new HashableAgent();
321+
if (authorSoln.contains("name")) {
322+
newAuthor.setName(authorSoln.get("name").toString());
323+
}
324+
if (authorSoln.contains("email")) {
325+
newAuthor.setUri(new URI(authorSoln.get("email").toString()));
326+
}
327+
if (authorSoln.contains("orcid")) {
328+
newAuthor.setOrcid(new URI(authorSoln.get("orcid").toString()));
329+
}
330+
fileAuthors.remove(newAuthor);
331+
fileAuthors.add(newAuthor);
332+
}
333+
}
334+
300335
authors.addAll(fileAuthors);
301336
aggregation.setAuthoredBy(new ArrayList<>(fileAuthors));
302337
} catch (GitAPIException ex) {

src/main/java/org/commonwl/view/workflow/WorkflowRepository.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,5 @@ public interface WorkflowRepository extends PagingAndSortingRepository<Workflow,
5050
* @param doc The string to search for in the doc
5151
* @param pageable The details of the page to be retrieved
5252
*/
53-
Page<Workflow> findByLabelContainingOrDocContaining(String label, String doc, Pageable pageable);
53+
Page<Workflow> findByLabelContainingOrDocContainingIgnoreCase(String label, String doc, Pageable pageable);
5454
}

src/main/java/org/commonwl/view/workflow/WorkflowService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ public Page<Workflow> getPageOfWorkflows(Pageable pageable) {
9797
* @return The resulting page of the workflow entries
9898
*/
9999
public Page<Workflow> searchPageOfWorkflows(String searchString, Pageable pageable) {
100-
return workflowRepository.findByLabelContainingOrDocContaining(searchString, searchString, pageable);
100+
return workflowRepository.findByLabelContainingOrDocContainingIgnoreCase(searchString, searchString, pageable);
101101
}
102102

103103
/**

src/main/resources/templates/about.html

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ <h1>About</h1>
4444
understanding and discovery as well as encouraging best practices when writing workflows and their
4545
tooling.</p>
4646

47-
<p>Cite as: <code><a href="https://doi.org/10.5281/zenodo.823534">10.5281/zenodo.823534</a></code></p>
47+
<p>Cite as: <code><a href="https://doi.org/10.7490/f1000research.1114375.1">10.7490/f1000research.1114375.1</a></code></p>
4848

4949
<p>A <a href="https://doi.org/10.5281/zenodo.823535">Technical Report for this project can be viewed here</a>.</p>
5050

@@ -133,16 +133,39 @@ <h2>Use of Subworkflows</h2>
133133
<p class="use">Subworkflows are simplified in the visualisations and are linked as a different workflow in the
134134
<code>Step</code> tables on each workflow page</p>
135135

136+
<h2>Attribution</h2>
137+
<p class="recommendation">Include attribution information in your workflow and tool descriptions</p>
138+
<p class="recommendation_more">
139+
For example, to attribute a person as the author of a workflow or tool with name, email and
140+
ORCID information, include the following statements at the top level:<br />
141+
<pre>
142+
$namespaces: { s: "http://schema.org/" }
143+
s:author:
144+
- class: s:Person
145+
s:name: Mark Robinson
146+
s:email: mailto:[email protected]
147+
s:id: http://orcid.org/0000-0002-8184-7507
148+
</pre>
149+
For attributing organisations, see <a href="https://github.com/Barski-lab/ga4gh_challenge/blob/9de2f0c29ae09e31a434d6c5c969a5e3b2dbf535/biowardrobe_chipseq_se.cwl#L387">this workflow</a>
150+
as an example
151+
</p>
152+
<p class="why">Attribution information allows your workflows and tooling to be used by others while
153+
recognising your contributions. The inclusion of an <a href="https://orcid.org/">ORCID</a> allows you to
154+
be uniquely identified from other researchers</p>
155+
<p class="use">CWLViewer parses attribution information for inclusion in the Research Object Manifest from
156+
both the Git commit logs and from the CWL descriptions themselves when expressed in the
157+
<a href="http://schema.org/author">http://schema.org/author</a> format as above</p>
158+
136159
<h2>Licensing</h2>
137160
<p class="recommendation">Include a <a href="https://opensource.org/licenses">OSI approved open source license</a>
138161
in your workflow and tool descriptions</p>
139162
<p class="recommendation_more">
140163
For example, the following two statements at the top level of a workflow or tool description licenses it
141164
under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache V2.0 License</a>:<br />
142-
<code>
143-
$namespaces: { s: "http://schema.org/" }<br />
144-
s:license: "https://www.apache.org/licenses/LICENSE-2.0"
145-
</code>
165+
<pre>
166+
$namespaces: { s: "http://schema.org/" }
167+
s:license: "https://www.apache.org/licenses/LICENSE-2.0"
168+
</pre>
146169
</p>
147170
<p class="why">A permissive open source license allows others to remix and use your tooling and workflows
148171
to prevent the community from repeating development effort, allowing everyone to benefit</p>

src/main/resources/templates/workflow.html

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,6 @@ <h4 class="modal-title" id="fullScreenGraphLabel">Workflow Graph</h4>
125125
<div class="row">
126126
<div class="col-md-12" role="main" id="main">
127127
<h2>Workflow: <span th:text="${workflow.label}">Workflow Name</span></h2>
128-
<p th:text="${workflow.doc}">Workflow Doc</p>
129128
</div>
130129
<div class="col-md-6">
131130
<a th:href="@{${workflow.retrievedFrom.getUrl()}}" href="#" rel="noopener" target="_blank" style="text-decoration:none;">
@@ -143,6 +142,9 @@ <h2>Workflow: <span th:text="${workflow.label}">Workflow Name</span></h2>
143142
<div class="col-md-6 text-right">
144143
<img class="verification_icon" src="../static/img/tick.svg" th:src="@{/img/tick.svg}" width="20" height="22" /> Verified with cwltool version <span th:text="${workflow.cwltoolVersion}">1.0.20170622090721</span>
145144
</div>
145+
<div class="col-md-12" style="margin-top:5px;" th:if="${workflow.doc != null}">
146+
<p th:text="${workflow.doc}">Workflow Doc</p>
147+
</div>
146148
</div>
147149
<div class="row">
148150
<div class="col-md-12">

0 commit comments

Comments
 (0)