Skip to content

Commit 7903d09

Browse files
authored
Merge pull request #880 from common-workflow-language/prov_secondary_files
Track secondary files in prov/RO. Create primary-job.json later so any SecondaryFiles are resolved (and relativised). Track secondary files in PROV. Track file extensions and base name in PROV. Identify each "File" entity with a UUID; it specializes the sha1 entity and has separate filenames/secondary files. Add tests for secondary files consumption and generation.
2 parents c71deb9 + 61c98e9 commit 7903d09

File tree

8 files changed

+703
-337
lines changed

8 files changed

+703
-337
lines changed

cwltool/context.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,6 @@ def __init__(self, kwargs=None):
127127
self.cwl_full_name = None
128128
self.process_run_id = None # type: Optional[str]
129129
self.prov_obj = None # type: Optional[CreateProvProfile]
130-
self.reference_locations = {} # type: Dict[Text, Text]
131130
super(RuntimeContext, self).__init__(kwargs)
132131

133132

cwltool/executors.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,6 @@ def run_jobs(self,
125125
): # type: (...) -> None
126126

127127
process_run_id = None # type: Optional[str]
128-
reference_locations = {} # type: Dict[Text,Text]
129128

130129
# define provenance profile for single commandline tool
131130
if not isinstance(process, Workflow) \
@@ -153,15 +152,13 @@ def run_jobs(self,
153152
else:
154153
runtime_context.prov_obj = job.prov_obj
155154
assert runtime_context.prov_obj
156-
process_run_id, reference_locations = \
155+
process_run_id = \
157156
runtime_context.prov_obj.evaluate(
158157
process, job, job_order_object,
159158
runtime_context.make_fs_access,
160159
runtime_context)
161160
runtime_context = runtime_context.copy()
162161
runtime_context.process_run_id = process_run_id
163-
runtime_context.reference_locations = \
164-
reference_locations
165162
job.run(runtime_context)
166163
else:
167164
logger.error("Workflow cannot make any more progress.")

cwltool/provenance.py

Lines changed: 322 additions & 268 deletions
Large diffs are not rendered by default.

cwltool/workflow.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
from .software_requirements import ( # pylint: disable=unused-import
3333
DependenciesConfiguration)
3434
from .stdfsaccess import StdFsAccess
35-
from .provenance import CreateProvProfile
35+
from .provenance import CreateProvProfile, ResearchObject
3636
from .utils import DEFAULT_TMP_PREFIX, aslist, json_dumps
3737
from . import context
3838
from .context import (LoadingContext, # pylint: disable=unused-import
@@ -239,20 +239,19 @@ def do_output_callback(self, final_output_callback):
239239

240240
supportsMultipleInput = bool(self.workflow.get_requirement("MultipleInputFeatureRequirement")[0])
241241

242-
wo = {} # type: Optional[Dict[Text, Text]]
242+
wo = None # type: Optional[Dict[Text, Text]]
243243
try:
244244
wo = object_from_state(
245245
self.state, self.tool["outputs"], True, supportsMultipleInput,
246246
"outputSource", incomplete=True)
247247
except WorkflowException as err:
248248
_logger.error(
249249
u"[%s] Cannot collect workflow output: %s", self.name, err)
250-
wo = {}
251250
self.processStatus = "permanentFail"
252251
if self.prov_obj and self.parent_wf \
253252
and self.prov_obj.workflow_run_uri != self.parent_wf.workflow_run_uri:
254253
process_run_id = None
255-
self.prov_obj.generate_output_prov(wo, process_run_id, self.name)
254+
self.prov_obj.generate_output_prov(wo or {}, process_run_id, self.name)
256255
self.prov_obj.document.wasEndedBy(
257256
self.prov_obj.workflow_run_uri, None, self.prov_obj.engine_uuid,
258257
datetime.datetime.now())
@@ -564,6 +563,14 @@ def job(self,
564563
runtimeContext # type: RuntimeContext
565564
): # type: (...) -> Generator[Any, None, None]
566565
builder = self._init_job(job_order, runtimeContext)
566+
#relativeJob=copy.deepcopy(builder.job)
567+
if runtimeContext.research_obj:
568+
if not runtimeContext.research_obj.make_fs_access:
569+
runtimeContext.research_obj.make_fs_access = runtimeContext.make_fs_access
570+
if runtimeContext.toplevel:
571+
# Record primary-job.json
572+
runtimeContext.research_obj.create_job(self.job, builder.job)
573+
567574
job = WorkflowJob(self, runtimeContext)
568575
yield job
569576

tests/bundle-context.jsonld

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
{
2+
"dct:license": "http://www.apache.org/licenses/LICENSE-2.0",
3+
"prov:alternateOf": "https://w3id.org/bundle/context",
4+
"dc:rights": "Copyright 2013-2018 The University of Manchester. Licensed under the Apache License, Version 2.0 (the 'License'); you may not use this file except in compliance with the License. You may obtain a copy of the License at <http://www.apache.org/licenses/LICENSE-2.0>. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ",
5+
"@context": {
6+
"ao": "http://purl.org/ao/",
7+
"oa": "http://www.w3.org/ns/oa#",
8+
"dc": "http://purl.org/dc/elements/1.1/",
9+
"dct": "http://purl.org/dc/terms/",
10+
"ore": "http://www.openarchives.org/ore/terms/",
11+
"ro": "http://purl.org/wf4ever/ro#",
12+
"roterms": "http://purl.org/wf4ever/roterms#",
13+
"bundle": "http://purl.org/wf4ever/bundle#",
14+
"prov": "http://www.w3.org/ns/prov#",
15+
"pav": "http://purl.org/pav/",
16+
"xsd": "http://www.w3.org/2001/XMLSchema#",
17+
"foaf": "http://xmlns.com/foaf/0.1/",
18+
"owl": "http://www.w3.org/2002/07/owl#",
19+
"doi": "http://dx.doi.org/",
20+
21+
"uri": "@id",
22+
"id": {
23+
"@id": "owl:sameAs",
24+
"@type": "@id" },
25+
"file": {
26+
"@id": "owl:sameAs",
27+
"@type": "@id" },
28+
"annotation": {
29+
"@id": "owl:sameAs",
30+
"@type": "@id" },
31+
32+
"manifest": {
33+
"@id": "ore:isDescribedBy",
34+
"@type": "@id"
35+
},
36+
37+
"createdOn": {
38+
"@id": "pav:createdOn",
39+
"@type": "xsd:dateTime"
40+
},
41+
"createdBy": {
42+
"@id": "pav:createdBy",
43+
"@type": "@id"
44+
},
45+
"aggregatedOn": {
46+
"@id": "pav:createdOn",
47+
"@type": "xsd:dateTime"
48+
},
49+
"aggregatedBy": {
50+
"@id": "pav:createdBy",
51+
"@type": "@id"
52+
},
53+
"authoredOn": {
54+
"@id": "pav:authoredOn",
55+
"@type": "xsd:dateTime"
56+
},
57+
"authoredBy": {
58+
"@id": "pav:authoredBy",
59+
"@type": "@id"
60+
},
61+
"curatedOn": {
62+
"@id": "pav:curatedOn",
63+
"@type": "xsd:dateTime"
64+
},
65+
"curatedBy": {
66+
"@id": "pav:curatedBy",
67+
"@type": "@id"
68+
},
69+
"contributedOn": {
70+
"@id": "pav:contributedOn",
71+
"@type": "xsd:dateTime"
72+
},
73+
"contributedBy": {
74+
"@id": "pav:contributedBy",
75+
"@type": "@id"
76+
},
77+
"retrievedOn": {
78+
"@id": "pav:retrievedOn",
79+
"@type": "xsd:dateTime"
80+
},
81+
"retrievedBy": {
82+
"@id": "pav:retrievedBy",
83+
"@type": "@id"
84+
},
85+
"retrievedFrom": {
86+
"@id": "pav:retrievedFrom",
87+
"@type": "@id"
88+
},
89+
"name": {
90+
"@id": "foaf:name"
91+
},
92+
"orcid": {
93+
"@id": "roterms:orcid",
94+
"@type": "@id"
95+
},
96+
97+
"history": {
98+
"@id": "prov:has_provenance",
99+
"@type": "@id"
100+
},
101+
"aggregates": {
102+
"@id": "ore:aggregates",
103+
"@type": "@id"
104+
},
105+
"mediatype": {
106+
"@id": "dc:format"
107+
},
108+
"folder": {
109+
"@id": "bundle:inFolder",
110+
"@type": "@id"
111+
},
112+
"filename": {
113+
"@id": "ro:entryName"
114+
},
115+
"proxy": {
116+
"@id": "bundle:hasProxy",
117+
"@type": "@id"
118+
},
119+
"bundledAs": {
120+
"@id": "bundle:bundledAs",
121+
"@type": "@id"
122+
},
123+
"conformsTo": {
124+
"@id": "dct:conformsTo",
125+
"@type": "@id"
126+
},
127+
"annotations": {
128+
"@id": "bundle:hasAnnotation",
129+
"@type": "@id"
130+
},
131+
"content": {
132+
"@id": "oa:hasBody",
133+
"@type": "@id"
134+
},
135+
"about": {
136+
"@id": "oa:hasTarget",
137+
"@type": "@id"
138+
}
139+
140+
}
141+
}

0 commit comments

Comments
 (0)