Skip to content

Commit e6bea1f

Browse files
committed
capture secondary files in relativised job file
1 parent a65af4c commit e6bea1f

File tree

2 files changed

+40
-33
lines changed

2 files changed

+40
-33
lines changed

cwltool/provenance.py

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -447,28 +447,6 @@ def evaluate(self,
447447
evaluate the nature of r and
448448
initialize the activity start
449449
'''
450-
def copy_job_order(job, job_order_object):
451-
# type: (Any,Any) -> Any
452-
'''
453-
creates copy of job object for provenance
454-
'''
455-
if not hasattr(job, "tool"):
456-
# direct command line tool execution
457-
return job_order_object
458-
customised_job = {} # new job object for RO
459-
for each, i in enumerate(job.tool["inputs"]):
460-
with SourceLine(job.tool["inputs"], each, WorkflowException,
461-
_logger.isEnabledFor(logging.DEBUG)):
462-
iid = shortname(i["id"])
463-
if iid in job_order_object:
464-
customised_job[iid] = copy.deepcopy(job_order_object[iid])
465-
# add the input element in dictionary for provenance
466-
elif "default" in i:
467-
customised_job[iid] = copy.deepcopy(i["default"])
468-
# add the default elements in the dictionary for provenance
469-
else:
470-
pass
471-
return customised_job
472450

473451
process_run_id = None
474452
research_obj = runtimeContext.research_obj
@@ -479,8 +457,7 @@ def copy_job_order(job, job_order_object):
479457
self.prospective_prov(job)
480458
customised_job = copy_job_order(job, job_order_object)
481459
self.used_artefacts(customised_job, self.workflow_run_uri)
482-
inputs = research_obj.create_job(
483-
customised_job)
460+
research_obj.create_job(job, customised_job)
484461
#self.used_artefacts(inputs, self.workflow_run_uri)
485462
name = ""
486463
if hasattr(job, "name"):
@@ -491,8 +468,8 @@ def copy_job_order(job, job_order_object):
491468
self.prospective_prov(job)
492469
customised_job = copy_job_order(job, job_order_object)
493470
self.used_artefacts(customised_job, self.workflow_run_uri)
494-
inputs = research_obj.create_job(
495-
customised_job)
471+
#inputs = research_obj.create_job(
472+
#customised_job)
496473
#self.used_artefacts(inputs, self.workflow_run_uri)
497474
else: # in case of commandline tool execution as part of workflow
498475
name = ""
@@ -1542,28 +1519,30 @@ def _add_to_bagit(self, rel_path, **checksums):
15421519
self.add_to_manifest(rel_path, checksums)
15431520

15441521
def create_job(self,
1545-
job, # type: Dict
1522+
wfJob,
1523+
builderJob # type: Dict
15461524
): # type: (...) -> Dict
15471525
#TODO customise the file
15481526
'''
15491527
This function takes the dictionary input object and generates
15501528
a json file containing the relative paths and link to the associated
15511529
cwl document
15521530
'''
1531+
copied=copy.deepcopy(builderJob)
15531532
relativised_input_objecttemp = {} # type: Dict[Any,Any]
1554-
self.relativise_files(job)
1555-
1533+
self.relativise_files(copied)
15561534
rel_path = posixpath.join(_posix_path(WORKFLOW), "primary-job.json")
1557-
j = json.dumps(job, indent=4, ensure_ascii=False)
1535+
j = json.dumps(copied, indent=4, ensure_ascii=False)
15581536
with self.write_bag_file(rel_path) as file_path:
15591537
file_path.write(j + u"\n")
15601538
_logger.info(u"[provenance] Generated customised job file: %s", rel_path)
1561-
1539+
print ("copied: ", copied)
1540+
print ("original", builderJob)
15621541
#Generate dictionary with keys as workflow level input IDs and values as
15631542
#1) for files the relativised location containing hash
15641543
#2) for other attributes, the actual value.
15651544
relativised_input_objecttemp = {}
1566-
for key, value in job.items():
1545+
for key, value in copied.items():
15671546
if isinstance(value, dict):
15681547
if value.get("class") in ("File", "Directory"):
15691548
relativised_input_objecttemp[key] = value
@@ -1673,3 +1652,26 @@ def checksum_copy(file_path, # type: IO
16731652
if copy_to_fp is not None:
16741653
copy_to_fp.flush()
16751654
return checksum.hexdigest().lower()
1655+
1656+
def copy_job_order(job, job_order_object):
1657+
# type: (Any,Any) -> Any
1658+
'''
1659+
creates copy of job object for provenance
1660+
'''
1661+
if not hasattr(job, "tool"):
1662+
# direct command line tool execution
1663+
return job_order_object
1664+
customised_job = {} # new job object for RO
1665+
for each, i in enumerate(job.tool["inputs"]):
1666+
with SourceLine(job.tool["inputs"], each, WorkflowException,
1667+
_logger.isEnabledFor(logging.DEBUG)):
1668+
iid = shortname(i["id"])
1669+
if iid in job_order_object:
1670+
customised_job[iid] = copy.deepcopy(job_order_object[iid])
1671+
# add the input element in dictionary for provenance
1672+
elif "default" in i:
1673+
customised_job[iid] = copy.deepcopy(i["default"])
1674+
# add the default elements in the dictionary for provenance
1675+
else:
1676+
pass
1677+
return customised_job

cwltool/workflow.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
from .software_requirements import ( # pylint: disable=unused-import
3333
DependenciesConfiguration)
3434
from .stdfsaccess import StdFsAccess
35-
from .provenance import CreateProvProfile
35+
from .provenance import CreateProvProfile, ResearchObject
3636
from .utils import DEFAULT_TMP_PREFIX, aslist, json_dumps
3737
from . import context
3838
from .context import (LoadingContext, # pylint: disable=unused-import
@@ -564,6 +564,11 @@ def job(self,
564564
runtimeContext # type: RuntimeContext
565565
): # type: (...) -> Generator[Any, None, None]
566566
builder = self._init_job(job_order, runtimeContext)
567+
#relativeJob=copy.deepcopy(builder.job)
568+
if runtimeContext.research_obj:
569+
runtimeContext.research_obj.make_fs_access = runtimeContext.make_fs_access
570+
runtimeContext.research_obj.create_job(self.job, builder.job)
571+
567572
job = WorkflowJob(self, runtimeContext)
568573
yield job
569574

0 commit comments

Comments
 (0)