Skip to content

Commit 8b6cfba

Browse files
authored
Merge pull request #921 from common-workflow-language/cwlprov-log-filename
CWLProv log filename in metadata/logs/
2 parents c1a5187 + f75ff86 commit 8b6cfba

File tree

3 files changed

+134
-25
lines changed

3 files changed

+134
-25
lines changed

cwltool/main.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -466,8 +466,7 @@ def main(argsl=None, # type: List[str]
466466
_logger.addHandler(stderr_handler)
467467
# pre-declared for finally block
468468
workflowobj = None
469-
prov_log_handler = None # type: Optional[logging.FileHandler]
470-
prov_log_handler_file = None # type: Optional[IO[Any]]
469+
prov_log_handler = None # type: Optional[logging.StreamHandler]
471470
try:
472471
if args is None:
473472
if argsl is None:
@@ -493,19 +492,23 @@ def main(argsl=None, # type: List[str]
493492
if not hasattr(args, key):
494493
setattr(args, key, val)
495494

495+
## Configure logging
496496
rdflib_logger = logging.getLogger("rdflib.term")
497497
rdflib_logger.addHandler(stderr_handler)
498498
rdflib_logger.setLevel(logging.ERROR)
499499
if args.quiet:
500-
_logger.setLevel(logging.WARN)
500+
# Silence STDERR, not an eventual provenance log file
501+
stderr_handler.setLevel(logging.WARN)
501502
if runtimeContext.debug:
503+
# Increase to debug for both stderr and provenance log file
502504
_logger.setLevel(logging.DEBUG)
503505
rdflib_logger.setLevel(logging.DEBUG)
504506
formatter = None # type: Optional[logging.Formatter]
505507
if args.timestamps:
506508
formatter = logging.Formatter("[%(asctime)s] %(message)s",
507509
"%Y-%m-%d %H:%M:%S")
508510
stderr_handler.setFormatter(formatter)
511+
##
509512

510513
if args.version:
511514
print(versionfunc())
@@ -544,14 +547,12 @@ def main(argsl=None, # type: List[str]
544547
if not args.compute_checksum:
545548
_logger.error("--provenance incompatible with --no-compute-checksum")
546549
return 1
547-
runtimeContext.research_obj = ResearchObject(
550+
ro = ResearchObject(
548551
temp_prefix_ro=args.tmpdir_prefix, orcid=args.orcid,
549552
full_name=args.cwl_full_name)
550-
assert runtimeContext.research_obj.folder
551-
prov_log_handler_filename = os.path.join(
552-
runtimeContext.research_obj.folder, "log")
553-
prov_log_handler = logging.FileHandler(
554-
str(prov_log_handler_filename))
553+
runtimeContext.research_obj = ro
554+
log_file_io = ro.open_log_file_for_activity(ro.engine_uuid)
555+
prov_log_handler = logging.StreamHandler(log_file_io)
555556
class ProvLogFormatter(logging.Formatter):
556557
"""Enforce ISO8601 with both T and Z."""
557558
def __init__(self): # type: () -> None
@@ -566,6 +567,11 @@ def formatTime(self, record, datefmt=None):
566567
return with_msecs
567568
prov_log_handler.setFormatter(ProvLogFormatter())
568569
_logger.addHandler(prov_log_handler)
570+
_logger.debug(u"[provenance] Logging to %s", log_file_io)
571+
if argsl:
572+
# Log cwltool command line options to provenance file
573+
_logger.info("[cwltool] %s %s", sys.argv[0], u" ".join(argsl))
574+
_logger.debug(u"[cwltool] Arguments: %s", args)
569575

570576
if loadingContext is None:
571577
loadingContext = LoadingContext(vars(args))
@@ -795,9 +801,15 @@ def loc_to_path(obj):
795801
assert prov_dep
796802
research_obj.generate_snapshot(prov_dep)
797803
if prov_log_handler:
798-
prov_log_handler.close()
804+
# Stop logging so we won't half-log adding ourself to RO
805+
_logger.debug(u"[provenance] Closing provenance log file %s",
806+
prov_log_handler)
799807
_logger.removeHandler(prov_log_handler)
800-
research_obj.write_log(prov_log_handler_filename)
808+
# Ensure last log lines are written out
809+
prov_log_handler.flush()
810+
# Underlying WritableBagFile will add the tagfile to the manifest
811+
prov_log_handler.stream.close()
812+
prov_log_handler.close()
801813
research_obj.close(args.provenance)
802814

803815
_logger.removeHandler(stderr_handler)

cwltool/provenance.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ class PermissionError(OSError): # pylint: disable=redefined-builtin
8282
# sub-folders
8383
MAIN = os.path.join(WORKFLOW, "main")
8484
PROVENANCE = os.path.join(METADATA, "provenance")
85+
LOGS = os.path.join(METADATA, "logs")
8586
WFDESC = Namespace("wfdesc", 'http://purl.org/wf4ever/wfdesc#')
8687
WFPROV = Namespace("wfprov", 'http://purl.org/wf4ever/wfprov#')
8788
WF4EVER = Namespace("wf4ever", 'http://purl.org/wf4ever/wf4ever#')
@@ -999,7 +1000,7 @@ def __str__(self):
9991000

10001001
def _initialize(self): # type: () -> None
10011002
for research_obj_folder in (METADATA, DATA, WORKFLOW, SNAPSHOT,
1002-
PROVENANCE):
1003+
PROVENANCE, LOGS):
10031004
os.makedirs(os.path.join(self.folder, research_obj_folder))
10041005
self._initialize_bagit()
10051006

@@ -1013,17 +1014,20 @@ def _initialize_bagit(self): # type: () -> None
10131014
# TODO: \n or \r\n ?
10141015
bag_it_file.write(u"BagIt-Version: 0.97\n")
10151016
bag_it_file.write(u"Tag-File-Character-Encoding: %s\n" % ENCODING)
1016-
1017-
def write_log(self, log_path): # type: (Text) -> None
1018-
"""Copies log files to the snapshot/ directory."""
1017+
1018+
def open_log_file_for_activity(self, uuid_uri): # type: (Text) -> IO
10191019
self.self_check()
1020-
dst_path = os.path.join(
1021-
self.folder, SNAPSHOT, os.path.basename(log_path))
1022-
while os.path.exists(dst_path):
1023-
dst_path = dst_path + "_{}".format(uuid.uuid4())
1024-
shutil.move(log_path, dst_path)
1025-
when = datetime.datetime.fromtimestamp(os.path.getmtime(dst_path))
1026-
self.add_tagfile(dst_path, when)
1020+
# Ensure valid UUID for safe filenames
1021+
activity_uuid = uuid.UUID(uuid_uri)
1022+
if activity_uuid.urn == self.engine_uuid:
1023+
# It's the engine aka cwltool!
1024+
name = "engine"
1025+
else:
1026+
name = "activity"
1027+
p = os.path.join(LOGS, "{}.{}.txt".format(name, activity_uuid))
1028+
_logger.debug("[provenance] Opening log file for %s: %s" % (name, p))
1029+
self.add_annotation(activity_uuid.urn, [p], CWLPROV["log"].uri)
1030+
return self.write_bag_file(p)
10271031

10281032
def _finalize(self): # type: () -> None
10291033
self._write_ro_manifest()
@@ -1359,8 +1363,8 @@ def _write_bag_info(self):
13591363

13601364
def generate_snapshot(self, prov_dep):
13611365
# type: (MutableMapping[Text, Any]) -> None
1362-
self.self_check()
13631366
"""Copy all of the CWL files to the snapshot/ directory."""
1367+
self.self_check()
13641368
for key, value in prov_dep.items():
13651369
if key == "location" and value.split("/")[-1]:
13661370
filename = value.split("/")[-1]
@@ -1564,8 +1568,7 @@ def _relativise_files(self, structure):
15641568
if not relative_path and "location" in structure:
15651569
# Register in RO; but why was this not picked
15661570
# up by used_artefacts?
1567-
_logger.warning("File not previously registered in RO: %s",
1568-
yaml.dump(structure))
1571+
_logger.info("[provenance] Adding to RO %s", structure["location"])
15691572
fsaccess = self.make_fs_access("")
15701573
with fsaccess.open(structure["location"], "rb") as fp:
15711574
relative_path = self.add_data_file(fp)

typeshed/2and3/uuid.pyi

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# Stubs for uuid
2+
3+
import sys
4+
from typing import Tuple, Optional, Any, Union
5+
6+
# Because UUID has properties called int and bytes we need to rename these temporarily.
7+
_Int = int
8+
_Bytes = bytes
9+
_FieldsType = Tuple[int, int, int, int, int, int]
10+
if sys.version_info >= (3,):
11+
_Text = str
12+
else:
13+
_Text = Union[str, unicode]
14+
15+
class UUID:
16+
def __init__(self, hex: Optional[_Text] = ...,
17+
bytes: Optional[_Bytes] = ...,
18+
bytes_le: Optional[_Bytes] = ...,
19+
fields: Optional[_FieldsType] = ...,
20+
int: Optional[_Int] = ...,
21+
version: Optional[_Int] = ...) -> None: ...
22+
@property
23+
def bytes(self) -> _Bytes: ...
24+
@property
25+
def bytes_le(self) -> _Bytes: ...
26+
@property
27+
def clock_seq(self) -> _Int: ...
28+
@property
29+
def clock_seq_hi_variant(self) -> _Int: ...
30+
@property
31+
def clock_seq_low(self) -> _Int: ...
32+
@property
33+
def fields(self) -> _FieldsType: ...
34+
@property
35+
def hex(self) -> str: ...
36+
@property
37+
def int(self) -> _Int: ...
38+
@property
39+
def node(self) -> _Int: ...
40+
@property
41+
def time(self) -> _Int: ...
42+
@property
43+
def time_hi_version(self) -> _Int: ...
44+
@property
45+
def time_low(self) -> _Int: ...
46+
@property
47+
def time_mid(self) -> _Int: ...
48+
@property
49+
def urn(self) -> str: ...
50+
@property
51+
def variant(self) -> str: ...
52+
@property
53+
def version(self) -> Optional[_Int]: ...
54+
55+
def __int__(self) -> _Int: ...
56+
57+
if sys.version_info >= (3,):
58+
def __eq__(self, other: Any) -> bool: ...
59+
def __lt__(self, other: Any) -> bool: ...
60+
def __le__(self, other: Any) -> bool: ...
61+
def __gt__(self, other: Any) -> bool: ...
62+
def __ge__(self, other: Any) -> bool: ...
63+
else:
64+
def get_bytes(self) -> _Bytes: ...
65+
def get_bytes_le(self) -> _Bytes: ...
66+
def get_clock_seq(self) -> _Int: ...
67+
def get_clock_seq_hi_variant(self) -> _Int: ...
68+
def get_clock_seq_low(self) -> _Int: ...
69+
def get_fields(self) -> _FieldsType: ...
70+
def get_hex(self) -> str: ...
71+
def get_node(self) -> _Int: ...
72+
def get_time(self) -> _Int: ...
73+
def get_time_hi_version(self) -> _Int: ...
74+
def get_time_low(self) -> _Int: ...
75+
def get_time_mid(self) -> _Int: ...
76+
def get_urn(self) -> str: ...
77+
def get_variant(self) -> str: ...
78+
def get_version(self) -> Optional[_Int]: ...
79+
def __cmp__(self, other: Any) -> _Int: ...
80+
81+
def getnode() -> int: ...
82+
def uuid1(node: Optional[_Int] = ..., clock_seq: Optional[_Int] = ...) -> UUID: ...
83+
def uuid3(namespace: UUID, name: str) -> UUID: ...
84+
def uuid4() -> UUID: ...
85+
def uuid5(namespace: UUID, name: str) -> UUID: ...
86+
87+
NAMESPACE_DNS = ... # type: UUID
88+
NAMESPACE_URL = ... # type: UUID
89+
NAMESPACE_OID = ... # type: UUID
90+
NAMESPACE_X500 = ... # type: UUID
91+
RESERVED_NCS = ... # type: str
92+
RFC_4122 = ... # type: str
93+
RESERVED_MICROSOFT = ... # type: str
94+
RESERVED_FUTURE = ... # type: str

0 commit comments

Comments
 (0)