Skip to content

Commit 9731e16

Browse files
committed
simplifications
1 parent c73a361 commit 9731e16

File tree

2 files changed

+33
-53
lines changed

2 files changed

+33
-53
lines changed

cwltool/main.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -785,8 +785,7 @@ def loc_to_path(obj):
785785
if prov_log_handler:
786786
prov_log_handler.close()
787787
_logger.removeHandler(prov_log_handler)
788-
with open(prov_log_handler_filename, "rb") as log:
789-
research_obj.write_log(log)
788+
research_obj.write_log(prov_log_handler_filename)
790789
research_obj.close(args.provenance)
791790

792791
_logger.removeHandler(stderr_handler)

cwltool/provenance.py

Lines changed: 32 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -962,9 +962,9 @@ def __init__(self, temp_prefix_ro="tmp", orcid=None, full_name=None):
962962

963963
self.temp_prefix = temp_prefix_ro
964964
self.orcid = _valid_orcid(orcid)
965-
self.full_name = full_name or None
966-
self.folder = os.path.abspath(tempfile.mkdtemp(prefix=temp_prefix_ro)) # type: Optional[Text]
967-
self.final_location = None # type: Optional[Text]
965+
self.full_name = full_name
966+
self.folder = os.path.abspath(tempfile.mkdtemp(prefix=temp_prefix_ro)) # type: Text
967+
self.closed = False
968968
# map of filename "data/de/alsdklkas": 12398123 bytes
969969
self.bagged_size = {} # type: Dict
970970
self.tagfiles = set() # type: Set
@@ -989,26 +989,23 @@ def __init__(self, temp_prefix_ro="tmp", orcid=None, full_name=None):
989989

990990
def self_check(self): # type: () -> None
991991
"""Raises ValueError if this RO is closed."""
992-
if not self.folder:
992+
if self.closed:
993993
raise ValueError(
994994
"This ResearchObject has already been closed and is not "
995995
"available for futher manipulation.")
996996

997997
def __str__(self):
998-
return "ResearchObject <%s> in <%s>" % (
999-
self.ro_uuid, self.folder or self.final_location)
998+
return "ResearchObject <{}> in <{}>".format(self.ro_uuid, self.folder)
1000999

1001-
def _initialize(self):
1002-
# type: (...) -> None
1003-
assert self.folder
1004-
for research_obj_folder in (METADATA, DATA, WORKFLOW, SNAPSHOT, PROVENANCE):
1000+
def _initialize(self): # type: () -> None
1001+
for research_obj_folder in (METADATA, DATA, WORKFLOW, SNAPSHOT,
1002+
PROVENANCE):
10051003
os.makedirs(os.path.join(self.folder, research_obj_folder))
10061004
self._initialize_bagit()
10071005

1008-
def _initialize_bagit(self):
1009-
# type: (...) -> None
1010-
# Write fixed bagit header
1011-
assert self.folder
1006+
def _initialize_bagit(self): # type: () -> None
1007+
"""Write fixed bagit header."""
1008+
self.self_check()
10121009
bagit = os.path.join(self.folder, "bagit.txt")
10131010
# encoding: always UTF-8 (although ASCII would suffice here)
10141011
# newline: ensure LF also on Windows
@@ -1017,32 +1014,22 @@ def _initialize_bagit(self):
10171014
bag_it_file.write(u"BagIt-Version: 0.97\n")
10181015
bag_it_file.write(u"Tag-File-Character-Encoding: %s\n" % ENCODING)
10191016

1020-
def write_log(self,logger):
1021-
# type: (IO) -> None
1022-
self.self_check()
1017+
def write_log(self, log_path): # type: (Text) -> None
10231018
"""Copies log files to the snapshot/ directory."""
1024-
assert self.folder
1025-
path = os.path.join(self.folder, SNAPSHOT, logger.name.split("/")[-1])
1026-
# FIXME: What if destination path already exists?
1027-
try:
1028-
if os.path.isdir(path):
1029-
shutil.copytree(logger.name, path)
1030-
else:
1031-
shutil.copy(logger.name, path)
1032-
when = datetime.datetime.fromtimestamp(os.path.getmtime(logger.name))
1033-
self.add_tagfile(path, when)
1034-
except PermissionError:
1035-
pass # FIXME: avoids duplicate snapshotting; need better solution
1036-
1037-
1038-
def _finalize(self):
1039-
# type: () -> None
1019+
self.self_check()
1020+
dst_path = os.path.join(
1021+
self.folder, SNAPSHOT, os.path.basename(log_path))
1022+
while os.path.exists(dst_path):
1023+
dst_path = dst_path + "_{}".format(uuid.uuid4())
1024+
shutil.copy(log_path, dst_path)
1025+
when = datetime.datetime.fromtimestamp(os.path.getmtime(log_path))
1026+
self.add_tagfile(dst_path, when)
1027+
1028+
def _finalize(self): # type: () -> None
10401029
self._write_ro_manifest()
10411030
self._write_bag_info()
10421031

1043-
1044-
def user_provenance(self, document):
1045-
# type: (ProvDocument) -> None
1032+
def user_provenance(self, document): # type: (ProvDocument) -> None
10461033
"""Add the user provenance."""
10471034
self.self_check()
10481035
(username, fullname) = _whoami()
@@ -1112,7 +1099,6 @@ def add_tagfile(self, path, when=None):
11121099
if hashlib.sha512:
11131100
tag_file.seek(0)
11141101
checksums[SHA512] = checksum_copy(tag_file, hasher=hashlib.sha512)
1115-
assert self.folder
11161102
rel_path = _posix_path(os.path.relpath(path, self.folder))
11171103
self.tagfiles.add(rel_path)
11181104
self.add_to_manifest(rel_path, checksums)
@@ -1375,7 +1361,6 @@ def generate_snapshot(self, prov_dep):
13751361
# type: (MutableMapping[Text, Any]) -> None
13761362
self.self_check()
13771363
"""Copy all of the CWL files to the snapshot/ directory."""
1378-
assert self.folder
13791364
for key, value in prov_dep.items():
13801365
if key == "location" and value.split("/")[-1]:
13811366
filename = value.split("/")[-1]
@@ -1416,8 +1401,7 @@ def packed_workflow(self, packed): # type: (Text) -> None
14161401

14171402
def has_data_file(self, sha1hash): # type: (str) -> bool
14181403
"""Confirms the presence of the given file in the RO."""
1419-
folder = cast(str, self.folder or self.final_location)
1420-
folder = os.path.join(folder, DATA, sha1hash[0:2])
1404+
folder = os.path.join(self.folder, DATA, sha1hash[0:2])
14211405
hash_path = os.path.join(folder, sha1hash)
14221406
return os.path.isfile(hash_path)
14231407

@@ -1430,7 +1414,6 @@ def add_data_file(self, from_fp, when=None, content_type=None):
14301414
checksum = checksum_copy(from_fp, tmp)
14311415

14321416
# Calculate hash-based file path
1433-
assert self.folder
14341417
folder = os.path.join(self.folder, DATA, checksum[0:2])
14351418
path = os.path.join(folder, checksum)
14361419
# os.rename assumed safe, as our temp file should
@@ -1485,7 +1468,6 @@ def add_to_manifest(self, rel_path, checksums):
14851468
# metadata file, go to tag manifest
14861469
manifest = "tagmanifest"
14871470

1488-
assert self.folder
14891471
# Add checksums to corresponding manifest files
14901472
for (method, hash_value) in checksums.items():
14911473
# File not in manifest because we bailed out on
@@ -1505,7 +1487,6 @@ def _add_to_bagit(self, rel_path, **checksums):
15051487
# type: (Text, Any) -> None
15061488
if posixpath.isabs(rel_path):
15071489
raise ValueError("rel_path must be relative: %s" % rel_path)
1508-
assert self.folder
15091490
local_path = os.path.join(self.folder, _local_path(rel_path))
15101491
if not os.path.exists(local_path):
15111492
raise IOError("File %s does not exist within RO: %s" % (rel_path, local_path))
@@ -1570,8 +1551,11 @@ def _relativise_files(self, structure):
15701551
if structure.get("class") == "File":
15711552
relative_path = None
15721553
if "checksum" in structure:
1573-
sha1, checksum = structure["checksum"].split("$")
1574-
assert sha1 == SHA1
1554+
alg, checksum = structure["checksum"].split("$")
1555+
if alg != SHA1:
1556+
raise TypeError(
1557+
"Only SHA1 CWL checksums are currently supported: "
1558+
"{}".format(structure))
15751559
if self.has_data_file(checksum):
15761560
prefix = checksum[0:2]
15771561
relative_path = posixpath.join(
@@ -1629,7 +1613,7 @@ def close(self, save_to=None):
16291613
ensure the temporary files of this Research Object are removed.
16301614
"""
16311615
if save_to is None:
1632-
if self.folder:
1616+
if not self.closed:
16331617
_logger.debug(u"[provenance] Deleting temporary %s", self.folder)
16341618
shutil.rmtree(self.folder, ignore_errors=True)
16351619
else:
@@ -1641,13 +1625,10 @@ def close(self, save_to=None):
16411625
if os.path.isdir(save_to):
16421626
_logger.info(u"[provenance] Deleting existing %s", save_to)
16431627
shutil.rmtree(save_to)
1644-
assert self.folder
16451628
shutil.move(self.folder, save_to)
16461629
_logger.info(u"[provenance] Research Object saved to %s", save_to)
1647-
self.final_location = save_to
1648-
# Forget our temporary folder, which should no longer exists
1649-
# This makes later close() a no-op
1650-
self.folder = None
1630+
self.folder = save_to
1631+
self.closed = True
16511632

16521633
def checksum_copy(src_file, # type: IO
16531634
dst_file=None, # type: Optional[IO]

0 commit comments

Comments
 (0)