@@ -962,9 +962,9 @@ def __init__(self, temp_prefix_ro="tmp", orcid=None, full_name=None):
962
962
963
963
self .temp_prefix = temp_prefix_ro
964
964
self .orcid = _valid_orcid (orcid )
965
- self .full_name = full_name or None
966
- self .folder = os .path .abspath (tempfile .mkdtemp (prefix = temp_prefix_ro )) # type: Optional[ Text]
967
- self .final_location = None # type: Optional[Text]
965
+ self .full_name = full_name
966
+ self .folder = os .path .abspath (tempfile .mkdtemp (prefix = temp_prefix_ro )) # type: Text
967
+ self .closed = False
968
968
# map of filename "data/de/alsdklkas": 12398123 bytes
969
969
self .bagged_size = {} # type: Dict
970
970
self .tagfiles = set () # type: Set
@@ -989,26 +989,23 @@ def __init__(self, temp_prefix_ro="tmp", orcid=None, full_name=None):
989
989
990
990
def self_check(self):  # type: () -> None
    """Raise ValueError if this ResearchObject has been closed.

    Methods that read or modify the RO folder call this first so that a
    closed object fails loudly instead of touching files it no longer owns.

    Raises:
        ValueError: if ``self.closed`` is true (the flag set by ``close()``).
    """
    if self.closed:
        raise ValueError(
            "This ResearchObject has already been closed and is not "
            "available for further manipulation.")
996
996
997
997
def __str__(self):  # type: () -> str
    """Return a short description with the RO's UUID and its folder."""
    return "ResearchObject <{}> in <{}>".format(self.ro_uuid, self.folder)
1000
999
1001
def _initialize(self):  # type: () -> None
    """Create the RO folder skeleton, then write the BagIt header.

    One sub-folder is created under ``self.folder`` for each of METADATA,
    DATA, WORKFLOW, SNAPSHOT and PROVENANCE. ``os.makedirs`` raises if a
    folder already exists, so this is meant to run once on a fresh
    temporary folder.
    """
    for research_obj_folder in (METADATA, DATA, WORKFLOW, SNAPSHOT,
                                PROVENANCE):
        os.makedirs(os.path.join(self.folder, research_obj_folder))
    self._initialize_bagit()
1007
1005
1008
- def _initialize_bagit (self ):
1009
- # type: (...) -> None
1010
- # Write fixed bagit header
1011
- assert self .folder
1006
+ def _initialize_bagit (self ): # type: () -> None
1007
+ """Write fixed bagit header."""
1008
+ self .self_check ()
1012
1009
bagit = os .path .join (self .folder , "bagit.txt" )
1013
1010
# encoding: always UTF-8 (although ASCII would suffice here)
1014
1011
# newline: ensure LF also on Windows
@@ -1017,32 +1014,22 @@ def _initialize_bagit(self):
1017
1014
bag_it_file .write (u"BagIt-Version: 0.97\n " )
1018
1015
bag_it_file .write (u"Tag-File-Character-Encoding: %s\n " % ENCODING )
1019
1016
1020
def write_log(self, log_path):  # type: (Text) -> None
    """Copy a log file into the snapshot/ directory and register it.

    The copy keeps the base name of ``log_path``. If that name is already
    taken inside the snapshot folder, a ``_<uuid4>`` suffix is appended so
    that an earlier snapshot is never overwritten. The copy is then added
    as a tag file, timestamped with the source file's modification time.

    Args:
        log_path: path of the log file to copy.

    Raises:
        ValueError: if this ResearchObject has already been closed.
    """
    self.self_check()
    base_path = os.path.join(
        self.folder, SNAPSHOT, os.path.basename(log_path))
    dst_path = base_path
    # Derive every candidate from the base name so that suffixes do not
    # pile up on top of each other if a generated name is also taken.
    while os.path.exists(dst_path):
        dst_path = "{}_{}".format(base_path, uuid.uuid4())
    shutil.copy(log_path, dst_path)
    when = datetime.datetime.fromtimestamp(os.path.getmtime(log_path))
    self.add_tagfile(dst_path, when)
1027
+
1028
def _finalize(self):  # type: () -> None
    """Write the RO manifest, then the bag-info file."""
    self._write_ro_manifest()
    self._write_bag_info()
1042
1031
1043
-
1044
- def user_provenance (self , document ):
1045
- # type: (ProvDocument) -> None
1032
+ def user_provenance (self , document ): # type: (ProvDocument) -> None
1046
1033
"""Add the user provenance."""
1047
1034
self .self_check ()
1048
1035
(username , fullname ) = _whoami ()
@@ -1112,7 +1099,6 @@ def add_tagfile(self, path, when=None):
1112
1099
if hashlib .sha512 :
1113
1100
tag_file .seek (0 )
1114
1101
checksums [SHA512 ] = checksum_copy (tag_file , hasher = hashlib .sha512 )
1115
- assert self .folder
1116
1102
rel_path = _posix_path (os .path .relpath (path , self .folder ))
1117
1103
self .tagfiles .add (rel_path )
1118
1104
self .add_to_manifest (rel_path , checksums )
@@ -1375,7 +1361,6 @@ def generate_snapshot(self, prov_dep):
1375
1361
# type: (MutableMapping[Text, Any]) -> None
1376
1362
self .self_check ()
1377
1363
"""Copy all of the CWL files to the snapshot/ directory."""
1378
- assert self .folder
1379
1364
for key , value in prov_dep .items ():
1380
1365
if key == "location" and value .split ("/" )[- 1 ]:
1381
1366
filename = value .split ("/" )[- 1 ]
@@ -1416,8 +1401,7 @@ def packed_workflow(self, packed): # type: (Text) -> None
1416
1401
1417
1402
def has_data_file(self, sha1hash):  # type: (str) -> bool
    """Confirm the presence of the given file in the RO.

    Data files are looked up by hash: the first two characters of
    ``sha1hash`` name a sub-folder of the DATA folder and the full hash is
    the file name inside it.

    Args:
        sha1hash: hexadecimal hash string identifying the data file.

    Returns:
        True if a regular file exists at the hash-derived path under
        ``self.folder``, False otherwise.
    """
    folder = os.path.join(self.folder, DATA, sha1hash[0:2])
    hash_path = os.path.join(folder, sha1hash)
    return os.path.isfile(hash_path)
1423
1407
@@ -1430,7 +1414,6 @@ def add_data_file(self, from_fp, when=None, content_type=None):
1430
1414
checksum = checksum_copy (from_fp , tmp )
1431
1415
1432
1416
# Calculate hash-based file path
1433
- assert self .folder
1434
1417
folder = os .path .join (self .folder , DATA , checksum [0 :2 ])
1435
1418
path = os .path .join (folder , checksum )
1436
1419
# os.rename assumed safe, as our temp file should
@@ -1485,7 +1468,6 @@ def add_to_manifest(self, rel_path, checksums):
1485
1468
# metadata file, go to tag manifest
1486
1469
manifest = "tagmanifest"
1487
1470
1488
- assert self .folder
1489
1471
# Add checksums to corresponding manifest files
1490
1472
for (method , hash_value ) in checksums .items ():
1491
1473
# File not in manifest because we bailed out on
@@ -1505,7 +1487,6 @@ def _add_to_bagit(self, rel_path, **checksums):
1505
1487
# type: (Text, Any) -> None
1506
1488
if posixpath .isabs (rel_path ):
1507
1489
raise ValueError ("rel_path must be relative: %s" % rel_path )
1508
- assert self .folder
1509
1490
local_path = os .path .join (self .folder , _local_path (rel_path ))
1510
1491
if not os .path .exists (local_path ):
1511
1492
raise IOError ("File %s does not exist within RO: %s" % (rel_path , local_path ))
@@ -1570,8 +1551,11 @@ def _relativise_files(self, structure):
1570
1551
if structure .get ("class" ) == "File" :
1571
1552
relative_path = None
1572
1553
if "checksum" in structure :
1573
- sha1 , checksum = structure ["checksum" ].split ("$" )
1574
- assert sha1 == SHA1
1554
+ alg , checksum = structure ["checksum" ].split ("$" )
1555
+ if alg != SHA1 :
1556
+ raise TypeError (
1557
+ "Only SHA1 CWL checksums are currently supported: "
1558
+ "{}" .format (structure ))
1575
1559
if self .has_data_file (checksum ):
1576
1560
prefix = checksum [0 :2 ]
1577
1561
relative_path = posixpath .join (
@@ -1629,7 +1613,7 @@ def close(self, save_to=None):
1629
1613
ensure the temporary files of this Research Object are removed.
1630
1614
"""
1631
1615
if save_to is None :
1632
- if self .folder :
1616
+ if not self .closed :
1633
1617
_logger .debug (u"[provenance] Deleting temporary %s" , self .folder )
1634
1618
shutil .rmtree (self .folder , ignore_errors = True )
1635
1619
else :
@@ -1641,13 +1625,10 @@ def close(self, save_to=None):
1641
1625
if os .path .isdir (save_to ):
1642
1626
_logger .info (u"[provenance] Deleting existing %s" , save_to )
1643
1627
shutil .rmtree (save_to )
1644
- assert self .folder
1645
1628
shutil .move (self .folder , save_to )
1646
1629
_logger .info (u"[provenance] Research Object saved to %s" , save_to )
1647
- self .final_location = save_to
1648
- # Forget our temporary folder, which should no longer exists
1649
- # This makes later close() a no-op
1650
- self .folder = None
1630
+ self .folder = save_to
1631
+ self .closed = True
1651
1632
1652
1633
def checksum_copy (src_file , # type: IO
1653
1634
dst_file = None , # type: Optional[IO]
0 commit comments