|
13 | 13 | import posixpath
|
14 | 14 | import shutil
|
15 | 15 | import tempfile
|
16 |
| -import itertools |
17 | 16 | import logging
|
18 | 17 |
|
19 | 18 | import hashlib
|
|
32 | 31 | # move to a regular typing import when Python 3.3-3.6 is no longer supported
|
33 | 32 | import six
|
34 | 33 | from six.moves import urllib
|
| 34 | +from ruamel import yaml |
35 | 35 | import prov.model as provM
|
36 | 36 | from prov.identifier import Namespace, Identifier
|
37 | 37 | from prov.model import (PROV, ProvDocument, # pylint: disable=unused-import
|
@@ -323,10 +323,10 @@ def __init__(self,
|
323 | 323 | self.engine_uuid = research_object.engine_uuid
|
324 | 324 | self.add_to_manifest = self.research_object.add_to_manifest
|
325 | 325 | if self.orcid:
|
326 |
| - _logger.info(u"[provenance] Creator ORCID: %s", self.orcid) |
| 326 | + _logger.debug(u"[provenance] Creator ORCID: %s", self.orcid) |
327 | 327 | self.full_name = full_name or None
|
328 | 328 | if self.full_name:
|
329 |
| - _logger.info(u"[provenance] Creator Full name: %s", self.full_name) |
| 329 | + _logger.debug(u"[provenance] Creator Full name: %s", self.full_name) |
330 | 330 | if not run_uuid:
|
331 | 331 | run_uuid = uuid.uuid4()
|
332 | 332 | self.workflow_run_uuid = run_uuid
|
@@ -982,7 +982,7 @@ def finalize_prov_profile(self, name):
|
982 | 982 | self.document.serialize(provenance_file, format="rdf", rdf_format="json-ld")
|
983 | 983 | prov_ids.append(self.provenance_ns[filename + ".jsonld"])
|
984 | 984 |
|
985 |
| - _logger.info("[provenance] added provenance: %s" % prov_ids) |
| 985 | + _logger.debug("[provenance] added provenance: %s" % prov_ids) |
986 | 986 | return prov_ids
|
987 | 987 |
|
988 | 988 | class ResearchObject():
|
@@ -1014,7 +1014,7 @@ def __init__(self, temp_prefix_ro="tmp", orcid=None, full_name=None):
|
1014 | 1014 | self.make_fs_access = None # type: Optional[Callable[[Text], StdFsAccess]]
|
1015 | 1015 |
|
1016 | 1016 | self._initialize()
|
1017 |
| - _logger.info(u"[provenance] Temporary research object: %s", self.folder) |
| 1017 | + _logger.debug(u"[provenance] Temporary research object: %s", self.folder) |
1018 | 1018 |
|
1019 | 1019 | def __str__(self):
|
1020 | 1020 | return "ResearchObject <%s> in <%s>" % (
|
@@ -1372,7 +1372,7 @@ def _write_bag_info(self):
|
1372 | 1372 | total_size = sum(self.bagged_size.values())
|
1373 | 1373 | num_files = len(self.bagged_size)
|
1374 | 1374 | info_file.write(u"Payload-Oxum: %d.%d\n" % (total_size, num_files))
|
1375 |
| - _logger.info(u"[provenance] Generated bagit metadata: %s", self.folder) |
| 1375 | + _logger.debug(u"[provenance] Generated bagit metadata: %s", self.folder) |
1376 | 1376 |
|
1377 | 1377 | def generate_snapshot(self, prov_dep):
|
1378 | 1378 | # type: (MutableMapping[Text, Any]) -> None
|
@@ -1419,7 +1419,7 @@ def packed_workflow(self, packed): # type: (Text) -> None
|
1419 | 1419 | with self.write_bag_file(rel_path, encoding=None) as write_pack:
|
1420 | 1420 | # YAML is always UTF8, but json.dumps gives us str in py2
|
1421 | 1421 | write_pack.write(packed.encode(ENCODING))
|
1422 |
| - _logger.info(u"[provenance] Added packed workflow: %s", rel_path) |
| 1422 | + _logger.debug(u"[provenance] Added packed workflow: %s", rel_path) |
1423 | 1423 |
|
1424 | 1424 | def has_data_file(self, sha1hash):
|
1425 | 1425 | # type: (str) -> bool
|
@@ -1458,10 +1458,10 @@ def add_data_file(self, from_fp, when=None, content_type=None):
|
1458 | 1458 | Hasher)
|
1459 | 1459 | # Inefficient, bagit support need to checksum again
|
1460 | 1460 | self._add_to_bagit(rel_path)
|
1461 |
| - _logger.info(u"[provenance] Added data file %s", path) |
| 1461 | + _logger.debug(u"[provenance] Added data file %s", path) |
1462 | 1462 | if when:
|
1463 | 1463 | self._file_provenance[rel_path] = self._self_made(when)
|
1464 |
| - _logger.info(u"[provenance] Relative path for data file %s", rel_path) |
| 1464 | + _logger.debug(u"[provenance] Relative path for data file %s", rel_path) |
1465 | 1465 |
|
1466 | 1466 | if content_type:
|
1467 | 1467 | self._content_types[rel_path] = content_type
|
@@ -1547,7 +1547,7 @@ def create_job(self,
|
1547 | 1547 | j = json.dumps(copied, indent=4, ensure_ascii=False)
|
1548 | 1548 | with self.write_bag_file(rel_path) as file_path:
|
1549 | 1549 | file_path.write(j + u"\n")
|
1550 |
| - _logger.info(u"[provenance] Generated customised job file: %s", rel_path) |
| 1550 | + _logger.debug(u"[provenance] Generated customised job file: %s", rel_path) |
1551 | 1551 | #Generate dictionary with keys as workflow level input IDs and values as
|
1552 | 1552 | #1) for files the relativised location containing hash
|
1553 | 1553 | #2) for other attributes, the actual value.
|
@@ -1583,7 +1583,8 @@ def _relativise_files(self, structure):
|
1583 | 1583 | if not relative_path and "location" in structure:
|
1584 | 1584 | # Register in RO; but why was this not picked
|
1585 | 1585 | # up by used_artefacts?
|
1586 |
| - _logger.warning("File not previously registered in RO: %s", structure) |
| 1586 | + _logger.warning("File not previously registered in RO: %s", |
| 1587 | + yaml.dump(structure)) |
1587 | 1588 | fsaccess = self.make_fs_access("")
|
1588 | 1589 | with fsaccess.open(structure["location"], "rb") as fp:
|
1589 | 1590 | relative_path = self.add_data_file(fp)
|
@@ -1632,7 +1633,7 @@ def close(self, save_to=None):
|
1632 | 1633 | """
|
1633 | 1634 | if save_to is None:
|
1634 | 1635 | if self.folder:
|
1635 |
| - _logger.info(u"[provenance] Deleting temporary %s", self.folder) |
| 1636 | + _logger.debug(u"[provenance] Deleting temporary %s", self.folder) |
1636 | 1637 | shutil.rmtree(self.folder, ignore_errors=True)
|
1637 | 1638 | else:
|
1638 | 1639 | save_to = os.path.abspath(save_to)
|
|
0 commit comments