Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ eggs/
*.egg
.tox/
.pytest_cache
*.so

# Editor Temps
.*.sw?
Expand Down
23 changes: 1 addition & 22 deletions cwltool/cwlprov/provenance_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from collections.abc import MutableMapping, MutableSequence, Sequence
from io import BytesIO
from pathlib import PurePath, PurePosixPath
from socket import getfqdn
from typing import TYPE_CHECKING, Any, Optional, Union, cast

from prov.identifier import Identifier, QualifiedName
Expand All @@ -24,7 +23,6 @@
ACCOUNT_UUID,
CWLPROV,
ENCODING,
FOAF,
METADATA,
ORE,
PROVENANCE,
Expand Down Expand Up @@ -108,25 +106,6 @@

def generate_prov_doc(self) -> tuple[str, ProvDocument]:
"""Add basic namespaces."""

def host_provenance(document: ProvDocument) -> None:
"""Record host provenance."""
document.add_namespace(CWLPROV)
document.add_namespace(UUID)
document.add_namespace(FOAF)

hostname = getfqdn()
# won't have a foaf:accountServiceHomepage for unix hosts, but
# we can at least provide hostname
document.agent(
ACCOUNT_UUID,
{
PROV_TYPE: FOAF["OnlineAccount"],
"prov:location": hostname,
CWLPROV["hostname"]: hostname,
},
)

self.cwltool_version = f"cwltool {versionstring().split()[-1]}"
self.document.add_namespace("wfprov", "http://purl.org/wf4ever/wfprov#")
# document.add_namespace('prov', 'http://www.w3.org/ns/prov#')
Expand Down Expand Up @@ -181,7 +160,7 @@
self.document.actedOnBehalfOf(account, agent)
else:
if self.host_provenance:
host_provenance(self.document)
self.research_object.host_provenance(self.document)

Check warning on line 163 in cwltool/cwlprov/provenance_profile.py

View check run for this annotation

Codecov / codecov/patch

cwltool/cwlprov/provenance_profile.py#L163

Added line #L163 was not covered by tests
if self.user_provenance:
self.research_object.user_provenance(self.document)
# The execution of cwltool
Expand Down
86 changes: 80 additions & 6 deletions cwltool/cwlprov/ro.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
import uuid
from collections.abc import MutableMapping, MutableSequence
from pathlib import Path, PurePosixPath
from typing import IO, Any, Optional, Union, cast
from socket import getfqdn
from typing import TYPE_CHECKING, IO, Any, Optional, Union, cast

import prov.model as provM
from prov.model import PROV, ProvDocument
from prov.model import ProvDocument

from ..loghandler import _logger
from ..stdfsaccess import StdFsAccess
Expand All @@ -27,6 +28,7 @@
from . import Aggregate, Annotation, AuthoredBy, _valid_orcid, _whoami, checksum_copy
from .provenance_constants import (
ACCOUNT_UUID,
CWLPROV,
CWLPROV_VERSION,
DATA,
ENCODING,
Expand All @@ -46,6 +48,9 @@
Hasher,
)

if TYPE_CHECKING:
from .provenance_profile import ProvenanceProfile # pylint: disable=unused-import


class ResearchObject:
"""CWLProv Research Object."""
Expand Down Expand Up @@ -82,6 +87,34 @@
self._initialize()
_logger.debug("[provenance] Temporary research object: %s", self.folder)

def initialize_provenance(
    self,
    full_name: str,
    host_provenance: bool,
    user_provenance: bool,
    orcid: str,
    fsaccess: StdFsAccess,
    run_uuid: Optional[uuid.UUID] = None,
) -> "ProvenanceProfile":
    """
    Build the provenance profile bound to this research object.

    This is a hook: callers obtain their profile through this method
    instead of instantiating ProvenanceProfile themselves, so overriding
    it lets the profile construction be customised.

    Every argument is forwarded unchanged, by keyword, to the
    ProvenanceProfile constructor, together with this research object
    as ``research_object``.
    """
    # Imported here and not at module level, where the name is only
    # imported under TYPE_CHECKING -- presumably to avoid a circular
    # import between the two modules (TODO confirm).
    from .provenance_profile import ProvenanceProfile

    profile = ProvenanceProfile(
        research_object=self,
        full_name=full_name,
        host_provenance=host_provenance,
        user_provenance=user_provenance,
        orcid=orcid,
        fsaccess=fsaccess,
        run_uuid=run_uuid,
    )
    return profile

def self_check(self) -> None:
"""Raise ValueError if this RO is closed."""
if self.closed:
Expand Down Expand Up @@ -117,10 +150,22 @@
bag_it_file.write("BagIt-Version: 0.97\n")
bag_it_file.write(f"Tag-File-Character-Encoding: {ENCODING}\n")

def resolve_user(self) -> tuple[str, str]:
    """
    Return the (username, fullname) pair identifying the current user.

    This is a hook: override it when the calling code has a better way
    of resolving the user than the default, which delegates to
    ``_whoami()``.

    The pair is consumed by ``user_provenance``: the username labels the
    account agent, and the fullname is used when no full name was
    provided for this research object.
    """
    identity = _whoami()
    return identity

Check warning on line 163 in cwltool/cwlprov/ro.py

View check run for this annotation

Codecov / codecov/patch

cwltool/cwlprov/ro.py#L163

Added line #L163 was not covered by tests

def user_provenance(self, document: ProvDocument) -> None:
"""Add the user provenance."""
self.self_check()
(username, fullname) = _whoami()
(username, fullname) = self.resolve_user()

Check warning on line 168 in cwltool/cwlprov/ro.py

View check run for this annotation

Codecov / codecov/patch

cwltool/cwlprov/ro.py#L168

Added line #L168 was not covered by tests

if not self.full_name:
self.full_name = fullname
Expand All @@ -132,16 +177,16 @@
ACCOUNT_UUID,
{
provM.PROV_TYPE: FOAF["OnlineAccount"],
"prov:label": username,
provM.PROV_LABEL: username,
FOAF["accountName"]: username,
},
)

user = document.agent(
self.orcid or USER_UUID,
{
provM.PROV_TYPE: PROV["Person"],
"prov:label": self.full_name,
provM.PROV_TYPE: provM.PROV["Person"],
provM.PROV_LABEL: self.full_name,
FOAF["name"]: self.full_name,
FOAF["account"]: account,
},
Expand All @@ -156,6 +201,35 @@
# get their name wrong!)
document.actedOnBehalfOf(account, user)

def resolve_host(self) -> tuple[str, str]:
    """
    Return the (fqdn, uri) pair identifying the executing host.

    This is a hook: override it when the calling code has a better way
    of resolving the host than the default.

    By default both members are the value reported by
    ``socket.getfqdn()``; they are returned separately so that an
    override may supply a URI that differs from the host name.
    """
    hostname = getfqdn()
    # Same value twice: the default has no URI distinct from the FQDN.
    return (hostname, hostname)

Check warning on line 213 in cwltool/cwlprov/ro.py

View check run for this annotation

Codecov / codecov/patch

cwltool/cwlprov/ro.py#L212-L213

Added lines #L212 - L213 were not covered by tests

def host_provenance(self, document: ProvDocument) -> None:
    """
    Record the executing host as an agent in the given document.

    Registers the CWLPROV, UUID and FOAF namespaces on ``document``,
    resolves the host through ``resolve_host`` and adds an agent
    identified by ACCOUNT_UUID that carries the host URI as its
    location and the host name as ``cwlprov:hostname``.
    """
    for namespace in (CWLPROV, UUID, FOAF):
        document.add_namespace(namespace)

    hostname, uri = self.resolve_host()

    # won't have a foaf:accountServiceHomepage for unix hosts, but
    # we can at least provide hostname
    host_attributes = {
        provM.PROV_TYPE: FOAF["OnlineAccount"],
        provM.PROV_LOCATION: uri,
        CWLPROV["hostname"]: hostname,
    }
    document.agent(ACCOUNT_UUID, host_attributes)

def add_tagfile(self, path: str, timestamp: Optional[datetime.datetime] = None) -> None:
"""Add tag files to our research object."""
self.self_check()
Expand Down
4 changes: 1 addition & 3 deletions cwltool/executors.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from .command_line_tool import CallbackJob, ExpressionJob
from .context import RuntimeContext, getdefault
from .cuda import cuda_version_and_device_count
from .cwlprov.provenance_profile import ProvenanceProfile
from .errors import WorkflowException
from .job import JobBase
from .loghandler import _logger
Expand Down Expand Up @@ -194,8 +193,7 @@ def run_jobs(

# define provenance profile for single commandline tool
if not isinstance(process, Workflow) and runtime_context.research_obj is not None:
process.provenance_object = ProvenanceProfile(
runtime_context.research_obj,
process.provenance_object = runtime_context.research_obj.initialize_provenance(
full_name=runtime_context.cwl_full_name,
host_provenance=False,
user_provenance=False,
Expand Down
3 changes: 1 addition & 2 deletions cwltool/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,7 @@ def __init__(
if is_main:
run_uuid = loadingContext.research_obj.ro_uuid

self.provenance_object = ProvenanceProfile(
loadingContext.research_obj,
self.provenance_object = loadingContext.research_obj.initialize_provenance(
full_name=loadingContext.cwl_full_name,
host_provenance=loadingContext.host_provenance,
user_provenance=loadingContext.user_provenance,
Expand Down
Loading