Skip to content

Commit 78ef52d

Browse files
committed
Test provenance with and without ORCID; fix missing user provenance when running a CommandLineTool directly
1 parent ea2a0b9 commit 78ef52d

File tree

6 files changed

+112
-56
lines changed

6 files changed

+112
-56
lines changed

cwltool/context.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,8 @@ def __init__(self, kwargs: Optional[dict[str, Any]] = None) -> None:
183183
self.orcid: str = ""
184184
self.cwl_full_name: str = ""
185185
self.process_run_id: Optional[str] = None
186+
self.prov_host: bool = False
187+
self.prov_user: bool = False
186188
self.prov_obj: Optional[ProvenanceProfile] = None
187189
self.mpi_config: MpiConfig = MpiConfig()
188190
self.default_stdout: Optional[Union[IO[bytes], TextIO]] = None

cwltool/cwlprov/provenance_profile.py

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
ORE,
2828
PROVENANCE,
2929
RO,
30-
SCHEMA,
3130
SHA1,
3231
SHA256,
3332
TEXT_PLAIN,
@@ -144,25 +143,10 @@ def generate_prov_doc(self) -> tuple[str, ProvDocument]:
144143
# .. but we always know cwltool was launched (directly or indirectly)
145144
# by a user account, as cwltool is a command line tool
146145
account = self.document.agent(ACCOUNT_UUID)
147-
if self.orcid or self.full_name:
148-
person: dict[Union[str, Identifier], Any] = {
149-
PROV_TYPE: PROV["Person"],
150-
"prov:type": SCHEMA["Person"],
151-
}
152-
if self.full_name:
153-
person["prov:label"] = self.full_name
154-
person["foaf:name"] = self.full_name
155-
person["schema:name"] = self.full_name
156-
else:
157-
# TODO: Look up name from ORCID API?
158-
pass
159-
agent = self.document.agent(self.orcid or uuid.uuid4().urn, person)
160-
self.document.actedOnBehalfOf(account, agent)
161-
else:
162-
if self.host_provenance:
163-
self.research_object.host_provenance(self.document)
164-
if self.user_provenance:
165-
self.research_object.user_provenance(self.document)
146+
if self.host_provenance:
147+
self.research_object.host_provenance(self.document)
148+
if self.user_provenance or self.orcid or self.full_name:
149+
self.research_object.user_provenance(self.document)
166150
# The execution of cwltool
167151
wfengine = self.document.agent(
168152
self.engine_uuid,

cwltool/cwlprov/ro.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
METADATA,
3838
ORCID,
3939
PROVENANCE,
40+
SCHEMA,
4041
SHA1,
4142
SHA256,
4243
SHA512,
@@ -184,12 +185,14 @@ def user_provenance(self, document: ProvDocument) -> None:
184185

185186
user = document.agent(
186187
self.orcid or USER_UUID,
187-
{
188-
provM.PROV_TYPE: provM.PROV["Person"],
189-
provM.PROV_LABEL: self.full_name,
190-
FOAF["name"]: self.full_name,
191-
FOAF["account"]: account,
192-
},
188+
[
189+
(provM.PROV_TYPE, SCHEMA["Person"]),
190+
(provM.PROV_TYPE, provM.PROV["Person"]),
191+
(provM.PROV_LABEL, self.full_name),
192+
(FOAF["name"], self.full_name),
193+
(FOAF["account"], account),
194+
(SCHEMA["name"], self.full_name),
195+
],
193196
)
194197
# cwltool may be started on the shell (directly by user),
195198
# by shell script (indirectly by user)

cwltool/executors.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,8 +195,11 @@ def run_jobs(
195195
if not isinstance(process, Workflow) and runtime_context.research_obj is not None:
196196
process.provenance_object = runtime_context.research_obj.initialize_provenance(
197197
full_name=runtime_context.cwl_full_name,
198-
host_provenance=False,
199-
user_provenance=False,
198+
# the following are only set from main when a command line tool is run directly;
199+
# when nested in a workflow, they should be disabled since they would
200+
# already have been provided/initialized by the parent workflow prov-obj
201+
host_provenance=runtime_context.prov_host,
202+
user_provenance=runtime_context.prov_user,
200203
orcid=runtime_context.orcid,
201204
# single tool execution, so RO UUID = wf UUID = tool UUID
202205
run_uuid=runtime_context.research_obj.ro_uuid,

cwltool/main.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1060,6 +1060,11 @@ def main(
10601060

10611061
loadingContext = setup_loadingContext(loadingContext, runtimeContext, args)
10621062

1063+
if loadingContext.research_obj:
1064+
# forward early the parameters required to run a single command line tool
1065+
runtimeContext.prov_host = loadingContext.host_provenance
1066+
runtimeContext.prov_user = loadingContext.user_provenance
1067+
10631068
uri, tool_file_uri = resolve_tool_uri(
10641069
args.workflow,
10651070
resolver=loadingContext.resolver,

tests/test_provenance.py

Lines changed: 87 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -32,22 +32,23 @@
3232
SCHEMA = Namespace("http://schema.org/")
3333
CWLPROV = Namespace("https://w3id.org/cwl/prov#")
3434
OA = Namespace("http://www.w3.org/ns/oa#")
35+
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
3536

3637

3738
TEST_ORCID = "https://orcid.org/0000-0003-4862-3349"
3839

3940

40-
def cwltool(tmp_path: Path, *args: Any) -> Path:
41+
def cwltool(tmp_path: Path, *args: Any, with_orcid: bool = False) -> Path:
4142
prov_folder = tmp_path / "provenance"
4243
prov_folder.mkdir()
4344
new_args = [
4445
"--enable-user-provenance",
4546
"--enable-host-provenance",
46-
"--orcid",
47-
TEST_ORCID,
4847
"--provenance",
4948
str(prov_folder),
5049
]
50+
if with_orcid:
51+
new_args.extend(["--orcid", TEST_ORCID])
5152
new_args.extend(args)
5253
# Run within a temporary directory to not pollute git checkout
5354
tmp_dir = tmp_path / "cwltool-run"
@@ -59,61 +60,81 @@ def cwltool(tmp_path: Path, *args: Any) -> Path:
5960

6061

6162
@needs_docker
62-
def test_hello_workflow(tmp_path: Path) -> None:
63+
@pytest.mark.parametrize("with_orcid", [True, False])
64+
def test_hello_workflow(tmp_path: Path, with_orcid: bool) -> None:
6365
check_provenance(
6466
cwltool(
6567
tmp_path,
6668
get_data("tests/wf/hello-workflow.cwl"),
6769
"--usermessage",
6870
"Hello workflow",
69-
)
71+
with_orcid=with_orcid,
72+
),
73+
with_orcid=with_orcid,
7074
)
7175

7276

7377
@needs_docker
74-
def test_hello_single_tool(tmp_path: Path) -> None:
78+
@pytest.mark.parametrize("with_orcid", [True, False])
79+
def test_hello_single_tool(tmp_path: Path, with_orcid: bool) -> None:
7580
check_provenance(
7681
cwltool(
7782
tmp_path,
7883
get_data("tests/wf/hello_single_tool.cwl"),
7984
"--message",
8085
"Hello tool",
86+
with_orcid=with_orcid,
8187
),
8288
single_tool=True,
89+
with_orcid=with_orcid,
8390
)
8491

8592

8693
@needs_docker
87-
def test_revsort_workflow(tmp_path: Path) -> None:
94+
@pytest.mark.parametrize("with_orcid", [True, False])
95+
def test_revsort_workflow(tmp_path: Path, with_orcid: bool) -> None:
8896
folder = cwltool(
8997
tmp_path,
9098
get_data("tests/wf/revsort.cwl"),
9199
get_data("tests/wf/revsort-job.json"),
100+
with_orcid=with_orcid,
92101
)
93102
check_output_object(folder)
94-
check_provenance(folder)
103+
check_provenance(folder, with_orcid=with_orcid)
95104

96105

97106
@needs_docker
98-
def test_revsort_workflow_shortcut(tmp_path: Path) -> None:
107+
@pytest.mark.parametrize("with_orcid", [True, False])
108+
def test_revsort_workflow_shortcut(tmp_path: Path, with_orcid: bool) -> None:
99109
"""Confirm that using 'cwl:tool' shortcut still snapshots the CWL files."""
100110
folder = cwltool(
101111
tmp_path,
102112
get_data("tests/wf/revsort-job-shortcut.json"),
113+
with_orcid=with_orcid,
103114
)
104115
check_output_object(folder)
105-
check_provenance(folder)
116+
check_provenance(folder, with_orcid=with_orcid)
106117
assert not (folder / "snapshot" / "revsort-job-shortcut.json").exists()
107118
assert len(list((folder / "snapshot").iterdir())) == 4
108119

109120

110121
@needs_docker
111-
def test_nested_workflow(tmp_path: Path) -> None:
112-
check_provenance(cwltool(tmp_path, get_data("tests/wf/nested.cwl")), nested=True)
122+
@pytest.mark.parametrize("with_orcid", [True, False])
123+
def test_nested_workflow(tmp_path: Path, with_orcid: bool) -> None:
124+
check_provenance(
125+
cwltool(
126+
tmp_path,
127+
get_data("tests/wf/nested.cwl"),
128+
with_orcid=with_orcid,
129+
),
130+
nested=True,
131+
with_orcid=with_orcid,
132+
)
113133

114134

115135
@needs_docker
116-
def test_secondary_files_implicit(tmp_path: Path) -> None:
136+
@pytest.mark.parametrize("with_orcid", [True, False])
137+
def test_secondary_files_implicit(tmp_path: Path, with_orcid: bool) -> None:
117138
file1 = tmp_path / "foo1.txt"
118139
file1idx = tmp_path / "foo1.txt.idx"
119140

@@ -123,13 +144,20 @@ def test_secondary_files_implicit(tmp_path: Path) -> None:
123144
f.write("bar")
124145

125146
# secondary will be picked up by .idx
126-
folder = cwltool(tmp_path, get_data("tests/wf/sec-wf.cwl"), "--file1", str(file1))
127-
check_provenance(folder, secondary_files=True)
147+
folder = cwltool(
148+
tmp_path,
149+
get_data("tests/wf/sec-wf.cwl"),
150+
"--file1",
151+
str(file1),
152+
with_orcid=with_orcid,
153+
)
154+
check_provenance(folder, secondary_files=True, with_orcid=with_orcid)
128155
check_secondary_files(folder)
129156

130157

131158
@needs_docker
132-
def test_secondary_files_explicit(tmp_path: Path) -> None:
159+
@pytest.mark.parametrize("with_orcid", [True, False])
160+
def test_secondary_files_explicit(tmp_path: Path, with_orcid: bool) -> None:
133161
# Deliberately do NOT have common basename or extension
134162
file1dir = tmp_path / "foo"
135163
file1dir.mkdir()
@@ -164,22 +192,33 @@ def test_secondary_files_explicit(tmp_path: Path) -> None:
164192
j = json.dumps(job, ensure_ascii=True)
165193
fp.write(j.encode("ascii"))
166194

167-
folder = cwltool(tmp_path, get_data("tests/wf/sec-wf.cwl"), str(jobJson))
168-
check_provenance(folder, secondary_files=True)
195+
folder = cwltool(
196+
tmp_path,
197+
get_data("tests/wf/sec-wf.cwl"),
198+
str(jobJson),
199+
with_orcid=with_orcid,
200+
)
201+
check_provenance(folder, secondary_files=True, with_orcid=with_orcid)
169202
check_secondary_files(folder)
170203

171204

172205
@needs_docker
173-
def test_secondary_files_output(tmp_path: Path) -> None:
206+
@pytest.mark.parametrize("with_orcid", [True, False])
207+
def test_secondary_files_output(tmp_path: Path, with_orcid: bool) -> None:
174208
# secondary will be picked up by .idx
175-
folder = cwltool(tmp_path, get_data("tests/wf/sec-wf-out.cwl"))
176-
check_provenance(folder, secondary_files=True)
209+
folder = cwltool(
210+
tmp_path,
211+
get_data("tests/wf/sec-wf-out.cwl"),
212+
with_orcid=with_orcid,
213+
)
214+
check_provenance(folder, secondary_files=True, with_orcid=with_orcid)
177215
# Skipped, not the same secondary files as above
178216
# self.check_secondary_files()
179217

180218

181219
@needs_docker
182-
def test_directory_workflow(tmp_path: Path) -> None:
220+
@pytest.mark.parametrize("with_orcid", [True, False])
221+
def test_directory_workflow(tmp_path: Path, with_orcid: bool) -> None:
183222
dir2 = tmp_path / "dir2"
184223
dir2.mkdir()
185224
sha1 = {
@@ -195,8 +234,14 @@ def test_directory_workflow(tmp_path: Path) -> None:
195234
with open(dir2 / x, "w", encoding="ascii") as f:
196235
f.write(x)
197236

198-
folder = cwltool(tmp_path, get_data("tests/wf/directory.cwl"), "--dir", str(dir2))
199-
check_provenance(folder, directory=True)
237+
folder = cwltool(
238+
tmp_path,
239+
get_data("tests/wf/directory.cwl"),
240+
"--dir",
241+
str(dir2),
242+
with_orcid=with_orcid,
243+
)
244+
check_provenance(folder, directory=True, with_orcid=with_orcid)
200245

201246
# Output should include ls stdout of filenames a b c on each line
202247
file_list = (
@@ -219,10 +264,12 @@ def test_directory_workflow(tmp_path: Path) -> None:
219264

220265

221266
@needs_docker
222-
def test_no_data_files(tmp_path: Path) -> None:
267+
@pytest.mark.parametrize("with_orcid", [True, False])
268+
def test_no_data_files(tmp_path: Path, with_orcid: bool) -> None:
223269
folder = cwltool(
224270
tmp_path,
225271
get_data("tests/wf/conditional_step_no_inputs.cwl"),
272+
with_orcid=with_orcid,
226273
)
227274
check_bagit(folder)
228275

@@ -273,6 +320,7 @@ def check_provenance(
273320
single_tool: bool = False,
274321
directory: bool = False,
275322
secondary_files: bool = False,
323+
with_orcid: bool = False,
276324
) -> None:
277325
check_folders(base_path)
278326
check_bagit(base_path)
@@ -283,6 +331,7 @@ def check_provenance(
283331
single_tool=single_tool,
284332
directory=directory,
285333
secondary_files=secondary_files,
334+
with_orcid=with_orcid,
286335
)
287336

288337

@@ -473,6 +522,7 @@ def check_prov(
473522
single_tool: bool = False,
474523
directory: bool = False,
475524
secondary_files: bool = False,
525+
with_orcid: bool = False,
476526
) -> None:
477527
prov_file = base_path / "metadata" / "provenance" / "primary.cwlprov.nt"
478528
assert prov_file.is_file(), f"Can't find {prov_file}"
@@ -512,10 +562,20 @@ def check_prov(
512562
) in g, "Engine not declared as SoftwareAgent"
513563

514564
# run should be associated to the user
565+
accounts = set(g.subjects(RDF.type, FOAF.OnlineAccount))
566+
assert len(accounts) == 1
567+
account = accounts.pop()
515568
people = set(g.subjects(RDF.type, SCHEMA.Person))
516569
assert len(people) == 1, "Can't find associated person in workflow run"
517570
person = people.pop()
518-
assert person == URIRef(TEST_ORCID)
571+
if with_orcid:
572+
assert person == URIRef(TEST_ORCID)
573+
else:
574+
account_names = set(g.objects(account, FOAF.accountName))
575+
assert len(account_names) == 1
576+
account_name = account_names.pop()
577+
machine_user = provenance._whoami()[0]
578+
assert account_name.value == machine_user
519579

520580
# find the random UUID assigned to cwltool
521581
tool_agents = set(g.subjects(RDF.type, PROV.SoftwareAgent))
@@ -528,9 +588,8 @@ def check_prov(
528588
agents.remove(engine) # the main tool
529589
remain_agents = agents - tool_agents
530590
assert len(remain_agents) == 1
531-
cwltool_agent = remain_agents.pop()
532591
assert (
533-
cwltool_agent,
592+
account,
534593
PROV.actedOnBehalfOf,
535594
person,
536595
) in g, "Association of cwltool agent acting for user is missing"

0 commit comments

Comments
 (0)