# RDF namespace prefixes used by the provenance checks in this module.
SCHEMA = Namespace("http://schema.org/")
CWLPROV = Namespace("https://w3id.org/cwl/prov#")
OA = Namespace("http://www.w3.org/ns/oa#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")


# ORCID identifier handed to cwltool via ``--orcid`` when a test opts in.
TEST_ORCID = "https://orcid.org/0000-0003-4862-3349"
38
39
39
40
40
- def cwltool (tmp_path : Path , * args : Any ) -> Path :
41
+ def cwltool (tmp_path : Path , * args : Any , with_orcid : bool = False ) -> Path :
41
42
prov_folder = tmp_path / "provenance"
42
43
prov_folder .mkdir ()
43
44
new_args = [
44
45
"--enable-user-provenance" ,
45
46
"--enable-host-provenance" ,
46
- "--orcid" ,
47
- TEST_ORCID ,
48
47
"--provenance" ,
49
48
str (prov_folder ),
50
49
]
50
+ if with_orcid :
51
+ new_args .extend (["--orcid" , TEST_ORCID ])
51
52
new_args .extend (args )
52
53
# Run within a temporary directory to not pollute git checkout
53
54
tmp_dir = tmp_path / "cwltool-run"
@@ -59,61 +60,81 @@ def cwltool(tmp_path: Path, *args: Any) -> Path:
59
60
60
61
61
62
@needs_docker
@pytest.mark.parametrize("with_orcid", [True, False])
def test_hello_workflow(tmp_path: Path, with_orcid: bool) -> None:
    """Run the hello-workflow example and validate its provenance output.

    Parametrized over ``with_orcid`` so the same flag value is forwarded both
    to the ``cwltool`` helper and to ``check_provenance``.
    """
    check_provenance(
        cwltool(
            tmp_path,
            get_data("tests/wf/hello-workflow.cwl"),
            "--usermessage",
            "Hello workflow",
            with_orcid=with_orcid,
        ),
        with_orcid=with_orcid,
    )
71
75
72
76
73
77
@needs_docker
@pytest.mark.parametrize("with_orcid", [True, False])
def test_hello_single_tool(tmp_path: Path, with_orcid: bool) -> None:
    """Run the single-tool hello example and validate its provenance output.

    ``single_tool=True`` is passed to ``check_provenance``; ``with_orcid`` is
    forwarded unchanged to both the ``cwltool`` helper and the checker.
    """
    check_provenance(
        cwltool(
            tmp_path,
            get_data("tests/wf/hello_single_tool.cwl"),
            "--message",
            "Hello tool",
            with_orcid=with_orcid,
        ),
        single_tool=True,
        with_orcid=with_orcid,
    )
84
91
85
92
86
93
@needs_docker
@pytest.mark.parametrize("with_orcid", [True, False])
def test_revsort_workflow(tmp_path: Path, with_orcid: bool) -> None:
    """Run the revsort workflow with its job file and validate the results.

    Checks the output object first, then the provenance, for both values of
    ``with_orcid``.
    """
    folder = cwltool(
        tmp_path,
        get_data("tests/wf/revsort.cwl"),
        get_data("tests/wf/revsort-job.json"),
        with_orcid=with_orcid,
    )
    check_output_object(folder)
    check_provenance(folder, with_orcid=with_orcid)
95
104
96
105
97
106
@needs_docker
@pytest.mark.parametrize("with_orcid", [True, False])
def test_revsort_workflow_shortcut(tmp_path: Path, with_orcid: bool) -> None:
    """Confirm that using 'cwl:tool' shortcut still snapshots the CWL files.

    Only the job file is passed to the ``cwltool`` helper. The job file itself
    must not appear under ``snapshot``, which is expected to hold exactly four
    entries.
    """
    folder = cwltool(
        tmp_path,
        get_data("tests/wf/revsort-job-shortcut.json"),
        with_orcid=with_orcid,
    )
    check_output_object(folder)
    check_provenance(folder, with_orcid=with_orcid)
    assert not (folder / "snapshot" / "revsort-job-shortcut.json").exists()
    assert len(list((folder / "snapshot").iterdir())) == 4
108
119
109
120
110
121
@needs_docker
@pytest.mark.parametrize("with_orcid", [True, False])
def test_nested_workflow(tmp_path: Path, with_orcid: bool) -> None:
    """Run the nested workflow example and validate its provenance output.

    ``nested=True`` is passed to ``check_provenance``; ``with_orcid`` is
    forwarded unchanged to both the ``cwltool`` helper and the checker.
    """
    check_provenance(
        cwltool(
            tmp_path,
            get_data("tests/wf/nested.cwl"),
            with_orcid=with_orcid,
        ),
        nested=True,
        with_orcid=with_orcid,
    )
113
133
114
134
115
135
@needs_docker
116
- def test_secondary_files_implicit (tmp_path : Path ) -> None :
136
+ @pytest .mark .parametrize ("with_orcid" , [True , False ])
137
+ def test_secondary_files_implicit (tmp_path : Path , with_orcid : bool ) -> None :
117
138
file1 = tmp_path / "foo1.txt"
118
139
file1idx = tmp_path / "foo1.txt.idx"
119
140
@@ -123,13 +144,20 @@ def test_secondary_files_implicit(tmp_path: Path) -> None:
123
144
f .write ("bar" )
124
145
125
146
# secondary will be picked up by .idx
126
- folder = cwltool (tmp_path , get_data ("tests/wf/sec-wf.cwl" ), "--file1" , str (file1 ))
127
- check_provenance (folder , secondary_files = True )
147
+ folder = cwltool (
148
+ tmp_path ,
149
+ get_data ("tests/wf/sec-wf.cwl" ),
150
+ "--file1" ,
151
+ str (file1 ),
152
+ with_orcid = with_orcid ,
153
+ )
154
+ check_provenance (folder , secondary_files = True , with_orcid = with_orcid )
128
155
check_secondary_files (folder )
129
156
130
157
131
158
@needs_docker
132
- def test_secondary_files_explicit (tmp_path : Path ) -> None :
159
+ @pytest .mark .parametrize ("with_orcid" , [True , False ])
160
+ def test_secondary_files_explicit (tmp_path : Path , with_orcid : bool ) -> None :
133
161
# Deliberately do NOT have common basename or extension
134
162
file1dir = tmp_path / "foo"
135
163
file1dir .mkdir ()
@@ -164,22 +192,33 @@ def test_secondary_files_explicit(tmp_path: Path) -> None:
164
192
j = json .dumps (job , ensure_ascii = True )
165
193
fp .write (j .encode ("ascii" ))
166
194
167
- folder = cwltool (tmp_path , get_data ("tests/wf/sec-wf.cwl" ), str (jobJson ))
168
- check_provenance (folder , secondary_files = True )
195
+ folder = cwltool (
196
+ tmp_path ,
197
+ get_data ("tests/wf/sec-wf.cwl" ),
198
+ str (jobJson ),
199
+ with_orcid = with_orcid ,
200
+ )
201
+ check_provenance (folder , secondary_files = True , with_orcid = with_orcid )
169
202
check_secondary_files (folder )
170
203
171
204
172
205
@needs_docker
@pytest.mark.parametrize("with_orcid", [True, False])
def test_secondary_files_output(tmp_path: Path, with_orcid: bool) -> None:
    """Run the sec-wf-out workflow and validate provenance with secondary files.

    ``secondary_files=True`` is passed to ``check_provenance``. The separate
    ``check_secondary_files`` step is intentionally not run here.
    """
    # secondary will be picked up by .idx
    folder = cwltool(
        tmp_path,
        get_data("tests/wf/sec-wf-out.cwl"),
        with_orcid=with_orcid,
    )
    check_provenance(folder, secondary_files=True, with_orcid=with_orcid)
    # Skipped, not the same secondary files as above
    # self.check_secondary_files()
179
217
180
218
181
219
@needs_docker
182
- def test_directory_workflow (tmp_path : Path ) -> None :
220
+ @pytest .mark .parametrize ("with_orcid" , [True , False ])
221
+ def test_directory_workflow (tmp_path : Path , with_orcid : bool ) -> None :
183
222
dir2 = tmp_path / "dir2"
184
223
dir2 .mkdir ()
185
224
sha1 = {
@@ -195,8 +234,14 @@ def test_directory_workflow(tmp_path: Path) -> None:
195
234
with open (dir2 / x , "w" , encoding = "ascii" ) as f :
196
235
f .write (x )
197
236
198
- folder = cwltool (tmp_path , get_data ("tests/wf/directory.cwl" ), "--dir" , str (dir2 ))
199
- check_provenance (folder , directory = True )
237
+ folder = cwltool (
238
+ tmp_path ,
239
+ get_data ("tests/wf/directory.cwl" ),
240
+ "--dir" ,
241
+ str (dir2 ),
242
+ with_orcid = with_orcid ,
243
+ )
244
+ check_provenance (folder , directory = True , with_orcid = with_orcid )
200
245
201
246
# Output should include ls stdout of filenames a b c on each line
202
247
file_list = (
@@ -219,10 +264,12 @@ def test_directory_workflow(tmp_path: Path) -> None:
219
264
220
265
221
266
@needs_docker
222
- def test_no_data_files (tmp_path : Path ) -> None :
267
+ @pytest .mark .parametrize ("with_orcid" , [True , False ])
268
+ def test_no_data_files (tmp_path : Path , with_orcid : bool ) -> None :
223
269
folder = cwltool (
224
270
tmp_path ,
225
271
get_data ("tests/wf/conditional_step_no_inputs.cwl" ),
272
+ with_orcid = with_orcid ,
226
273
)
227
274
check_bagit (folder )
228
275
@@ -273,6 +320,7 @@ def check_provenance(
273
320
single_tool : bool = False ,
274
321
directory : bool = False ,
275
322
secondary_files : bool = False ,
323
+ with_orcid : bool = False ,
276
324
) -> None :
277
325
check_folders (base_path )
278
326
check_bagit (base_path )
@@ -283,6 +331,7 @@ def check_provenance(
283
331
single_tool = single_tool ,
284
332
directory = directory ,
285
333
secondary_files = secondary_files ,
334
+ with_orcid = with_orcid ,
286
335
)
287
336
288
337
@@ -473,6 +522,7 @@ def check_prov(
473
522
single_tool : bool = False ,
474
523
directory : bool = False ,
475
524
secondary_files : bool = False ,
525
+ with_orcid : bool = False ,
476
526
) -> None :
477
527
prov_file = base_path / "metadata" / "provenance" / "primary.cwlprov.nt"
478
528
assert prov_file .is_file (), f"Can't find { prov_file } "
@@ -512,10 +562,20 @@ def check_prov(
512
562
) in g , "Engine not declared as SoftwareAgent"
513
563
514
564
# run should be associated to the user
565
+ accounts = set (g .subjects (RDF .type , FOAF .OnlineAccount ))
566
+ assert len (accounts ) == 1
567
+ account = accounts .pop ()
515
568
people = set (g .subjects (RDF .type , SCHEMA .Person ))
516
569
assert len (people ) == 1 , "Can't find associated person in workflow run"
517
570
person = people .pop ()
518
- assert person == URIRef (TEST_ORCID )
571
+ if with_orcid :
572
+ assert person == URIRef (TEST_ORCID )
573
+ else :
574
+ account_names = set (g .objects (account , FOAF .accountName ))
575
+ assert len (account_names ) == 1
576
+ account_name = account_names .pop ()
577
+ machine_user = provenance ._whoami ()[0 ]
578
+ assert account_name .value == machine_user
519
579
520
580
# find the random UUID assigned to cwltool
521
581
tool_agents = set (g .subjects (RDF .type , PROV .SoftwareAgent ))
@@ -528,9 +588,8 @@ def check_prov(
528
588
agents .remove (engine ) # the main tool
529
589
remain_agents = agents - tool_agents
530
590
assert len (remain_agents ) == 1
531
- cwltool_agent = remain_agents .pop ()
532
591
assert (
533
- cwltool_agent ,
592
+ account ,
534
593
PROV .actedOnBehalfOf ,
535
594
person ,
536
595
) in g , "Association of cwltool agent acting for user is missing"
0 commit comments