1
1
"""Compare utilities for CWL objects."""
2
2
3
+ import hashlib
3
4
import json
4
- from typing import Any , Dict , Optional , Set
5
+ import os .path
6
+ import urllib .parse
7
+ from typing import Any , Callable , Dict , Optional , Set
5
8
6
9
7
10
class CompareFail (Exception ):
@@ -22,11 +25,11 @@ def format(
22
25
23
26
24
27
def _check_keys (
25
- keys : Set [str ], expected : Dict [str , Any ], actual : Dict [str , Any ]
28
+ keys : Set [str ], expected : Dict [str , Any ], actual : Dict [str , Any ], skip_details : bool
26
29
) -> None :
27
30
for k in keys :
28
31
try :
29
- compare (expected .get (k ), actual .get (k ))
32
+ compare (expected .get (k ), actual .get (k ), skip_details )
30
33
except CompareFail as e :
31
34
raise CompareFail .format (
32
35
expected , actual , f"field { k !r} failed comparison: { str (e )} "
@@ -48,10 +51,12 @@ def _compare_contents(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
48
51
)
49
52
50
53
51
- def _compare_dict (expected : Dict [str , Any ], actual : Dict [str , Any ]) -> None :
54
+ def _compare_dict (
55
+ expected : Dict [str , Any ], actual : Dict [str , Any ], skip_details : bool
56
+ ) -> None :
52
57
for c in expected :
53
58
try :
54
- compare (expected [c ], actual .get (c ))
59
+ compare (expected [c ], actual .get (c ), skip_details )
55
60
except CompareFail as e :
56
61
raise CompareFail .format (
57
62
expected , actual , f"failed comparison for key { c !r} : { e } "
@@ -62,7 +67,9 @@ def _compare_dict(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
62
67
raise CompareFail .format (expected , actual , "unexpected key '%s'" % k )
63
68
64
69
65
- def _compare_directory (expected : Dict [str , Any ], actual : Dict [str , Any ]) -> None :
70
+ def _compare_directory (
71
+ expected : Dict [str , Any ], actual : Dict [str , Any ], skip_details : bool
72
+ ) -> None :
66
73
if actual .get ("class" ) != "Directory" :
67
74
raise CompareFail .format (
68
75
expected , actual , "expected object with a class 'Directory'"
@@ -75,7 +82,7 @@ def _compare_directory(expected: Dict[str, Any], actual: Dict[str, Any]) -> None
75
82
found = False
76
83
for j in actual ["listing" ]:
77
84
try :
78
- compare (i , j )
85
+ compare (i , j , skip_details )
79
86
found = True
80
87
break
81
88
except CompareFail :
@@ -86,19 +93,32 @@ def _compare_directory(expected: Dict[str, Any], actual: Dict[str, Any]) -> None
86
93
actual ,
87
94
"%s not found" % json .dumps (i , indent = 4 , sort_keys = True ),
88
95
)
89
- _compare_file (expected , actual )
96
+ _compare_file (expected , actual , skip_details )
90
97
91
98
92
- def _compare_file (expected : Dict [str , Any ], actual : Dict [str , Any ]) -> None :
93
- _compare_location (expected , actual )
99
+ def _compare_file (
100
+ expected : Dict [str , Any ], actual : Dict [str , Any ], skip_details : bool
101
+ ) -> None :
102
+ _compare_location (expected , actual , skip_details )
94
103
if "contents" in expected :
95
104
_compare_contents (expected , actual )
96
- other_keys = set (expected .keys ()) - {"path" , "location" , "listing" , "contents" }
97
- _check_keys (other_keys , expected , actual )
98
- _check_keys (other_keys , expected , actual )
99
-
100
-
101
- def _compare_location (expected : Dict [str , Any ], actual : Dict [str , Any ]) -> None :
105
+ if actual .get ("class" ) == "File" and not skip_details :
106
+ _compare_checksum (expected , actual )
107
+ _compare_size (expected , actual )
108
+ other_keys = set (expected .keys ()) - {
109
+ "path" ,
110
+ "location" ,
111
+ "listing" ,
112
+ "contents" ,
113
+ "checksum" ,
114
+ "size" ,
115
+ }
116
+ _check_keys (other_keys , expected , actual , skip_details )
117
+
118
+
119
+ def _compare_location (
120
+ expected : Dict [str , Any ], actual : Dict [str , Any ], skip_details : bool
121
+ ) -> None :
102
122
if "path" in expected :
103
123
comp = "path"
104
124
if "path" not in actual :
@@ -109,7 +129,19 @@ def _compare_location(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
109
129
return
110
130
if actual .get ("class" ) == "Directory" :
111
131
actual [comp ] = actual [comp ].rstrip ("/" )
112
-
132
+ exist_fun : Callable [[str ], bool ] = os .path .isdir
133
+ else :
134
+ exist_fun = os .path .isfile
135
+ if "path" in actual :
136
+ path = urllib .parse .urlparse (actual ["path" ]).path
137
+ else :
138
+ path = urllib .parse .urlparse (actual ["location" ]).path
139
+ if not exist_fun (path ) and not skip_details :
140
+ raise CompareFail .format (
141
+ expected ,
142
+ actual ,
143
+ f"{ actual [comp ]} does not exist" ,
144
+ )
113
145
if expected [comp ] != "Any" and (
114
146
not (
115
147
actual [comp ].endswith ("/" + expected [comp ])
@@ -123,7 +155,67 @@ def _compare_location(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
123
155
)
124
156
125
157
126
- def compare (expected : Any , actual : Any ) -> None :
158
def _compare_checksum(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
    """
    Verify the SHA-1 checksum of the file described by ``actual``.

    The file is found through ``actual["path"]`` when that key is present,
    otherwise through ``actual["location"]``; only the path component of the
    value, as split by :func:`urllib.parse.urlparse`, is opened. The digest
    computed from the bytes on disk is compared first against the checksum
    declared in ``actual`` (if any) and then against the checksum in
    ``expected`` (if any). Checksums are strings of the form ``sha1$<hex>``.

    :param expected: the expected object; its "checksum" entry is optional.
    :param actual: the produced object; must carry "path" or "location",
        its "checksum" entry is optional.
    :raises CompareFail: if the on-disk checksum differs from the declared
        or from the expected checksum.
    """
    source_key = "path" if "path" in actual else "location"
    file_path = urllib.parse.urlparse(actual[source_key]).path

    digest = hashlib.sha1()  # nosec
    with open(file_path, "rb") as handle:
        # Hash in 1 MiB chunks so the whole file is never held in memory.
        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
            digest.update(chunk)
    actual_checksum_on_disk = f"sha1${digest.hexdigest()}"

    # First make sure the output object is consistent with the file itself.
    if "checksum" in actual:
        declared = actual["checksum"]
        if actual_checksum_on_disk != declared:
            raise CompareFail.format(
                expected,
                actual,
                "Output file checksums do not match: actual "
                f"{actual_checksum_on_disk!r} on disk is not equal to actual "
                f"{declared!r} in the output object",
            )

    # Then compare the file against what the test expects.
    if "checksum" in expected:
        wanted = expected["checksum"]
        if wanted != actual_checksum_on_disk:
            raise CompareFail.format(
                expected,
                actual,
                "Output file checksums do not match: actual "
                f"{actual_checksum_on_disk!r} is not equal to expected {wanted!r}",
            )
189
+
190
+
191
def _compare_size(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
    """
    Verify the size in bytes of the file described by ``actual``.

    The file is found through ``actual["path"]`` when that key is present,
    otherwise through ``actual["location"]``; only the path component of the
    value, as split by :func:`urllib.parse.urlparse`, is used. The size
    reported by :func:`os.path.getsize` is compared first against the size
    declared in ``actual`` (if any) and then against the size in ``expected``
    (if any).

    :param expected: the expected object; its "size" entry is optional.
    :param actual: the produced object; must carry "path" or "location",
        its "size" entry is optional.
    :raises CompareFail: if the on-disk size differs from the declared or
        from the expected size.
    """
    if "path" in actual:
        path = urllib.parse.urlparse(actual["path"]).path
    else:
        path = urllib.parse.urlparse(actual["location"]).path
    actual_size_on_disk = os.path.getsize(path)

    # First make sure the output object is consistent with the file itself.
    if "size" in actual:
        actual_size_declared = actual["size"]
        if actual_size_on_disk != actual_size_declared:
            raise CompareFail.format(
                expected,
                actual,
                "Output file sizes do not match: actual "
                f"{actual_size_on_disk!r} on disk is not equal to actual "
                f"{actual_size_declared!r} in the output object",
            )

    # Then compare the file against what the test expects.
    if "size" in expected:
        expected_size = expected["size"]
        if expected_size != actual_size_on_disk:
            raise CompareFail.format(
                expected,
                actual,
                "Output file sizes do not match: actual "
                f"{actual_size_on_disk!r} is not equal to expected {expected_size!r}",
            )
216
+
217
+
218
+ def compare (expected : Any , actual : Any , skip_details : bool = False ) -> None :
127
219
"""Compare two CWL objects."""
128
220
if expected == "Any" :
129
221
return
@@ -136,11 +228,11 @@ def compare(expected: Any, actual: Any) -> None:
136
228
raise CompareFail .format (expected , actual )
137
229
138
230
if expected .get ("class" ) == "File" :
139
- _compare_file (expected , actual )
231
+ _compare_file (expected , actual , skip_details )
140
232
elif expected .get ("class" ) == "Directory" :
141
- _compare_directory (expected , actual )
233
+ _compare_directory (expected , actual , skip_details )
142
234
else :
143
- _compare_dict (expected , actual )
235
+ _compare_dict (expected , actual , skip_details )
144
236
145
237
elif isinstance (expected , list ):
146
238
if not isinstance (actual , list ):
@@ -150,7 +242,7 @@ def compare(expected: Any, actual: Any) -> None:
150
242
raise CompareFail .format (expected , actual , "lengths don't match" )
151
243
for c in range (0 , len (expected )):
152
244
try :
153
- compare (expected [c ], actual [c ])
245
+ compare (expected [c ], actual [c ], skip_details )
154
246
except CompareFail as e :
155
247
raise CompareFail .format (expected , actual , e ) from e
156
248
else :
0 commit comments