Skip to content

Commit 4c57396

Browse files
tom-tan and mr-c committed
Validate actual file properties such as checksum
A re-do of #146 Co-Authored-By: Michael R. Crusoe <[email protected]>
1 parent 5cd65d5 commit 4c57396

File tree

3 files changed

+236
-27
lines changed

3 files changed

+236
-27
lines changed

cwltest/compare.py

Lines changed: 114 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
"""Compare utilities for CWL objects."""
22

3+
import hashlib
34
import json
4-
from typing import Any, Dict, Optional, Set
5+
import os.path
6+
import urllib.parse
7+
from typing import Any, Callable, Dict, Optional, Set
58

69

710
class CompareFail(Exception):
@@ -22,11 +25,11 @@ def format(
2225

2326

2427
def _check_keys(
25-
keys: Set[str], expected: Dict[str, Any], actual: Dict[str, Any]
28+
keys: Set[str], expected: Dict[str, Any], actual: Dict[str, Any], skip_details: bool
2629
) -> None:
2730
for k in keys:
2831
try:
29-
compare(expected.get(k), actual.get(k))
32+
compare(expected.get(k), actual.get(k), skip_details)
3033
except CompareFail as e:
3134
raise CompareFail.format(
3235
expected, actual, f"field {k!r} failed comparison: {str(e)}"
@@ -48,10 +51,12 @@ def _compare_contents(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
4851
)
4952

5053

51-
def _compare_dict(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
54+
def _compare_dict(
55+
expected: Dict[str, Any], actual: Dict[str, Any], skip_details: bool
56+
) -> None:
5257
for c in expected:
5358
try:
54-
compare(expected[c], actual.get(c))
59+
compare(expected[c], actual.get(c), skip_details)
5560
except CompareFail as e:
5661
raise CompareFail.format(
5762
expected, actual, f"failed comparison for key {c!r}: {e}"
@@ -62,7 +67,9 @@ def _compare_dict(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
6267
raise CompareFail.format(expected, actual, "unexpected key '%s'" % k)
6368

6469

65-
def _compare_directory(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
70+
def _compare_directory(
71+
expected: Dict[str, Any], actual: Dict[str, Any], skip_details: bool
72+
) -> None:
6673
if actual.get("class") != "Directory":
6774
raise CompareFail.format(
6875
expected, actual, "expected object with a class 'Directory'"
@@ -75,7 +82,7 @@ def _compare_directory(expected: Dict[str, Any], actual: Dict[str, Any]) -> None
7582
found = False
7683
for j in actual["listing"]:
7784
try:
78-
compare(i, j)
85+
compare(i, j, skip_details)
7986
found = True
8087
break
8188
except CompareFail:
@@ -86,19 +93,32 @@ def _compare_directory(expected: Dict[str, Any], actual: Dict[str, Any]) -> None
8693
actual,
8794
"%s not found" % json.dumps(i, indent=4, sort_keys=True),
8895
)
89-
_compare_file(expected, actual)
96+
_compare_file(expected, actual, skip_details)
9097

9198

92-
def _compare_file(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
93-
_compare_location(expected, actual)
99+
def _compare_file(
100+
expected: Dict[str, Any], actual: Dict[str, Any], skip_details: bool
101+
) -> None:
102+
_compare_location(expected, actual, skip_details)
94103
if "contents" in expected:
95104
_compare_contents(expected, actual)
96-
other_keys = set(expected.keys()) - {"path", "location", "listing", "contents"}
97-
_check_keys(other_keys, expected, actual)
98-
_check_keys(other_keys, expected, actual)
99-
100-
101-
def _compare_location(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
105+
if actual.get("class") == "File" and not skip_details:
106+
_compare_checksum(expected, actual)
107+
_compare_size(expected, actual)
108+
other_keys = set(expected.keys()) - {
109+
"path",
110+
"location",
111+
"listing",
112+
"contents",
113+
"checksum",
114+
"size",
115+
}
116+
_check_keys(other_keys, expected, actual, skip_details)
117+
118+
119+
def _compare_location(
120+
expected: Dict[str, Any], actual: Dict[str, Any], skip_details: bool
121+
) -> None:
102122
if "path" in expected:
103123
comp = "path"
104124
if "path" not in actual:
@@ -109,7 +129,19 @@ def _compare_location(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
109129
return
110130
if actual.get("class") == "Directory":
111131
actual[comp] = actual[comp].rstrip("/")
112-
132+
exist_fun: Callable[[str], bool] = os.path.isdir
133+
else:
134+
exist_fun = os.path.isfile
135+
if "path" in actual:
136+
path = urllib.parse.urlparse(actual["path"]).path
137+
else:
138+
path = urllib.parse.urlparse(actual["location"]).path
139+
if not exist_fun(path) and not skip_details:
140+
raise CompareFail.format(
141+
expected,
142+
actual,
143+
f"{actual[comp]} does not exist",
144+
)
113145
if expected[comp] != "Any" and (
114146
not (
115147
actual[comp].endswith("/" + expected[comp])
@@ -123,7 +155,67 @@ def _compare_location(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
123155
)
124156

125157

126-
def compare(expected: Any, actual: Any) -> None:
158+
def _compare_checksum(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
    """
    Check the SHA-1 checksum of the file described by ``actual``.

    The file is located through ``actual["path"]`` when that key is present,
    otherwise through ``actual["location"]``; only the path component of the
    parsed value is used. The digest computed from the bytes on disk is
    compared against ``actual["checksum"]`` and ``expected["checksum"]``,
    each only when the key is present.

    :raises CompareFail: if the digest on disk differs from the declared or
        the expected checksum.
    """
    source = actual["path"] if "path" in actual else actual["location"]
    local_path = urllib.parse.urlparse(source).path

    # Hash in 1 MiB chunks so the whole file is never held in memory at once.
    chunk_size = 1024 * 1024
    digest = hashlib.sha1()  # nosec
    with open(local_path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    on_disk = f"sha1${digest.hexdigest()}"

    if "checksum" in actual:
        declared = actual["checksum"]
        if on_disk != declared:
            raise CompareFail.format(
                expected,
                actual,
                "Output file checksums do not match: actual "
                f"{on_disk!r} on disk is not equal to actual "
                f"{declared!r} in the output object",
            )

    if "checksum" in expected:
        wanted = expected["checksum"]
        if wanted != on_disk:
            raise CompareFail.format(
                expected,
                actual,
                "Output file checksums do not match: actual "
                f"{on_disk!r} is not equal to expected {wanted!r}",
            )
189+
190+
191+
def _compare_size(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
    """
    Check the on-disk size of the file described by ``actual``.

    The file is located through ``actual["path"]`` when that key is present,
    otherwise through ``actual["location"]``; only the path component of the
    parsed value is used. The size reported by the filesystem is compared
    against ``actual["size"]`` and ``expected["size"]``, each only when the
    key is present.

    :raises CompareFail: if the size on disk differs from the declared or
        the expected size.
    """
    if "path" in actual:
        path = urllib.parse.urlparse(actual["path"]).path
    else:
        path = urllib.parse.urlparse(actual["location"]).path
    actual_size_on_disk = os.path.getsize(path)
    if "size" in actual:
        actual_size_declared = actual["size"]
        if actual_size_on_disk != actual_size_declared:
            # The message used to carry a stray "'" after the declared size,
            # leaving an unbalanced quote in the failure text.
            raise CompareFail.format(
                expected,
                actual,
                "Output file sizes do not match: actual "
                f"{actual_size_on_disk!r} on disk is not equal to actual "
                f"{actual_size_declared!r} in the output object",
            )
    if "size" in expected:
        expected_size = expected["size"]
        if expected_size != actual_size_on_disk:
            raise CompareFail.format(
                expected,
                actual,
                "Output file sizes do not match: actual "
                f"{actual_size_on_disk!r} is not equal to expected {expected_size!r}",
            )
216+
217+
218+
def compare(expected: Any, actual: Any, skip_details: bool = False) -> None:
127219
"""Compare two CWL objects."""
128220
if expected == "Any":
129221
return
@@ -136,11 +228,11 @@ def compare(expected: Any, actual: Any) -> None:
136228
raise CompareFail.format(expected, actual)
137229

138230
if expected.get("class") == "File":
139-
_compare_file(expected, actual)
231+
_compare_file(expected, actual, skip_details)
140232
elif expected.get("class") == "Directory":
141-
_compare_directory(expected, actual)
233+
_compare_directory(expected, actual, skip_details)
142234
else:
143-
_compare_dict(expected, actual)
235+
_compare_dict(expected, actual, skip_details)
144236

145237
elif isinstance(expected, list):
146238
if not isinstance(actual, list):
@@ -150,7 +242,7 @@ def compare(expected: Any, actual: Any) -> None:
150242
raise CompareFail.format(expected, actual, "lengths don't match")
151243
for c in range(0, len(expected)):
152244
try:
153-
compare(expected[c], actual[c])
245+
compare(expected[c], actual[c], skip_details)
154246
except CompareFail as e:
155247
raise CompareFail.format(expected, actual, e) from e
156248
else:

tests/test_compare.py

Lines changed: 120 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import os
2+
from pathlib import Path
13
from typing import Any, Dict
24

35
import pytest
4-
from cwltest.compare import CompareFail, compare
6+
from cwltest.compare import CompareFail, _compare_directory, _compare_file, compare
57

68
from .util import get_data
79

@@ -37,6 +39,7 @@ def test_compare_contents_success() -> None:
3739
"size": 2,
3840
"class": "File",
3941
"contents": "2\n",
42+
"checksum": "sha1$7448d8798a4380162d4b56f9b452e2f6f9e24e7a",
4043
}
4144
actual = {
4245
"basename": "cores.txt",
@@ -49,6 +52,120 @@ def test_compare_contents_success() -> None:
4952
compare(expected, actual)
5053

5154

55+
def test_compare_contents_not_exist() -> None:
    """A File whose ``path`` points at a made-up location must fail the comparison."""
    actual = {
        "basename": "cores.txt",
        "class": "File",
        "location": "file:///var/folders/8x/2df05_7j20j6r8y81w4qf43r0000gn/T/tmpG0EkrS/cores.txt",
        "path": "/none/exist/path/to/cores.txt",
        "size": 2,
    }
    expected = {
        "location": "cores.txt",
        "class": "File",
    }
    with pytest.raises(CompareFail):
        _compare_file(expected, actual, skip_details=False)
69+
70+
71+
def test_compare_file_different_size(tmp_path: Path) -> None:
    """A 5-byte file on disk must not match an expected ``size`` of 2."""
    target = tmp_path / "cores.txt"
    target.write_text("hello")

    actual = {
        "basename": "cores.txt",
        "class": "File",
        "location": str(target),
    }
    expected = {
        "location": "cores.txt",
        "size": 2,
        "class": "File",
    }
    with pytest.raises(CompareFail):
        _compare_file(expected, actual, skip_details=False)
89+
90+
91+
def test_compare_file_different_checksum(tmp_path: Path) -> None:
    """A file containing "hello" must not match the expected ``checksum``."""
    target = tmp_path / "cores.txt"
    target.write_text("hello")

    actual = {
        "basename": "cores.txt",
        "class": "File",
        "location": str(target),
    }
    expected = {
        "location": "cores.txt",
        "class": "File",
        "checksum": "sha1$7448d8798a4380162d4b56f9b452e2f6f9e24e7a",
    }
    with pytest.raises(CompareFail):
        _compare_file(expected, actual, skip_details=False)
109+
110+
111+
def test_compare_file_inconsistent_size(tmp_path: Path) -> None:
    """A ``size`` declared in the output object must agree with the file on disk."""
    target = tmp_path / "cores.txt"
    target.write_text("hello")

    actual = {
        "basename": "cores.txt",
        "class": "File",
        "location": str(target),
        # Deliberately wrong: the file written above holds only "hello".
        "size": 65535,
    }
    expected = {
        "location": "cores.txt",
        "class": "File",
    }
    with pytest.raises(CompareFail):
        _compare_file(expected, actual, skip_details=False)
129+
130+
131+
def test_compare_file_inconsistent_checksum(tmp_path: Path) -> None:
    """A ``checksum`` declared in the output object must agree with the file on disk."""
    target = tmp_path / "cores.txt"
    target.write_text("hello")

    actual = {
        "basename": "cores.txt",
        # Deliberately not the digest of the file written above.
        "checksum": "inconsistent-checksum",
        "class": "File",
        "location": str(target),
    }
    expected = {
        "location": "cores.txt",
        "class": "File",
    }
    with pytest.raises(CompareFail):
        _compare_file(expected, actual, skip_details=False)
149+
150+
151+
def test_compare_directory(tmp_path: Path) -> None:
    """An existing, empty directory compares equal to an empty expected listing."""
    directory = tmp_path / "dir"
    os.makedirs(directory)

    actual = {
        "class": "Directory",
        "location": str(directory),
        "listing": [],
    }
    expected = {
        "location": "dir",
        "class": "Directory",
        "listing": [],
    }
    _compare_directory(expected, actual, skip_details=False)
167+
168+
52169
def test_compare_directory_success() -> None:
53170
expected = {
54171
"stuff": {
@@ -100,7 +217,7 @@ def test_compare_directory_success() -> None:
100217
],
101218
}
102219
}
103-
compare(expected, actual)
220+
compare(expected, actual, skip_details=True)
104221

105222

106223
def test_compare_directory_failure_different_listing() -> None:
@@ -270,7 +387,7 @@ def test_compare_file_success() -> None:
270387
"path": "/var/folders/8x/2df05_7j20j6r8y81w4qf43r0000gn/T/tmpG0EkrS/cores.txt",
271388
"size": 2,
272389
}
273-
compare(expected, actual)
390+
compare(expected, actual, skip_details=True)
274391

275392

276393
def test_compare_list_failure_missing() -> None:

tox.ini

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tox]
22
envlist =
3-
py{36,37,38,39,310,311}-lint,
3+
py{37,38,39,310,311}-lint,
44
py{36,37,38,39,310,311}-unit,
55
py{36,37,38,39,310,311}-bandit,
66
py{37,38,39,310,311}-mypy,
@@ -58,7 +58,7 @@ commands =
5858
py{36,37,38,39,310,311}-unit: python -m pip install -U pip setuptools wheel
5959
py{36,37,38,39,310,311}-unit: python -m pytest --cov --cov-config={toxinidir}/.coveragerc --cov-append {posargs}
6060
py{36,37,38,39,310,311}-unit: coverage xml
61-
py{36,37,38,39,310,311}-bandit: bandit --recursive cwltest --exclude tests/*
61+
py{36,37,38,39,310,311}-bandit: bandit --recursive cwltest
6262
py{36,37,38,39,310,311}-lint: make flake8
6363
py{36,37,38,39,310,311}-lint: make format-check
6464
py{37,38,39,310,311}-mypy: make mypy

0 commit comments

Comments
 (0)