
Commit 51d844d

feat(mnemonic): add property-based tests for invalid cases
Introduce property-based tests for mnemonic file validation using Hypothesis. Covers malformed files, invalid word counts, encoding issues, permission errors, and filesystem edge cases (directories, broken and looping symlinks). Refactor test parametrization to use shared constants. Exercises the robustness of CLI error handling for mnemonic derivation.
1 parent b8e07d4 commit 51d844d
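
The tests follow a single pattern: a Hypothesis composite strategy draws an arbitrary malformed input, and the test asserts that the CLI wrapper rejects every generated example. A minimal self-contained sketch of that shape (illustrative names only, not code from this commit):

import hypothesis
import hypothesis.strategies as st

ALLOWED_COUNTS = frozenset({12, 15, 18, 21, 24})


@st.composite
def invalid_word_counts(draw: st.DrawFn) -> int:
    # Draw any word count outside the BIP39-allowed set.
    return draw(st.integers(min_value=0, max_value=48).filter(lambda n: n not in ALLOWED_COUNTS))


@hypothesis.given(count=invalid_word_counts())
def test_rejects_invalid_counts(count: int) -> None:
    # The real tests below invoke the CLI and expect a CLIError instead.
    assert count not in ALLOWED_COUNTS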

2 files changed: 329 additions & 2 deletions

cardano_node_tests/tests/common.py

Lines changed: 1 addition & 0 deletions
@@ -192,6 +192,7 @@ def hypothesis_settings(max_examples: int = 100) -> tp.Any:
         suppress_health_check=(
             hypothesis.HealthCheck.too_slow,
             hypothesis.HealthCheck.function_scoped_fixture,
+            hypothesis.HealthCheck.filter_too_much,
         ),
     )

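Context for this one-line change: the new strategies in test_mnemonic.py lean heavily on .filter(...) and hypothesis.assume(...), which can trip Hypothesis's filter_too_much health check when most draws get discarded. A standalone sketch of the failure mode the suppression avoids (illustrative, not part of the commit):

import hypothesis
import hypothesis.strategies as st

# Discarding the vast majority of draws, as below, can raise FailedHealthCheck
# (filter_too_much) unless that health check is suppressed in settings.
sparse_ints = st.integers(min_value=0, max_value=1_000_000).filter(lambda n: n % 1000 == 0)


@hypothesis.settings(suppress_health_check=(hypothesis.HealthCheck.filter_too_much,))
@hypothesis.given(n=sparse_ints)
def test_sparse_values(n: int) -> None:
    assert n % 1000 == 0
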
cardano_node_tests/tests/test_mnemonic.py

Lines changed: 328 additions & 2 deletions
@@ -1,10 +1,15 @@
 """Tests for deriving keys from a mnemonic sentence."""
 
+import dataclasses
 import logging
 import pathlib as pl
+import shutil
+import stat
 import typing as tp
 
 import allure
+import hypothesis
+import hypothesis.strategies as st
 import pytest
 from cardano_clusterlib import clusterlib

@@ -14,13 +19,279 @@
 LOGGER = logging.getLogger(__name__)
 DATA_DIR = pl.Path(__file__).parent / "data" / "mnemonic_golden"
 
+# A small embedded list of *valid* BIP39 words (subset).
+# Enough to build syntactically plausible phrases without importing anything.
+_VALID_BIP39_SUBSET: tuple[str, ...] = (
+    "abandon",
+    "ability",
+    "able",
+    "about",
+    "above",
+    "absent",
+    "absorb",
+    "abstract",
+    "absurd",
+    "abuse",
+    "access",
+    "accident",
+    "account",
+    "accuse",
+    "achieve",
+    "acid",
+    "acoustic",
+    "acquire",
+    "across",
+    "act",
+    "action",
+    "actor",
+    "actress",
+    "actual",
+    "adapt",
+    "add",
+    "addict",
+    "address",
+    "adjust",
+    "admit",
+    "adult",
+    "advance",
+)
+
+_ALLOWED_COUNTS: frozenset[int] = frozenset({12, 15, 18, 21, 24})
+
+
+# Case model
+@dataclasses.dataclass(frozen=True)
+class BadMnemonicCase:
+    """A single non-compliant mnemonic-file test case."""
+
+    name: str
+    # When content is None, the path will be a *special* case (e.g., directory/symlink).
+    content_bytes: bytes | None
+    # How to materialize the path on disk.
+    kind: tp.Literal[
+        "file",
+        "empty_file",
+        "no_read_perm",
+        "directory",
+        "broken_symlink",
+        "symlink_loop",
+    ]
+
+
+# Primitive strategies
+
+ascii_word = st.text(
+    alphabet=st.characters(min_codepoint=0x61, max_codepoint=0x7A),  # a-z
+    min_size=3,
+    max_size=10,
+).filter(lambda s: s not in _VALID_BIP39_SUBSET)
+
+weird_sep = st.sampled_from(
+    [" ", "  ", "\t", " \t ", "\n", "\r\n", ",", ", ", " \u00a0 "]
+)  # Double space, NBSP, etc.
+
+bad_token = st.one_of(
+    ascii_word,  # Unknown word
+    st.text(min_size=1, max_size=8).filter(
+        lambda s: any(not c.isalpha() for c in s)
+    ),  # Punctuation/digits
+    st.sampled_from(["Über", "naïve", "résumé", "café"]),  # Diacritics / non-ASCII
+    st.sampled_from(["🚀", "🔥", "🙂"]),  # Emoji
+)
+
+valid_token = st.sampled_from(_VALID_BIP39_SUBSET)
+
+
+# Helpers to compose lines / encodings
+
+
+@st.composite
+def word_sequence(draw: st.DrawFn, *, count: int, ensure_invalid: bool) -> list[str]:
+    """Build a list of tokens of a specific count.
+
+    If ensure_invalid=True, inject at least one invalid token.
+    """
+    if ensure_invalid:
+        # At least one invalid token, others may be valid or invalid.
+        invalid_ix = draw(st.integers(min_value=0, max_value=max(0, count - 1)))
+        tokens: list[str] = []
+        for i in range(count):
+            tok = draw(bad_token if i == invalid_ix else st.one_of(valid_token, bad_token))
+            tokens.append(tok)
+        return tokens
+
+    # All valid tokens (from the subset); used for checksum-invalid cases downstream.
+    return draw(st.lists(valid_token, min_size=count, max_size=count))
+
+
+@st.composite
+def join_with_weirdness(draw: st.DrawFn, *, tokens: tp.Sequence[str]) -> str:
+    """Join tokens with odd/mixed separators and optional leading/trailing clutter."""
+    # Draw separators (one fewer than tokens)
+    seps = draw(st.lists(weird_sep, min_size=len(tokens) - 1, max_size=len(tokens) - 1))
+    # Interleave tokens + seps
+    core = "".join(a + b for a, b in zip(tokens, [*seps, ""]))
+    # Draw optional prefix/suffix instead of using .example()
+    prefix = draw(st.sampled_from(["", " ", "\n", "\t", "\ufeff"]))  # Include BOM as char
+    suffix = draw(st.sampled_from(["", " ", "\n", "\t", "\u200b"]))  # Zero-width space
+    return prefix + core + suffix
+
+
+def maybe_reencode(text: str) -> st.SearchStrategy[bytes]:
+    """Return bytes in various encodings and with potential binary junk."""
+    return st.one_of(
+        st.just(text.encode("utf-8")),
+        st.just(("\ufeff" + text).encode("utf-8")),  # UTF-8 with BOM
+        st.just(text.encode("utf-16")),  # BOM included by default
+        st.just(text.encode("utf-32")),  # BOM included by default
+        # Inject NULs or random bytes around:
+        st.binary(min_size=0, max_size=8).map(lambda b: b + text.encode("utf-8") + b"\x00"),
+    )
+
+
+# High-level bad file strategies
+
+
+@st.composite
+def wrong_count_files(draw: st.DrawFn) -> BadMnemonicCase:
+    """Wrong number of words (not in {12, 15, 18, 21, 24})."""
+    count = draw(st.integers(min_value=0, max_value=48).filter(lambda n: n not in _ALLOWED_COUNTS))
+    hypothesis.assume(count != 0)  # Zero handled by empty_file elsewhere
+    tokens = draw(word_sequence(count=count, ensure_invalid=False))
+    line = draw(join_with_weirdness(tokens=tokens))
+    content = draw(maybe_reencode(line))
+    return BadMnemonicCase(
+        name=f"wrong_count_{count}",
+        content_bytes=content,
+        kind="file",
+    )
+
+
+@st.composite
+def unknown_word_files(draw: st.DrawFn) -> BadMnemonicCase:
+    """Produce an allowed word count where at least one token is not a valid BIP39 word."""
+    count = draw(st.sampled_from(sorted(_ALLOWED_COUNTS)))
+    tokens = draw(word_sequence(count=count, ensure_invalid=True))
+    line = draw(join_with_weirdness(tokens=tokens))
+    content = draw(maybe_reencode(line))
+    return BadMnemonicCase(
+        name=f"unknown_word_{count}",
+        content_bytes=content,
+        kind="file",
+    )
+
+
+@st.composite
+def separator_mess_files(draw: st.DrawFn) -> BadMnemonicCase:
+    """Only separator/newline/tab/comma issues (often two or more separators between words)."""
+    count = draw(st.sampled_from(sorted(_ALLOWED_COUNTS)))
+    tokens = draw(word_sequence(count=count, ensure_invalid=False))
+    # Force messy separators (multiple mixed separators)
+    weird_line = draw(join_with_weirdness(tokens=tokens))
+    content = draw(maybe_reencode(weird_line))
+    return BadMnemonicCase(
+        name=f"separator_mess_{count}",
+        content_bytes=content,
+        kind="file",
+    )
+
+
+@st.composite
+def encoding_garbage_files(draw: st.DrawFn) -> BadMnemonicCase:
+    """Binary junk, odd encodings, NULs."""
+    count = draw(st.sampled_from(sorted(_ALLOWED_COUNTS)))
+    tokens = draw(word_sequence(count=count, ensure_invalid=False))
+    line = draw(join_with_weirdness(tokens=tokens))
+    content = draw(maybe_reencode(line))
+    # Strengthen the likelihood of binary content by wrapping with extra random bytes:
+    blob = draw(st.binary(min_size=0, max_size=32))
+    return BadMnemonicCase(
+        name=f"encoding_garbage_{count}",
+        content_bytes=blob + content + blob,
+        kind="file",
+    )
+
+
+def empty_file_case() -> st.SearchStrategy[BadMnemonicCase]:
+    return st.just(BadMnemonicCase(name="empty_file", content_bytes=b"", kind="empty_file"))
+
+
+def no_read_perm_case() -> st.SearchStrategy[BadMnemonicCase]:
+    return st.just(
+        BadMnemonicCase(name="no_read_perm", content_bytes=b"abandon " * 12, kind="no_read_perm")
+    )
+
+
+def directory_case() -> st.SearchStrategy[BadMnemonicCase]:
+    return st.just(
+        BadMnemonicCase(name="directory_instead_of_file", content_bytes=None, kind="directory")
+    )
+
+
+def broken_symlink_case() -> st.SearchStrategy[BadMnemonicCase]:
+    return st.just(
+        BadMnemonicCase(name="broken_symlink", content_bytes=None, kind="broken_symlink")
+    )
+
+
+def symlink_loop_case() -> st.SearchStrategy[BadMnemonicCase]:
+    return st.just(BadMnemonicCase(name="symlink_loop", content_bytes=None, kind="symlink_loop"))
+
+
+# Master strategy: draw from all invalid-case categories.
+invalid_mnemonic_files: st.SearchStrategy[BadMnemonicCase] = st.one_of(
+    wrong_count_files(),
+    unknown_word_files(),
+    separator_mess_files(),
+    encoding_garbage_files(),
+    empty_file_case(),
+    no_read_perm_case(),
+    directory_case(),
+    broken_symlink_case(),
+    symlink_loop_case(),
+)
+
+# Materialization helper
+
+
+def _materialize_case(tmp_dir: pl.Path, case: BadMnemonicCase) -> pl.Path:
+    """Create the on-disk artifact described by `case` and return its path."""
+    path = tmp_dir / f"{case.name}.txt"
+    match case.kind:
+        case "file":
+            assert case.content_bytes is not None
+            path.write_bytes(case.content_bytes)
+        case "empty_file":
+            path.write_bytes(b"")
+        case "no_read_perm":
+            assert case.content_bytes is not None
+            path.write_bytes(case.content_bytes)
+            # Remove read permissions for owner/group/others.
+            path.chmod(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH)
+        case "directory":
+            path.mkdir(parents=False, exist_ok=True)  # Tolerate reuse across examples.
+        case "broken_symlink":
+            path.unlink(missing_ok=True)  # Tolerate reuse across Hypothesis examples.
+            path.symlink_to(tmp_dir / "does_not_exist.txt")
+        case "symlink_loop":
+            a = tmp_dir / "loop_a"
+            b = tmp_dir / "loop_b"
+            if not a.is_symlink():  # Tolerate reuse across Hypothesis examples.
+                a.symlink_to(b)
+                b.symlink_to(a)
+            path = a  # Return one end of the loop.
+        case _:
+            err = f"Unhandled kind: {case.kind}"
+            raise ValueError(err)
+    return path
+

 @common.SKIPIF_WRONG_ERA
 class TestMnemonic:
     """Tests for mnemonic sentence."""
 
     @allure.link(helpers.get_vcs_link())
-    @pytest.mark.parametrize("size", (12, 15, 18, 21, 24))
+    @pytest.mark.parametrize("size", sorted(_ALLOWED_COUNTS))
     @pytest.mark.parametrize("out", ("file", "stdout"))
     @pytest.mark.parametrize(
         "key_type",
@@ -74,7 +345,7 @@ def test_gen_and_deriv(
         assert key_file.exists()
 
     @allure.link(helpers.get_vcs_link())
-    @pytest.mark.parametrize("size", (12, 15, 18, 21, 24))
+    @pytest.mark.parametrize("size", sorted(_ALLOWED_COUNTS))
     @pytest.mark.parametrize(
         "key_type",
         # pyrefly: ignore # no-matching-overload
@@ -128,3 +399,58 @@ def test_golden_deriv(
         assert key_file.exists()
 
         assert helpers.checksum(filename=key_file) == helpers.checksum(filename=golden_key_file)
+
+
+@common.SKIPIF_WRONG_ERA
+class TestNegativeMnemonic:
+    """Tests with invalid arguments."""
+
+    @pytest.fixture
+    def tmp_case_path(self) -> tp.Generator[pl.Path, None, None]:
+        d = pl.Path(f"reject_mnemonics_{clusterlib.get_rand_str()}").resolve()
+        d.mkdir(exist_ok=True)
+        yield d
+        shutil.rmtree(d)
+
+    @allure.link(helpers.get_vcs_link())
+    @hypothesis.given(size=st.integers())
+    @common.hypothesis_settings(max_examples=1_000)
+    @pytest.mark.smoke
+    def test_gen_invalid_size(
+        self,
+        cluster: clusterlib.ClusterLib,
+        size: int,
+    ):
+        """Test generating a mnemonic with an invalid size."""
+        hypothesis.assume(size not in _ALLOWED_COUNTS)
+
+        common.get_test_id(cluster)
+        with pytest.raises(clusterlib.CLIError) as excinfo:
+            cluster.g_key.gen_mnemonic(size=size)  # type: ignore
+        err_value = str(excinfo.value)
+        assert "Invalid mnemonic size" in err_value
+
+    @common.hypothesis_settings(max_examples=500)
+    @hypothesis.given(bad_case=invalid_mnemonic_files)
+    def test_rejects_noncompliant_mnemonics(
+        self,
+        cluster: clusterlib.ClusterLib,
+        bad_case: BadMnemonicCase,
+        tmp_case_path: pl.Path,
+    ) -> None:
+        """Test that the CLI wrapper rejects malformed mnemonic files."""
+        temp_template = f"{common.get_test_id(cluster)}_{common.unique_time_str()}"
+        target = _materialize_case(tmp_dir=tmp_case_path, case=bad_case)
+
+        with pytest.raises(clusterlib.CLIError) as excinfo:
+            cluster.g_key.derive_from_mnemonic(
+                key_name=temp_template,
+                key_type=clusterlib.KeyType.DREP,
+                mnemonic_file=target,
+                account_number=0,
+            )
+        err_value = str(excinfo.value)
+        assert (
+            "Error reading mnemonic file" in err_value
+            or "Error converting the mnemonic into a key" in err_value
+        )

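To see concrete samples of what invalid_mnemonic_files generates, Hypothesis strategies expose .example() for interactive exploration (a REPL-only helper; the import path assumes the repo layout shown in this diff):

from cardano_node_tests.tests.test_mnemonic import invalid_mnemonic_files

# Print a handful of generated cases; .example() is meant for REPL use only.
for _ in range(5):
    case = invalid_mnemonic_files.example()
    print(case.name, case.kind, (case.content_bytes or b"")[:40])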