|
1 | 1 | """Tests for deriving keys from a mnemonic sentence.""" |
2 | 2 |
|
| 3 | +import dataclasses |
3 | 4 | import logging |
4 | 5 | import pathlib as pl |
| 6 | +import shutil |
| 7 | +import stat |
5 | 8 | import typing as tp |
6 | 9 |
|
7 | 10 | import allure |
| 11 | +import hypothesis |
| 12 | +import hypothesis.strategies as st |
8 | 13 | import pytest |
9 | 14 | from cardano_clusterlib import clusterlib |
10 | 15 |
|
|
14 | 19 | LOGGER = logging.getLogger(__name__) |
15 | 20 | DATA_DIR = pl.Path(__file__).parent / "data" / "mnemonic_golden" |
16 | 21 |
|
# A small embedded subset of *valid* BIP39 words — enough to build
# syntactically plausible phrases without importing a wordlist dependency.
_VALID_BIP39_SUBSET: tuple[str, ...] = tuple(
    """
    abandon ability able about above absent absorb abstract
    absurd abuse access accident account accuse achieve acid
    acoustic acquire across act action actor actress actual
    adapt add addict address adjust admit adult advance
    """.split()
)

# Word counts accepted for a BIP39 mnemonic sentence.
_ALLOWED_COUNTS: frozenset[int] = frozenset((12, 15, 18, 21, 24))
| 60 | + |
| 61 | + |
# Case model
@dataclasses.dataclass(frozen=True)
class BadMnemonicCase:
    """A single non-compliant mnemonic-file test case.

    Instances only *describe* the artifact; `_materialize_case` turns a case
    into an actual path on disk.
    """

    # Short label for the case; also used as the artifact's file-name stem.
    name: str
    # Raw bytes to write for file-backed kinds.
    # When content is None, the path will be a *special* case (e.g., directory/symlink).
    content_bytes: bytes | None
    # How to materialize the path on disk.
    kind: tp.Literal[
        "file",
        "empty_file",
        "no_read_perm",
        "directory",
        "broken_symlink",
        "symlink_loop",
    ]
| 79 | + |
| 80 | + |
# Primitive strategies

# Lowercase a-z pseudo-words guaranteed not to be in the embedded BIP39
# subset, so they look like words but can never validate.
ascii_word = st.text(
    alphabet=st.characters(min_codepoint=0x61, max_codepoint=0x7A),  # a-z
    min_size=3,
    max_size=10,
).filter(lambda s: s not in _VALID_BIP39_SUBSET)

# Separators deviating from the canonical single ASCII space: tabs, newlines,
# commas, NBSP.
# NOTE(review): the second entry renders as a plain single space — it looks
# like it was meant to be a run of spaces; confirm against the original.
weird_sep = st.sampled_from(
    [" ", " ", "\t", " \t ", "\n", "\r\n", ",", ", ", " \u00a0 "]
)  # NBSP etc.

bad_token = st.one_of(
    ascii_word,  # Unknown word
    st.text(min_size=1, max_size=8).filter(
        # At least one non-alphabetic character (punctuation/digits/controls).
        # The original `any(c for c in s if not c.isalpha())` only worked
        # because every 1-char string is truthy; state the predicate directly.
        lambda s: any(not c.isalpha() for c in s)
    ),  # Punctuation/digits
    st.sampled_from(["Über", "naïve", "résumé", "café"]),  # Diacritics / non-ASCII
    st.sampled_from(["🚀", "🔥", "🙂"]),  # Emoji
)

# Tokens drawn from the embedded valid-word subset.
valid_token = st.sampled_from(_VALID_BIP39_SUBSET)
| 103 | + |
| 104 | + |
| 105 | +# Helpers to compose lines / encodings |
| 106 | + |
| 107 | + |
@st.composite
def word_sequence(draw: st.DrawFn, *, count: int, ensure_invalid: bool) -> list[str]:
    """Build a list of tokens of a specific count.

    Args:
        count: Number of tokens to produce.
        ensure_invalid: When True, at least one token is guaranteed invalid;
            the remaining tokens may be valid or invalid.

    Raises:
        ValueError: If ensure_invalid is True but count < 1 — the
            "at least one invalid token" guarantee would be silently violated
            (the original returned [] in that case).
    """
    if ensure_invalid:
        if count < 1:
            err = "ensure_invalid=True requires count >= 1"
            raise ValueError(err)
        # Pick the position that must hold an invalid token.
        invalid_ix = draw(st.integers(min_value=0, max_value=count - 1))
        return [
            draw(bad_token if i == invalid_ix else st.one_of(valid_token, bad_token))
            for i in range(count)
        ]

    # All valid tokens (from the subset); used for checksum-invalid cases downstream.
    return draw(st.lists(valid_token, min_size=count, max_size=count))
| 125 | + |
| 126 | + |
@st.composite
def join_with_weirdness(draw: st.DrawFn, *, tokens: tp.Sequence[str]) -> str:
    """Join tokens with odd/mixed separators and optional leading/trailing clutter.

    Safe for empty `tokens`: the separator count is clamped to zero (the
    original `len(tokens) - 1` would pass a negative min_size to `st.lists`,
    which raises InvalidArgument).
    """
    # Draw separators (one fewer than tokens, never negative).
    n_seps = max(0, len(tokens) - 1)
    seps = draw(st.lists(weird_sep, min_size=n_seps, max_size=n_seps))
    # Interleave tokens + seps; the final token gets an empty separator.
    core = "".join(a + b for a, b in zip(tokens, [*seps, ""]))
    # Optional clutter: whitespace, BOM prefix, zero-width-space suffix.
    prefix = draw(st.sampled_from(["", " ", "\n", "\t", "\ufeff"]))  # include BOM as char
    suffix = draw(st.sampled_from(["", " ", "\n", "\t", "\u200b"]))  # zero-width space
    return prefix + core + suffix
| 138 | + |
| 139 | + |
def maybe_reencode(text: str) -> st.SearchStrategy[bytes]:
    """Strategy yielding `text` as bytes in assorted encodings, BOMs and junk."""
    utf8 = text.encode("utf-8")
    variants = [
        st.just(utf8),
        st.just(("\ufeff" + text).encode("utf-8")),  # UTF-8 with BOM
        st.just(text.encode("utf-16")),  # BOM included by default
        st.just(text.encode("utf-32")),  # BOM included by default
        # Inject NULs or random bytes around:
        st.binary(min_size=0, max_size=8).map(lambda junk: junk + utf8 + b"\x00"),
    ]
    return st.one_of(*variants)
| 150 | + |
| 151 | + |
| 152 | +# High-level bad file strategies |
| 153 | + |
| 154 | + |
@st.composite
def wrong_count_files(draw: st.DrawFn) -> BadMnemonicCase:
    """A mnemonic line whose word count is not in {12, 15, 18, 21, 24}."""
    count = draw(
        st.integers(min_value=0, max_value=48).filter(lambda n: n not in _ALLOWED_COUNTS)
    )
    # Zero words is covered by the dedicated empty-file case.
    hypothesis.assume(count != 0)
    words = draw(word_sequence(count=count, ensure_invalid=False))
    joined = draw(join_with_weirdness(tokens=words))
    raw = draw(maybe_reencode(joined))
    return BadMnemonicCase(name=f"wrong_count_{count}", content_bytes=raw, kind="file")
| 168 | + |
| 169 | + |
@st.composite
def unknown_word_files(draw: st.DrawFn) -> BadMnemonicCase:
    """An allowed word count, but at least one token is not a valid BIP39 word."""
    count = draw(st.sampled_from(sorted(_ALLOWED_COUNTS)))
    words = draw(word_sequence(count=count, ensure_invalid=True))
    joined = draw(join_with_weirdness(tokens=words))
    raw = draw(maybe_reencode(joined))
    return BadMnemonicCase(name=f"unknown_word_{count}", content_bytes=raw, kind="file")
| 182 | + |
| 183 | + |
@st.composite
def separator_mess_files(draw: st.DrawFn) -> BadMnemonicCase:
    """Valid tokens in a valid count, joined with messy separators/clutter.

    The damage comes entirely from `join_with_weirdness` (mixed separators,
    newlines, tabs, commas) applied to otherwise-valid words.
    """
    count = draw(st.sampled_from(sorted(_ALLOWED_COUNTS)))
    words = draw(word_sequence(count=count, ensure_invalid=False))
    messy_line = draw(join_with_weirdness(tokens=words))
    raw = draw(maybe_reencode(messy_line))
    return BadMnemonicCase(name=f"separator_mess_{count}", content_bytes=raw, kind="file")
| 197 | + |
| 198 | + |
@st.composite
def encoding_garbage_files(draw: st.DrawFn) -> BadMnemonicCase:
    """Binary junk, odd encodings, NULs."""
    count = draw(st.sampled_from(sorted(_ALLOWED_COUNTS)))
    words = draw(word_sequence(count=count, ensure_invalid=False))
    joined = draw(join_with_weirdness(tokens=words))
    encoded = draw(maybe_reencode(joined))
    # Sandwich between random bytes to raise the odds of outright binary.
    junk = draw(st.binary(min_size=0, max_size=32))
    return BadMnemonicCase(
        name=f"encoding_garbage_{count}",
        content_bytes=junk + encoded + junk,
        kind="file",
    )
| 213 | + |
| 214 | + |
def empty_file_case() -> st.SearchStrategy[BadMnemonicCase]:
    """Strategy yielding the single zero-byte-file case."""
    case = BadMnemonicCase(name="empty_file", content_bytes=b"", kind="empty_file")
    return st.just(case)
| 217 | + |
| 218 | + |
def no_read_perm_case() -> st.SearchStrategy[BadMnemonicCase]:
    """Strategy yielding a plausible mnemonic file with read permission removed."""
    case = BadMnemonicCase(
        name="no_read_perm",
        content_bytes=b"abandon " * 12,
        kind="no_read_perm",
    )
    return st.just(case)
| 223 | + |
| 224 | + |
def directory_case() -> st.SearchStrategy[BadMnemonicCase]:
    """Strategy yielding the directory-instead-of-file case."""
    case = BadMnemonicCase(
        name="directory_instead_of_file",
        content_bytes=None,
        kind="directory",
    )
    return st.just(case)
| 229 | + |
| 230 | + |
def broken_symlink_case() -> st.SearchStrategy[BadMnemonicCase]:
    """Strategy yielding a symlink whose target does not exist."""
    case = BadMnemonicCase(
        name="broken_symlink",
        content_bytes=None,
        kind="broken_symlink",
    )
    return st.just(case)
| 235 | + |
| 236 | + |
def symlink_loop_case() -> st.SearchStrategy[BadMnemonicCase]:
    """Strategy yielding a pair of symlinks that point at each other."""
    case = BadMnemonicCase(name="symlink_loop", content_bytes=None, kind="symlink_loop")
    return st.just(case)
| 239 | + |
| 240 | + |
# Master strategy mixing the case categories (st.one_of picks branches with
# roughly equal likelihood): content-level defects (wrong word count, unknown
# words, separator mess, encoding garbage) plus filesystem-level defects
# (empty file, unreadable file, directory instead of file, broken/looping
# symlinks).
invalid_mnemonic_files: st.SearchStrategy[BadMnemonicCase] = st.one_of(
    wrong_count_files(),
    unknown_word_files(),
    separator_mess_files(),
    encoding_garbage_files(),
    empty_file_case(),
    no_read_perm_case(),
    directory_case(),
    broken_symlink_case(),
    symlink_loop_case(),
)
| 253 | + |
| 254 | +# Materialization helper |
| 255 | + |
| 256 | + |
| 257 | +def _materialize_case(tmp_dir: pl.Path, case: BadMnemonicCase) -> pl.Path: |
| 258 | + """Create the on-disk artifact described by `case` and return its path.""" |
| 259 | + path = tmp_dir / f"{case.name}.txt" |
| 260 | + match case.kind: |
| 261 | + case "file": |
| 262 | + assert case.content_bytes is not None |
| 263 | + path.write_bytes(case.content_bytes) |
| 264 | + case "empty_file": |
| 265 | + path.write_bytes(b"") |
| 266 | + case "no_read_perm": |
| 267 | + assert case.content_bytes is not None |
| 268 | + path.write_bytes(case.content_bytes) |
| 269 | + # Remove read permissions for owner/group/others. |
| 270 | + path.chmod(stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH) |
| 271 | + case "directory": |
| 272 | + path.mkdir(parents=False, exist_ok=False) |
| 273 | + case "broken_symlink": |
| 274 | + target = tmp_dir / "does_not_exist.txt" |
| 275 | + path.symlink_to(target) |
| 276 | + case "symlink_loop": |
| 277 | + a = tmp_dir / "loop_a" |
| 278 | + b = tmp_dir / "loop_b" |
| 279 | + a.symlink_to(b) |
| 280 | + b.symlink_to(a) |
| 281 | + # Return one end of the loop. |
| 282 | + path = a |
| 283 | + case _: |
| 284 | + err = f"Unhandled kind: {case.kind}" |
| 285 | + raise ValueError(err) |
| 286 | + return path |
| 287 | + |
17 | 288 |
|
18 | 289 | @common.SKIPIF_WRONG_ERA |
19 | 290 | class TestMnemonic: |
20 | 291 | """Tests for mnemonic sentence.""" |
21 | 292 |
|
22 | 293 | @allure.link(helpers.get_vcs_link()) |
23 | | - @pytest.mark.parametrize("size", (12, 15, 18, 21, 24)) |
| 294 | + @pytest.mark.parametrize("size", _ALLOWED_COUNTS) |
24 | 295 | @pytest.mark.parametrize("out", ("file", "stdout")) |
25 | 296 | @pytest.mark.parametrize( |
26 | 297 | "key_type", |
@@ -74,7 +345,7 @@ def test_gen_and_deriv( |
74 | 345 | assert key_file.exists() |
75 | 346 |
|
76 | 347 | @allure.link(helpers.get_vcs_link()) |
77 | | - @pytest.mark.parametrize("size", (12, 15, 18, 21, 24)) |
| 348 | + @pytest.mark.parametrize("size", _ALLOWED_COUNTS) |
78 | 349 | @pytest.mark.parametrize( |
79 | 350 | "key_type", |
80 | 351 | # pyrefly: ignore # no-matching-overload |
@@ -128,3 +399,58 @@ def test_golden_deriv( |
128 | 399 | assert key_file.exists() |
129 | 400 |
|
130 | 401 | assert helpers.checksum(filename=key_file) == helpers.checksum(filename=golden_key_file) |
| 402 | + |
| 403 | + |
@common.SKIPIF_WRONG_ERA
class TestNegativeMnemonic:
    """Negative tests: mnemonic commands must reject invalid arguments and files."""

    @pytest.fixture
    def tmp_case_path(self) -> tp.Generator[pl.Path, None, None]:
        """Yield a throwaway directory for materialized bad-mnemonic artifacts.

        Created under the current working directory; removed after the test.
        """
        # NOTE(review): this function-scoped fixture is combined with
        # @hypothesis.given below — Hypothesis reuses the one directory for all
        # generated examples and by default flags this combination via the
        # `function_scoped_fixture` health check; confirm the project's
        # `common.hypothesis_settings` accounts for this.
        d = pl.Path(f"reject_mnemonics_{clusterlib.get_rand_str()}").resolve()
        d.mkdir(exist_ok=True)
        yield d
        shutil.rmtree(d)

    @allure.link(helpers.get_vcs_link())
    @hypothesis.given(size=st.integers())
    @common.hypothesis_settings(max_examples=1_000)
    @pytest.mark.smoke
    def test_gen_invalid_size(
        self,
        cluster: clusterlib.ClusterLib,
        size: int,
    ) -> None:
        """Test generating a mnemonic with an invalid size.

        Any integer outside the allowed word counts must make the CLI fail.
        """
        # Valid sizes are exercised by the positive tests; skip them here.
        hypothesis.assume(size not in _ALLOWED_COUNTS)

        common.get_test_id(cluster)
        with pytest.raises(clusterlib.CLIError) as excinfo:
            cluster.g_key.gen_mnemonic(size=size)  # type: ignore
        err_value = str(excinfo.value)
        assert "Invalid mnemonic size" in err_value

    @common.hypothesis_settings(max_examples=500)
    @hypothesis.given(bad_case=invalid_mnemonic_files)
    def test_rejects_noncompliant_mnemonics(
        self,
        cluster: clusterlib.ClusterLib,
        bad_case: BadMnemonicCase,
        tmp_case_path: pl.Path,
    ) -> None:
        """Test that the CLI wrapper rejects malformed mnemonic files.

        Each generated case is materialized on disk (bad content, empty file,
        unreadable file, directory, broken/looping symlink) and deriving a
        DRep key from it must raise a CLIError.
        """
        temp_template = f"{common.get_test_id(cluster)}_{common.unique_time_str()}"
        # NOTE(review): examples share one directory and case names repeat, so
        # a repeated special-kind case can hit FileExistsError inside
        # `_materialize_case` — verify it handles artifact reuse.
        target = _materialize_case(tmp_dir=tmp_case_path, case=bad_case)

        with pytest.raises(clusterlib.CLIError) as excinfo:
            cluster.g_key.derive_from_mnemonic(
                key_name=temp_template,
                key_type=clusterlib.KeyType.DREP,
                mnemonic_file=target,
                account_number=0,
            )
        err_value = str(excinfo.value)
        # The CLI may fail while reading the file or while converting the
        # mnemonic; either message indicates rejection.
        assert (
            "Error reading mnemonic file" in err_value
            or "Error converting the mnemonic into a key" in err_value
        )
0 commit comments