|
7 | 7 |
|
8 | 8 | # %% Imports |
9 | 9 |
|
| 10 | +import json |
| 11 | +import shutil |
10 | 12 | from copy import deepcopy |
11 | 13 | from functools import partial |
12 | 14 | from pathlib import Path |
13 | 15 | from typing import Callable |
14 | 16 |
|
| 17 | +import numpy as np |
15 | 18 | import pytest |
| 19 | +import yaml |
16 | 20 |
|
17 | | -# from plaid.bridges import huggingface_bridge |
18 | 21 | from plaid.containers.dataset import Dataset |
19 | 22 | from plaid.containers.sample import Sample |
20 | 23 | from plaid.problem_definition import ProblemDefinition |
|
28 | 31 | ) |
29 | 32 |
|
30 | 33 |
|
def test_load_metadata_from_hub_materializes_memmaps(tmp_path, monkeypatch):
    """Hub metadata loader must return arrays independent from temp files.

    A fake hub repository is written under ``tmp_path`` and the
    ``snapshot_download`` / ``hf_hub_download`` names looked up on
    ``plaid.storage.common.reader`` are monkeypatched so that the loader only
    touches local files.

    Besides checking the returned type, the test walks the ``base`` chain of
    the loaded constant: an array obtained with ``np.asarray(memmap)`` is a
    plain ``ndarray`` by type but is still a view onto the mapped file, so a
    type check alone cannot prove independence from the downloaded files.
    """
    import mmap

    from plaid.storage.common import reader as common_reader

    repo_root = tmp_path / "fake_hub_repo"
    constants_dir = repo_root / "constants" / "train"
    constants_dir.mkdir(parents=True)

    # Raw C-ordered bytes of a 2x3 float32 array, described by layout.json.
    data = np.arange(6, dtype=np.float32).reshape(2, 3)
    with open(constants_dir / "data.mmap", "wb") as f:
        f.write(data.tobytes(order="C"))

    with open(constants_dir / "layout.json", "w", encoding="utf-8") as f:
        json.dump(
            {
                "Global/cst_numeric": {
                    "offset": 0,
                    "shape": list(data.shape),
                    "dtype": str(data.dtype),
                }
            },
            f,
        )

    with open(constants_dir / "constant_schema.yaml", "w", encoding="utf-8") as f:
        yaml.safe_dump({"Global/cst_numeric": {"dtype": str(data.dtype), "ndim": 2}}, f)

    with open(repo_root / "variable_schema.yaml", "w", encoding="utf-8") as f:
        yaml.safe_dump({"Global/var": {"dtype": "float32", "ndim": 1}}, f)

    with open(repo_root / "cgns_types.yaml", "w", encoding="utf-8") as f:
        yaml.safe_dump({"Global": "DataArray_t"}, f)

    def _fake_snapshot_download(**kwargs):
        # Stand-in for the hub snapshot: copy the fake constants tree into
        # the directory requested by the loader.
        local_dir = Path(kwargs["local_dir"])
        shutil.copytree(
            repo_root / "constants", local_dir / "constants", dirs_exist_ok=True
        )
        return str(local_dir)

    def _fake_hf_hub_download(**kwargs):
        # Stand-in for a single-file download: resolve inside the fake repo.
        return str(repo_root / kwargs["filename"])

    monkeypatch.setattr(common_reader, "snapshot_download", _fake_snapshot_download)
    monkeypatch.setattr(common_reader, "hf_hub_download", _fake_hf_hub_download)

    flat_cst, variable_schema, constant_schema, cgns_types = (
        common_reader.load_metadata_from_hub("dummy/repo")
    )

    loaded = flat_cst["train"]["Global/cst_numeric"]
    assert isinstance(loaded, np.ndarray)
    assert not isinstance(loaded, np.memmap)

    # A plain ndarray may still be a view backed by a memory-mapped file;
    # make sure nothing in its ownership chain is a memmap or raw mmap.
    owner = loaded.base
    while owner is not None:
        assert not isinstance(owner, (np.memmap, mmap.mmap))
        owner = getattr(owner, "base", None)

    assert np.array_equal(loaded, data)
    assert variable_schema["Global/var"]["dtype"] == "float32"
    assert "Global/cst_numeric" in constant_schema["train"]
    assert cgns_types["Global"] == "DataArray_t"
| 91 | + |
| 92 | + |
def test_load_metadata_from_disk_keeps_memmaps(tmp_path):
    """Local metadata loader keeps memmap-backed numeric constants.

    Builds a minimal on-disk dataset layout (raw constant bytes, their JSON
    layout, and the three YAML metadata files) and checks what
    ``load_metadata_from_disk`` returns for it.
    """
    from plaid.storage.common import reader as common_reader

    root = tmp_path / "dataset"
    split_dir = root / "constants" / "train"
    split_dir.mkdir(parents=True)

    expected = np.arange(6, dtype=np.float32).reshape(2, 3)
    dtype_name = str(expected.dtype)

    # Raw C-ordered buffer plus the layout entry that describes it.
    (split_dir / "data.mmap").write_bytes(expected.tobytes(order="C"))
    layout = {
        "Global/cst_numeric": {
            "offset": 0,
            "shape": list(expected.shape),
            "dtype": dtype_name,
        }
    }
    (split_dir / "layout.json").write_text(json.dumps(layout), encoding="utf-8")

    # YAML metadata files, written in the same order as before.
    yaml_payloads = (
        (
            split_dir / "constant_schema.yaml",
            {"Global/cst_numeric": {"dtype": dtype_name, "ndim": 2}},
        ),
        (
            root / "variable_schema.yaml",
            {"Global/var": {"dtype": "float32", "ndim": 1}},
        ),
        (root / "cgns_types.yaml", {"Global": "DataArray_t"}),
    )
    for target, payload in yaml_payloads:
        with open(target, "w", encoding="utf-8") as stream:
            yaml.safe_dump(payload, stream)

    metadata = common_reader.load_metadata_from_disk(root)
    flat_cst, variable_schema, constant_schema, cgns_types = metadata

    numeric_cst = flat_cst["train"]["Global/cst_numeric"]
    assert isinstance(numeric_cst, np.memmap)
    assert np.array_equal(np.asarray(numeric_cst), expected)
    assert variable_schema["Global/var"]["dtype"] == "float32"
    assert "Global/cst_numeric" in constant_schema["train"]
    assert cgns_types["Global"] == "DataArray_t"
| 136 | + |
| 137 | + |
@pytest.fixture()
def current_directory():
    """Return the absolute path of the directory containing this test file."""
    this_file = Path(__file__).absolute()
    return this_file.parent
|
0 commit comments