Skip to content

Commit 1cebbbf

Browse files
committed
Add support for legacy v0.1.0 h5ad files; implement tests for legacy categorical and dataframe formats
1 parent d46981f commit 1cebbbf

File tree

4 files changed

+120
-17
lines changed

4 files changed

+120
-17
lines changed

tests/conftest.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,3 +172,53 @@ def sample_categorical_h5ad(temp_dir):
172172
f.create_dataset("X", data=X)
173173

174174
return file_path
175+
176+
177+
@pytest.fixture
def sample_legacy_v010_h5ad(temp_dir):
    """Write a small h5ad file laid out in the legacy v0.1.0 style.

    Contents of the written file:

    - ``obs``: four rows indexed by ``obs_names``. ``cell_type`` is stored
      as int8 codes whose ``categories`` attribute is an HDF5 object
      reference to ``obs/__categories/cell_type``; ``n_counts`` is a plain
      int32 column.
    - ``var``: two rows indexed by ``var_names``.
    - ``X``: a 4x2 float32 matrix written without encoding attributes.

    Returns the path of the file created inside ``temp_dir``.
    """
    file_path = temp_dir / "test_legacy_v010.h5ad"

    def make_legacy_frame(parent, group_name, index_name, labels):
        # Group + dataframe attributes + byte-string index dataset, created
        # in that order for both obs and var.
        frame = parent.create_group(group_name)
        for attr_name, attr_value in (
            ("_index", index_name),
            ("encoding-type", "dataframe"),
            ("encoding-version", "0.1.0"),
        ):
            frame.attrs[attr_name] = attr_value
        frame.create_dataset(index_name, data=np.array(labels, dtype="S"))
        return frame

    with h5py.File(file_path, "w") as h5:
        obs_frame = make_legacy_frame(
            h5, "obs", "obs_names", ["cell_1", "cell_2", "cell_3", "cell_4"]
        )

        # v0.1.0 convention: category labels live in a __categories subgroup.
        category_labels = np.array(["TypeA", "TypeB", "TypeC"], dtype="S")
        labels_ds = obs_frame.create_group("__categories").create_dataset(
            "cell_type", data=category_labels
        )

        # The column itself holds integer codes and points at its labels
        # through an HDF5 object reference stored in the 'categories' attribute.
        codes_ds = obs_frame.create_dataset(
            "cell_type", data=np.array([0, 1, 0, 2], dtype=np.int8)
        )
        codes_ds.attrs["categories"] = labels_ds.ref

        # Regular non-categorical column.
        count_values = np.array([100, 200, 150, 300], dtype=np.int32)
        obs_frame.create_dataset("n_counts", data=count_values)

        make_legacy_frame(h5, "var", "var_names", ["gene_1", "gene_2"])

        # X carries no encoding-type attribute in the legacy layout.
        matrix = np.array(
            [
                [1.0, 2.0],
                [3.0, 4.0],
                [5.0, 6.0],
                [7.0, 8.0],
            ],
            dtype=np.float32,
        )
        h5.create_dataset("X", data=matrix)

    return file_path

tests/test_cli.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -66,36 +66,32 @@ def test_info_depth_short_flag(self, sample_h5ad_file):
6666
output = result.stdout + (result.stderr or "")
6767
assert "<" in output
6868

69-
def test_info_entry_flag(self, sample_h5ad_file):
70-
"""Test info command with --entry flag."""
71-
result = runner.invoke(app, ["info", "--entry", "X", str(sample_h5ad_file)])
69+
def test_info_entry_positional(self, sample_h5ad_file):
70+
"""Test info command with entry as positional argument."""
71+
result = runner.invoke(app, ["info", str(sample_h5ad_file), "X"])
7272
assert result.exit_code == 0
7373
output = result.stdout + (result.stderr or "")
7474
assert "Path:" in output
7575
assert "Type:" in output
7676

77-
def test_info_entry_short_flag(self, sample_h5ad_file):
78-
"""Test info command with -e short flag."""
79-
result = runner.invoke(app, ["info", "-e", "obs", str(sample_h5ad_file)])
77+
def test_info_entry_obs(self, sample_h5ad_file):
78+
"""Test info command with obs entry."""
79+
result = runner.invoke(app, ["info", str(sample_h5ad_file), "obs"])
8080
assert result.exit_code == 0
8181
output = result.stdout + (result.stderr or "")
8282
assert "Path:" in output
8383
assert "dataframe" in output
8484

8585
def test_info_entry_nested_path(self, sample_h5ad_file):
8686
"""Test info command with nested object path."""
87-
result = runner.invoke(
88-
app, ["info", "-e", "uns/description", str(sample_h5ad_file)]
89-
)
87+
result = runner.invoke(app, ["info", str(sample_h5ad_file), "uns/description"])
9088
assert result.exit_code == 0
9189
output = result.stdout + (result.stderr or "")
9290
assert "Path:" in output
9391

9492
def test_info_entry_not_found(self, sample_h5ad_file):
9593
"""Test info command with non-existent object path."""
96-
result = runner.invoke(
97-
app, ["info", "-e", "nonexistent", str(sample_h5ad_file)]
98-
)
94+
result = runner.invoke(app, ["info", str(sample_h5ad_file), "nonexistent"])
9995
assert result.exit_code == 0 # Doesn't exit with error, just shows message
10096
output = result.stdout + (result.stderr or "")
10197
assert "not found" in output
@@ -114,6 +110,7 @@ def test_export_dataframe_obs(self, sample_h5ad_file, temp_dir):
114110
"dataframe",
115111
str(sample_h5ad_file),
116112
"obs",
113+
"--output",
117114
str(output),
118115
],
119116
)
@@ -137,6 +134,7 @@ def test_export_dataframe_var(self, sample_h5ad_file, temp_dir):
137134
"dataframe",
138135
str(sample_h5ad_file),
139136
"var",
137+
"--output",
140138
str(output),
141139
],
142140
)
@@ -158,6 +156,7 @@ def test_export_dataframe_columns_filter(self, sample_h5ad_file, temp_dir):
158156
"dataframe",
159157
str(sample_h5ad_file),
160158
"obs",
159+
"--output",
161160
str(output),
162161
"--columns",
163162
"obs_names,cell_type",
@@ -183,6 +182,7 @@ def test_export_dataframe_head(self, sample_h5ad_file, temp_dir):
183182
"dataframe",
184183
str(sample_h5ad_file),
185184
"obs",
185+
"--output",
186186
str(output),
187187
"--head",
188188
"2",
@@ -205,6 +205,7 @@ def test_export_dataframe_invalid_axis(self, sample_h5ad_file, temp_dir):
205205
"dataframe",
206206
str(sample_h5ad_file),
207207
"invalid",
208+
"--output",
208209
str(output),
209210
],
210211
)

tests/test_export.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,8 @@ class TestExportDataframe:
112112
def test_export_dataframe_obs(self, sample_h5ad_file, temp_dir):
113113
out = temp_dir / "obs.csv"
114114
result = runner.invoke(
115-
app, ["export", "dataframe", str(sample_h5ad_file), "obs", str(out)]
115+
app,
116+
["export", "dataframe", str(sample_h5ad_file), "obs", "--output", str(out)],
116117
)
117118
assert result.exit_code == 0
118119
assert out.exists()
@@ -125,7 +126,8 @@ def test_wrong_type_for_dataframe(self, sample_h5ad_file, temp_dir):
125126
"""Test that wrong object type is rejected for dataframe export."""
126127
out = temp_dir / "X.csv"
127128
result = runner.invoke(
128-
app, ["export", "dataframe", str(sample_h5ad_file), "X", str(out)]
129+
app,
130+
["export", "dataframe", str(sample_h5ad_file), "X", "--output", str(out)],
129131
)
130132
assert result.exit_code == 1
131133
assert "obs" in result.output or "var" in result.output

tests/test_info_read.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -245,9 +245,59 @@ def test_col_chunk_categorical(self, sample_categorical_h5ad):
245245
result = col_chunk_as_strings(f["obs"], "cell_type", 0, 4, cache)
246246
assert result == ["TypeA", "TypeB", "TypeA", "TypeC"]
247247

248-
def test_col_chunk_unsupported(self, sample_h5ad_file):
249-
"""Test reading unsupported column."""
248+
def test_col_chunk_not_found(self, sample_h5ad_file):
249+
"""Test reading non-existent column."""
250250
with h5py.File(sample_h5ad_file, "r") as f:
251251
cache = {}
252-
with pytest.raises(RuntimeError, match="Unsupported column"):
252+
with pytest.raises(RuntimeError, match="not found in group"):
253253
col_chunk_as_strings(f["obs"], "nonexistent", 0, 5, cache)
254+
255+
256+
class TestLegacyV010Support:
    """Type detection and column reading against a legacy v0.1.0 file."""

    def test_get_entry_type_legacy_categorical(self, sample_legacy_v010_h5ad):
        """The legacy cell_type column is detected as a v0.1.0 categorical."""
        with h5py.File(sample_legacy_v010_h5ad, "r") as h5:
            column = h5["obs"]["cell_type"]
            entry = get_entry_type(column)
            assert entry["type"] == "categorical"
            assert entry["version"] == "0.1.0"
            assert "Legacy" in entry["details"]

    def test_get_entry_type_legacy_dataframe(self, sample_legacy_v010_h5ad):
        """The obs group is detected as a v0.1.0 dataframe flagged as legacy."""
        with h5py.File(sample_legacy_v010_h5ad, "r") as h5:
            entry = get_entry_type(h5["obs"])
            assert entry["type"] == "dataframe"
            assert entry["version"] == "0.1.0"
            details_lowered = entry["details"].lower()
            assert "legacy" in details_lowered

    def test_read_legacy_categorical_column(self, sample_legacy_v010_h5ad):
        """read_categorical_column resolves legacy codes to their labels."""
        expected = ["TypeA", "TypeB", "TypeA", "TypeC"]
        with h5py.File(sample_legacy_v010_h5ad, "r") as h5:
            cache = {}
            column = h5["obs"]["cell_type"]
            parent = h5["obs"]
            labels = read_categorical_column(column, 0, 4, cache, parent)
            assert labels == expected

    def test_col_chunk_legacy_categorical(self, sample_legacy_v010_h5ad):
        """col_chunk_as_strings returns labels for the legacy categorical."""
        expected = ["TypeA", "TypeB", "TypeA", "TypeC"]
        with h5py.File(sample_legacy_v010_h5ad, "r") as h5:
            cache = {}
            labels = col_chunk_as_strings(h5["obs"], "cell_type", 0, 4, cache)
            assert labels == expected

    def test_col_chunk_legacy_numeric(self, sample_legacy_v010_h5ad):
        """col_chunk_as_strings stringifies the plain n_counts column."""
        expected = ["100", "200", "150", "300"]
        with h5py.File(sample_legacy_v010_h5ad, "r") as h5:
            cache = {}
            values = col_chunk_as_strings(h5["obs"], "n_counts", 0, 4, cache)
            assert values == expected

    def test_legacy_categorical_slice(self, sample_legacy_v010_h5ad):
        """A 1..3 window of the legacy categorical yields the middle labels."""
        expected = ["TypeB", "TypeA"]
        with h5py.File(sample_legacy_v010_h5ad, "r") as h5:
            cache = {}
            labels = col_chunk_as_strings(h5["obs"], "cell_type", 1, 3, cache)
            assert labels == expected

0 commit comments

Comments
 (0)