
Commit aba7637: Linting

Parent: aa5cf85

File tree: 6 files changed, +64 -61 lines

src/mdio/core/v1/_overloads.py

Lines changed: 8 additions & 6 deletions
@@ -25,8 +25,9 @@ def to_mdio(
         """Alias for `.to_zarr()`."""
         # Ensure zarr_version=2 by default unless explicitly overridden
         zarr_version = kwargs.get("zarr_version", 2)
-        if zarr_version != 2:
-            raise ValueError("MDIO only supports zarr_version=2")
+        if zarr_version != 2:  # noqa: PLR2004
+            msg = "MDIO only supports zarr_version=2"
+            raise ValueError(msg)
         kwargs["zarr_version"] = zarr_version
         return super().to_zarr(*args, store=store, **kwargs)

@@ -45,8 +46,9 @@ def to_mdio(
         """Alias for `.to_zarr()`, and writes to Zarr store."""
         # Ensure zarr_version=2 by default unless explicitly overridden
         zarr_version = kwargs.get("zarr_version", 2)
-        if zarr_version != 2:
-            raise ValueError("MDIO only supports zarr_version=2")
+        if zarr_version != 2:  # noqa: PLR2004
+            msg = "MDIO only supports zarr_version=2"
+            raise ValueError(msg)
         kwargs["zarr_version"] = zarr_version
         return super().to_zarr(*args, store=store, **kwargs)

@@ -80,9 +82,9 @@ def open(
     ds.__class__ = MDIODataset
     # Cast each DataArray in data_vars and coords

-    for _name, var in ds.data_vars.items():
+    for _name, var in ds.data_vars.items():  # noqa: PERF102 .values() failed tests
         var.__class__ = MDIODataArray
-    for _name, coord in ds.coords.items():
+    for _name, coord in ds.coords.items():  # noqa: PERF102 .values() failed tests
         coord.__class__ = MDIODataArray
     return ds
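The two `to_mdio` hunks apply Ruff's EM101/TRY003 style: the message moves into a `msg` variable so the raise site stays short, and the `!= 2` comparison gets `# noqa: PLR2004` to acknowledge the magic number instead of promoting it to a constant. The `open` hunks keep `.items()` and suppress PERF102 (which suggests `.values()` when keys are unused) because the `.values()` rewrite broke tests. A minimal sketch of the raise pattern, using a hypothetical `require_zarr_v2` helper rather than the repo's methods:

    def require_zarr_v2(kwargs: dict) -> None:
        """Default zarr_version to 2 and reject anything else (illustrative helper)."""
        zarr_version = kwargs.setdefault("zarr_version", 2)
        if zarr_version != 2:  # noqa: PLR2004 - 2 is the only supported store format
            msg = f"MDIO only supports zarr_version=2, got {zarr_version}"
            raise ValueError(msg)  # EM101: the message lives in a variable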

src/mdio/core/v1/_serializer.py

Lines changed: 8 additions & 10 deletions
@@ -11,6 +11,7 @@
 from numcodecs import Blosc as NumcodecsBlosc

 from mdio.core.v1._overloads import mdio
+from mdio.schemas.chunk_grid import *  # noqa: F403
 from mdio.schemas.compressors import ZFP
 from mdio.schemas.compressors import Blosc
 from mdio.schemas.dimension import NamedDimension

@@ -19,20 +20,16 @@
 from mdio.schemas.metadata import UserAttributes
 from mdio.schemas.v1.dataset import Dataset as MDIODataset
 from mdio.schemas.v1.dataset import DatasetMetadata
+from mdio.schemas.v1.stats import *  # noqa: F403
 from mdio.schemas.v1.units import AllUnits
 from mdio.schemas.v1.variable import Coordinate
 from mdio.schemas.v1.variable import Variable
 from mdio.schemas.v1.variable import VariableMetadata
-from mdio.schemas.chunk_grid import *
-from mdio.schemas.v1.stats import *
-
-import logging

 try:
     import zfpy as zfpy_base  # Base library
     from numcodecs import ZFPY  # Codec
 except ImportError:
-    logging.warning(f"Tried to import zfpy and numcodecs zfpy but failed because {ImportError}")
     zfpy_base = None
     ZFPY = None
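The wildcard imports move into sorted position and gain `# noqa: F403` to silence the star-import warning. The deleted `logging.warning(...)` was also broken on its own terms: the f-string interpolated `{ImportError}`, the exception class itself, so the log could never say why the import failed. The commit opts for a silent fallback; if the warning were worth keeping, binding the caught exception would fix it (a sketch, not the repo's code):

    import logging

    logger = logging.getLogger(__name__)

    try:
        import zfpy as zfpy_base  # base library
        from numcodecs import ZFPY  # codec wrapper
    except ImportError as exc:
        # Bind the exception to report the actual reason; formatting the
        # ImportError class would always print the same unhelpful text.
        logger.warning("zfpy support disabled: %s", exc)
        zfpy_base = None
        ZFPY = None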

@@ -87,9 +84,7 @@ def make_variable(  # noqa: PLR0913 PLR0912
         TypeError: If the metadata type is not supported.
     """

-    # TODO(BrianMichell) #0: I suspect that this is only partially correct...
-
-    def _to_serializable(val: Any) -> Any:
+    def _to_serializable(val: object) -> dict[str, Any] | object:
         return val.model_dump(mode="json", by_alias=True) if hasattr(val, "model_dump") else val

     var_metadata = None

@@ -104,7 +99,9 @@ def _to_serializable(val: Any) -> Any:
                 metadata_dict["unitsV1"] = val
             elif isinstance(md, UserAttributes):
                 attrs = _to_serializable(md)
-                metadata_dict["attributes"] = attrs[0] if isinstance(attrs, list) and len(attrs) == 1 else attrs
+                metadata_dict["attributes"] = (
+                    attrs[0] if isinstance(attrs, list) and len(attrs) == 1 else attrs
+                )
         var_metadata = VariableMetadata(**metadata_dict)

     elif isinstance(metadata, dict):

@@ -121,7 +118,8 @@ def _to_serializable(val: Any) -> Any:
         var_metadata = metadata

     else:
-        raise TypeError(f"Unsupported metadata type: {type(metadata)}")
+        msg = f"Unsupported metadata type: {type(metadata)}"
+        raise TypeError(msg)

     return Variable(
         name=name,
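The tightened `_to_serializable` signature documents the duck-typing the helper relies on: anything exposing Pydantic's `model_dump` becomes a JSON-ready dict, everything else passes through unchanged. Written out standalone (same logic as the one-liner in the diff):

    from typing import Any

    def _to_serializable(val: object) -> dict[str, Any] | object:
        # Pydantic models expose model_dump(); dumping in "json" mode with
        # camelCase aliases turns nested enums, dates, etc. into plain JSON types.
        if hasattr(val, "model_dump"):
            return val.model_dump(mode="json", by_alias=True)
        return val

The remaining hunks are mechanical: the over-long conditional assignment is wrapped in parentheses for line length, and the `TypeError` message moves into a variable, matching the EM101 pattern used in `_overloads.py`.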

src/mdio/schemas/core.py

Lines changed: 3 additions & 3 deletions
@@ -3,12 +3,12 @@
 from __future__ import annotations

 from typing import Any
-from typing import get_type_hints

 from pydantic import BaseModel
 from pydantic import ConfigDict
-from pydantic.alias_generators import to_camel
 from pydantic import Field
+from pydantic.alias_generators import to_camel
+

 def model_fields(model: type[BaseModel]) -> dict[str, tuple[Any, Any]]:
     """Extract Pydantic BaseModel fields.

@@ -57,7 +57,7 @@ class CamelCaseStrictModel(StrictModel):
         ser_json_by_alias=True,
     )

-    def model_dump_json(self, *args, **kwargs):  # type: ignore[override]
+    def model_dump_json(self, *args, **kwargs) -> dict:  # noqa: ANN201 ANN001 ANN002 ANN003
         """Dump JSON using camelCase aliases and excluding None values by default."""
         # Ensure camelCase aliases
         if "by_alias" not in kwargs:

tests/test_main.py

Lines changed: 1 addition & 0 deletions
@@ -30,6 +30,7 @@ def test_main_succeeds(runner: CliRunner, segy_input: Path, zarr_tmp: Path) -> N
 def test_main_cloud(runner: CliRunner, segy_input_uri: str, zarr_tmp: Path) -> None:
     """It exits with a status code of zero."""
     os.environ["MDIO__IMPORT__CLOUD_NATIVE"] = "true"
+    os.environ["MDIO__IMPORT__CPU_COUNT"] = "1"
     cli_args = ["segy", "import", segy_input_uri, str(zarr_tmp)]
     cli_args.extend(["--header-locations", "181,185"])
     cli_args.extend(["--header-names", "inline,crossline"])

tests/unit/test_schema.py

Lines changed: 44 additions & 41 deletions
@@ -311,24 +311,25 @@ class TestPydanticMDIORoundTrip:
     def test_json_to_mdio_dataset(self, tmp_path: Path) -> None:
         """Test converting TEST_SCHEMA JSON to an MDIO dataset using to_mdio."""
         from mdio.core.v1._serializer import _construct_mdio_dataset
-
+
         output_path = tmp_path / "from_json.mdio"
         # output_path = "test_mdio_from_json.mdio"
-
+
         # Step 1: Validate the TEST_SCHEMA JSON with Pydantic
         dataset = V1Dataset.model_validate(TEST_SCHEMA)
-
+
         # Step 2: Convert to MDIO dataset using the internal constructor
         mdio_dataset = _construct_mdio_dataset(dataset)
-
+
         # Step 3: Use to_mdio to save the dataset
         mdio_dataset.to_mdio(store=str(output_path))
-
+
         # Verify the dataset was created
         assert output_path.exists()
-
+
         # Verify we can read it back
         from mdio.core.v1 import mdio
+
         with mdio.open(str(output_path)) as reader:
             assert "actual_variable" in reader
             assert "coord" in reader

@@ -338,21 +339,21 @@ def test_json_to_mdio_dataset(self, tmp_path: Path) -> None:

     def test_mdio_dataset_to_json(self, tmp_path: Path) -> None:
         """Test converting an MDIO dataset back to JSON (camelCase)."""
-        from mdio.core.v1._serializer import _construct_mdio_dataset
         from mdio.core.v1 import mdio
-
+        from mdio.core.v1._serializer import _construct_mdio_dataset
+
         # Step 1: Create MDIO dataset from TEST_SCHEMA
         dataset = V1Dataset.model_validate(TEST_SCHEMA)
         mdio_dataset = _construct_mdio_dataset(dataset)
-
+
         mdio_path = tmp_path / "test_dataset.mdio"
         mdio_dataset.to_mdio(store=str(mdio_path))
-
+
         # Step 2: Read back the MDIO dataset
         with mdio.open(str(mdio_path)) as reader:
             # Step 3: Extract information to reconstruct Pydantic model
             variables = []
-
+
             # Add dimension variables
             for dim_name in ["dim0", "dim1"]:
                 if dim_name in reader.coords:

@@ -363,7 +364,7 @@ def test_mdio_dataset_to_json(self, tmp_path: Path) -> None:
                         "dimensions": [{"name": dim_name, "size": reader.dims[dim_name]}],
                     }
                     variables.append(var_dict)
-
+
             # Add data variables with their metadata
             for var_name in reader.data_vars:
                 var = reader[var_name]

@@ -372,7 +373,7 @@ def test_mdio_dataset_to_json(self, tmp_path: Path) -> None:
                     "dataType": str(var.dtype),
                    "dimensions": list(var.dims),
                 }
-
+
                 # Reconstruct metadata based on original TEST_SCHEMA
                 if var_name == "coord":
                     var_dict["metadata"] = {

@@ -392,21 +393,21 @@ def test_mdio_dataset_to_json(self, tmp_path: Path) -> None:
                     },
                 }
                 variables.append(var_dict)
-
+
             # Step 4: Create Pydantic model data (camelCase)
             dataset_data = {
                 "metadata": {
                     "name": reader.attrs.get("name"),
                     "apiVersion": reader.attrs.get("apiVersion", "1.0.0"),
                     "createdOn": reader.attrs.get("createdOn", "2023-01-01T00:00:00Z"),
                 },
-                "variables": variables
+                "variables": variables,
             }
-
+
             # Step 5: Validate with Pydantic and serialize to JSON using by_alias=True
             pydantic_dataset = V1Dataset.model_validate(dataset_data)
             json_str = pydantic_dataset.model_dump_json(by_alias=True)
-
+
             # Verify it's valid JSON and camelCase
             parsed = json.loads(json_str)

@@ -415,34 +416,34 @@ def test_mdio_dataset_to_json(self, tmp_path: Path) -> None:
             assert "apiVersion" in parsed["metadata"]
             assert "createdOn" in parsed["metadata"]
             assert "dataType" in parsed["variables"][0]
-
+
             # Verify the conversion preserved data
             assert pydantic_dataset.metadata.name == "test_dataset"

     def test_full_round_trip_json_mdio_json(self, tmp_path: Path) -> None:
         """Test full round-trip: TEST_SCHEMA JSON -> MDIO -> JSON using to_mdio."""
-        from mdio.core.v1._serializer import _construct_mdio_dataset
         from mdio.core.v1 import mdio
-
+        from mdio.core.v1._serializer import _construct_mdio_dataset
+
         # Step 1: Start with TEST_SCHEMA (input JSON)
         original_dataset = V1Dataset.model_validate(TEST_SCHEMA)
         original_json = original_dataset.model_dump_json(by_alias=True)
         original_parsed = json.loads(original_json)
-
+
         # Verify original is camelCase
         assert "apiVersion" in original_parsed["metadata"]
         assert "createdOn" in original_parsed["metadata"]
-
+
         # Step 2: Convert to MDIO dataset and save
         mdio_dataset = _construct_mdio_dataset(original_dataset)
         mdio_path = tmp_path / "round_trip.mdio"
         mdio_dataset.to_mdio(store=str(mdio_path))
-
+
         # Step 3: Read back from MDIO and convert to JSON
         with mdio.open(str(mdio_path)) as reader:
             # Reconstruct the schema structure
             variables = []
-
+
             # Add dimension variables
             for dim_name in ["dim0", "dim1"]:
                 if dim_name in reader.coords:

@@ -453,7 +454,7 @@ def test_full_round_trip_json_mdio_json(self, tmp_path: Path) -> None:
                         "dimensions": [{"name": dim_name, "size": reader.dims[dim_name]}],
                     }
                     variables.append(var_dict)
-
+
             # Add coordinate variables that are not dimensions
             for coord_name, coord in reader.coords.items():
                 if coord_name not in ["dim0", "dim1"]:  # Skip dimension coordinates

@@ -462,7 +463,7 @@ def test_full_round_trip_json_mdio_json(self, tmp_path: Path) -> None:
                         "dataType": str(coord.dtype),
                         "dimensions": list(coord.dims),
                     }
-
+
                     # Add metadata for coord variable from original TEST_SCHEMA
                     if coord_name == "coord":
                         var_dict["metadata"] = {

@@ -473,7 +474,7 @@ def test_full_round_trip_json_mdio_json(self, tmp_path: Path) -> None:
                             "unitsV1": {"length": "m"},
                         }
                     variables.append(var_dict)
-
+
             # Add data variables with original metadata
             for var_name in reader.data_vars:
                 var = reader[var_name]

@@ -482,7 +483,7 @@ def test_full_round_trip_json_mdio_json(self, tmp_path: Path) -> None:
                     "dataType": str(var.dtype),
                     "dimensions": list(var.dims),
                 }
-
+
                 # Add original metadata back from TEST_SCHEMA
                 if var_name == "actual_variable":
                     var_dict["compressor"] = {"name": "blosc", "level": 3}

@@ -494,30 +495,32 @@ def test_full_round_trip_json_mdio_json(self, tmp_path: Path) -> None:
                     },
                 }
                 variables.append(var_dict)
-
+
             # Create final dataset
             final_data = {
                 "metadata": {
                     "name": reader.attrs.get("name", "test_dataset"),
                     "apiVersion": reader.attrs.get("apiVersion", "1.0.0"),
                     "createdOn": reader.attrs.get("createdOn", "2023-01-01T00:00:00Z"),
                 },
-                "variables": variables
+                "variables": variables,
             }
-
+
             final_dataset = V1Dataset.model_validate(final_data)
             final_json = final_dataset.model_dump_json(by_alias=True)
             final_parsed = json.loads(final_json)
-
+
             # Step 4: Verify round-trip integrity
             assert final_parsed["metadata"]["name"] == original_parsed["metadata"]["name"]
-            assert final_parsed["metadata"]["apiVersion"] == original_parsed["metadata"]["apiVersion"]
-
+            assert (
+                final_parsed["metadata"]["apiVersion"] == original_parsed["metadata"]["apiVersion"]
+            )
+
             # Verify camelCase is preserved
             assert "apiVersion" in final_parsed["metadata"]
             assert "createdOn" in final_parsed["metadata"]
             assert "dataType" in final_parsed["variables"][0]
-
+
             # Verify variable structure is preserved
             original_var_names = {v["name"] for v in original_parsed["variables"]}
             final_var_names = {v["name"] for v in final_parsed["variables"]}

@@ -543,9 +546,9 @@ def test_invalid_snake_case_json_fails(self) -> None:
                     "data_type": "float32",  # snake_case should fail
                     "dimensions": ["dim0"],
                 }
-            ]
+            ],
         }
-
+
        # This should fail validation
        with pytest.raises(ValidationError):
            V1Dataset.model_validate(invalid_snake_case_schema)

@@ -555,20 +558,20 @@ def test_camel_case_serialization_only(self) -> None:
         dataset = V1Dataset.model_validate(TEST_SCHEMA)
         json_str = dataset.model_dump_json()
         parsed = json.loads(json_str)
-
+
         # Verify camelCase fields are present
         assert "apiVersion" in parsed["metadata"]
         assert "createdOn" in parsed["metadata"]
-
+
         # Verify snake_case fields are NOT present
         assert "api_version" not in parsed["metadata"]
         assert "created_on" not in parsed["metadata"]
-
+
         # Check variables use camelCase
         for var in parsed["variables"]:
             assert "dataType" in var
             assert "data_type" not in var
-
+
             # Check nested metadata if present
             if "metadata" in var and "chunkGrid" in var["metadata"]:
                 assert "chunkGrid" in var["metadata"]

uv.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default.
