Skip to content

Commit 8fa4b68

Browse files
committed
Add JSON serialization test
1 parent 046a73c commit 8fa4b68

File tree

1 file changed

+252
-0
lines changed

1 file changed

+252
-0
lines changed

tests/unit/test_schema.py

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
"""Test the schema for the v1 dataset."""
22

3+
import copy
4+
import json
5+
from pathlib import Path
6+
7+
import pytest
8+
from pydantic import ValidationError
9+
310
from mdio.schemas.v1 import Dataset as V1Dataset
411

512
TEST_SCHEMA = {
@@ -51,3 +58,248 @@
5158
def test_dataset_schema_validation() -> None:
    """Validate that TEST_SCHEMA conforms to the v1 dataset schema."""
    # model_validate raises pydantic.ValidationError on failure, so an
    # exception-free call is the success criterion for this test.
    parsed = V1Dataset.model_validate(TEST_SCHEMA)
    assert parsed is not None
61+
62+
63+
class TestV1DatasetJSONSerialization:
    """Test JSON serialization capabilities of V1Dataset using Pydantic methods."""

    @pytest.fixture
    def sample_dataset(self) -> V1Dataset:
        """Create a sample V1Dataset for testing."""
        # Deep copy so a test mutating the dataset cannot leak changes into
        # the module-level TEST_SCHEMA shared by other tests.
        return V1Dataset.model_validate(copy.deepcopy(TEST_SCHEMA))

    def test_model_dump_json_default_camel_case(self, sample_dataset: V1Dataset) -> None:
        """Test that JSON serialization uses camelCase by default."""
        json_str = sample_dataset.model_dump_json(by_alias=True)

        # Should be valid JSON
        parsed = json.loads(json_str)
        assert isinstance(parsed, dict)

        # Should contain expected top-level keys
        assert "metadata" in parsed
        assert "variables" in parsed

        # Metadata fields should serialize under their camelCase aliases
        assert parsed["metadata"]["name"] == "test_dataset"
        assert parsed["metadata"]["apiVersion"] == "1.0.0"
        assert parsed["metadata"]["createdOn"] == "2023-01-01T00:00:00Z"

        # Should have 4 variables
        assert len(parsed["variables"]) == 4  # noqa: PLR2004

    def test_model_dump_json_exclude_none(self, sample_dataset: V1Dataset) -> None:
        """Test JSON serialization excluding None values."""
        json_str = sample_dataset.model_dump_json(exclude_none=True)

        # Output must still parse as JSON ...
        json.loads(json_str)

        # ... and must not contain any null values
        assert "null" not in json_str.lower()

    def test_model_validate_json_basic(self) -> None:
        """Test basic JSON deserialization using model_validate_json."""
        json_str = json.dumps(TEST_SCHEMA)
        dataset = V1Dataset.model_validate_json(json_str)

        assert dataset.metadata.name == "test_dataset"
        assert dataset.metadata.api_version == "1.0.0"
        assert len(dataset.variables) == 4  # noqa: PLR2004

        # Check first variable
        var = dataset.variables[0]
        assert var.name == "actual_variable"
        assert var.data_type.value == "float32"
        assert var.dimensions == ["dim0", "dim1"]

    def test_model_validate_json_invalid(self) -> None:
        """Test JSON deserialization with invalid data."""
        invalid_json = '{"metadata": {"name": "test"}, "variables": []}'

        with pytest.raises(ValidationError) as exc_info:
            V1Dataset.model_validate_json(invalid_json)

        # Should report at least one validation error
        assert len(exc_info.value.errors()) > 0

    def test_model_validate_json_malformed(self) -> None:
        """Test JSON deserialization with malformed JSON."""
        malformed_json = '{"metadata": {"name": "test"'  # Missing closing braces

        # Pydantic v2 wraps JSON parse failures in ValidationError too
        with pytest.raises(ValidationError):
            V1Dataset.model_validate_json(malformed_json)

    def test_json_schema_generation(self) -> None:
        """Test JSON schema generation using model_json_schema."""
        schema = V1Dataset.model_json_schema()

        # Should be a valid JSON schema
        assert isinstance(schema, dict)
        assert schema["type"] == "object"
        assert "properties" in schema

        # Should have metadata and variables properties
        properties = schema["properties"]
        assert "metadata" in properties
        assert "variables" in properties

        # Should have required fields
        assert "required" in schema
        required = schema["required"]
        assert "metadata" in required
        assert "variables" in required

    def test_json_schema_with_mode(self) -> None:
        """Test JSON schema generation with different modes."""
        # Test validation mode (default)
        validation_schema = V1Dataset.model_json_schema(mode="validation")
        assert "properties" in validation_schema

        # Test serialization mode
        serialization_schema = V1Dataset.model_json_schema(mode="serialization")
        assert "properties" in serialization_schema

    def test_round_trip_consistency_default(self, sample_dataset: V1Dataset) -> None:
        """Test that serialization -> deserialization preserves data (default camelCase)."""
        # Export to JSON (default camelCase)
        json_str = sample_dataset.model_dump_json()

        # Import from JSON
        restored_dataset = V1Dataset.model_validate_json(json_str)

        # Export again; the second pass should be byte-identical
        json_str2 = restored_dataset.model_dump_json()
        assert json_str == json_str2

        # Key properties should match
        assert sample_dataset.metadata.name == restored_dataset.metadata.name
        assert sample_dataset.metadata.api_version == restored_dataset.metadata.api_version
        assert len(sample_dataset.variables) == len(restored_dataset.variables)

        # Variables should match pairwise; strict=True makes zip fail loudly
        # on a length mismatch instead of silently truncating.
        for orig_var, restored_var in zip(
            sample_dataset.variables, restored_dataset.variables, strict=True
        ):
            assert orig_var.name == restored_var.name
            assert orig_var.data_type == restored_var.data_type
            assert orig_var.dimensions == restored_var.dimensions

    def test_round_trip_with_aliases(self, sample_dataset: V1Dataset) -> None:
        """Test round-trip consistency when using aliases."""
        # Export with aliases (the default serialization behavior)
        json_str = sample_dataset.model_dump_json()

        # Import (aliases are handled automatically on validation)
        restored_dataset = V1Dataset.model_validate_json(json_str)

        # Should preserve data
        assert sample_dataset.metadata.name == restored_dataset.metadata.name
        assert len(sample_dataset.variables) == len(restored_dataset.variables)

    def test_json_file_operations(self, sample_dataset: V1Dataset, tmp_path: Path) -> None:
        """Test JSON serialization to/from files."""
        json_file = tmp_path / "test_dataset.json"

        # Write to file (using default camelCase)
        json_str = sample_dataset.model_dump_json(indent=2)
        json_file.write_text(json_str, encoding="utf-8")

        # Verify file exists and has content
        assert json_file.exists()
        assert json_file.stat().st_size > 0

        # Read from file
        file_content = json_file.read_text(encoding="utf-8")
        restored_dataset = V1Dataset.model_validate_json(file_content)

        # Should match original
        assert sample_dataset.metadata.name == restored_dataset.metadata.name
        assert len(sample_dataset.variables) == len(restored_dataset.variables)

    def test_json_validation_without_instantiation(self) -> None:
        """Test JSON validation without creating a dataset instance."""
        valid_json = json.dumps(TEST_SCHEMA)

        # A clean call is the success criterion: pytest fails the test on any
        # raised ValidationError, so no try/except flag dance is needed.
        V1Dataset.model_validate_json(valid_json)

    def test_partial_json_validation(self) -> None:
        """Test validation of partial/incomplete JSON data."""
        # Missing required fields
        incomplete_schema = {
            "metadata": {
                "name": "test_dataset",
                # Missing apiVersion and createdOn
            },
            "variables": [],
        }

        with pytest.raises(ValidationError) as exc_info:
            V1Dataset.model_validate_json(json.dumps(incomplete_schema))

        errors = exc_info.value.errors()
        # Should have errors for missing required fields; the field may be
        # reported under either its alias or its Python attribute name.
        error_fields = {error["loc"][-1] for error in errors}
        assert "apiVersion" in error_fields or "api_version" in error_fields

    def test_json_with_extra_fields(self) -> None:
        """Test JSON deserialization with extra fields."""
        # Use a copy to avoid modifying the global TEST_SCHEMA
        schema_with_extra = copy.deepcopy(TEST_SCHEMA)
        schema_with_extra["extra_field"] = "should_be_ignored"
        schema_with_extra["metadata"]["extra_metadata"] = "also_ignored"

        # Should raise ValidationError because extra fields are forbidden
        with pytest.raises(ValidationError) as exc_info:
            V1Dataset.model_validate_json(json.dumps(schema_with_extra))

        # Check the structured error type rather than a substring of the
        # whole error dict's repr, which could match unrelated text.
        errors = exc_info.value.errors()
        assert any(error["type"] == "extra_forbidden" for error in errors)

    def test_json_schema_contains_examples(self) -> None:
        """Test that generated JSON schema contains useful information."""
        schema = V1Dataset.model_json_schema()

        # Should have descriptions for properties
        properties = schema.get("properties", {})
        if "metadata" in properties:
            # Check if metadata has some schema information
            metadata_schema = properties["metadata"]
            assert isinstance(metadata_schema, dict)

        if "variables" in properties:
            # Check if variables has some schema information
            variables_schema = properties["variables"]
            assert isinstance(variables_schema, dict)
            assert variables_schema.get("type") == "array"

    def test_json_serialization_performance(self, sample_dataset: V1Dataset) -> None:
        """Test that JSON serialization is reasonably performant."""
        import time  # local import: only this test needs timing

        # Time multiple serializations with a monotonic clock;
        # time.time() can jump if the wall clock is adjusted mid-test.
        start_time = time.perf_counter()
        for _ in range(100):
            json_str = sample_dataset.model_dump_json()
        elapsed = time.perf_counter() - start_time

        # Should complete 100 serializations in reasonable time (< 1 second)
        assert elapsed < 1.0

        # Verify the JSON is still valid
        parsed = json.loads(json_str)
        assert parsed["metadata"]["name"] == "test_dataset"

0 commit comments

Comments
 (0)