"""Test the schema for the v1 dataset."""

+import copy
+import json
+import time
+from pathlib import Path
+
+import pytest
+from pydantic import ValidationError
+
from mdio.schemas.v1 import Dataset as V1Dataset

TEST_SCHEMA = {

def test_dataset_schema_validation() -> None:
    """Test that the dataset schema validates correctly."""
    V1Dataset.model_validate(TEST_SCHEMA)
+
+
+class TestV1DatasetJSONSerialization:
+    """Test JSON serialization capabilities of V1Dataset using Pydantic methods."""
+
+    @pytest.fixture
+    def sample_dataset(self) -> V1Dataset:
+        """Create a sample V1Dataset for testing."""
+        # Use a deep copy to avoid test interference
+        return V1Dataset.model_validate(copy.deepcopy(TEST_SCHEMA))
+
+    def test_model_dump_json_camel_case(self, sample_dataset: V1Dataset) -> None:
+        """Test that JSON serialization produces camelCase keys when dumping by alias."""
+        json_str = sample_dataset.model_dump_json(by_alias=True)
+
+        # Should be valid JSON
+        parsed = json.loads(json_str)
+        assert isinstance(parsed, dict)
+
+        # Should contain expected top-level keys
+        assert "metadata" in parsed
+        assert "variables" in parsed
+
+        # Metadata fields should use camelCase aliases
+        assert parsed["metadata"]["name"] == "test_dataset"
+        assert parsed["metadata"]["apiVersion"] == "1.0.0"
+        assert parsed["metadata"]["createdOn"] == "2023-01-01T00:00:00Z"
+
+        # Should have 4 variables
+        assert len(parsed["variables"]) == 4  # noqa: PLR2004
+
+    def test_model_dump_json_exclude_none(self, sample_dataset: V1Dataset) -> None:
+        """Test JSON serialization excluding None values."""
+        json_str = sample_dataset.model_dump_json(exclude_none=True)
+        json.loads(json_str)  # sanity check: output must still be valid JSON
+
+        # JSON spells its null literal in lowercase, so a plain substring
+        # check is sufficient for this fixed schema
+        assert "null" not in json_str
+
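+    # Hedged companion sketch: excluding None values can only remove keys, so
+    # the compact dump should never be larger than the full dump.
+    def test_model_dump_json_exclude_none_not_larger(
+        self, sample_dataset: V1Dataset
+    ) -> None:
+        """Sketch: an exclude_none dump is at most as large as the full dump."""
+        full = sample_dataset.model_dump_json()
+        compact = sample_dataset.model_dump_json(exclude_none=True)
+        assert len(compact) <= len(full)
+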
+    def test_model_validate_json_basic(self) -> None:
+        """Test basic JSON deserialization using model_validate_json."""
+        json_str = json.dumps(TEST_SCHEMA)
+        dataset = V1Dataset.model_validate_json(json_str)
+
+        assert dataset.metadata.name == "test_dataset"
+        assert dataset.metadata.api_version == "1.0.0"
+        assert len(dataset.variables) == 4  # noqa: PLR2004
+
+        # Check first variable
+        var = dataset.variables[0]
+        assert var.name == "actual_variable"
+        assert var.data_type.value == "float32"
+        assert var.dimensions == ["dim0", "dim1"]
+
+    def test_model_validate_json_invalid(self) -> None:
+        """Test JSON deserialization with invalid data."""
+        invalid_json = '{"metadata": {"name": "test"}, "variables": []}'
+
+        with pytest.raises(ValidationError) as exc_info:
+            V1Dataset.model_validate_json(invalid_json)
+
+        # Should have validation errors
+        errors = exc_info.value.errors()
+        assert len(errors) > 0
+
+    def test_model_validate_json_malformed(self) -> None:
+        """Test JSON deserialization with malformed JSON."""
+        malformed_json = '{"metadata": {"name": "test"'  # Missing closing braces
+
+        with pytest.raises(ValidationError):
+            V1Dataset.model_validate_json(malformed_json)
+
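+    # Hedged companion sketch (assumption: Pydantic v2 reports malformed JSON
+    # with the dedicated error type "json_invalid"): pin down that the failure
+    # is a parse error rather than a field validation error.
+    def test_model_validate_json_malformed_error_type(self) -> None:
+        """Sketch: malformed JSON surfaces as a JSON parse error."""
+        with pytest.raises(ValidationError) as exc_info:
+            V1Dataset.model_validate_json('{"metadata": ')
+        assert any(err["type"] == "json_invalid" for err in exc_info.value.errors())
+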
+    def test_json_schema_generation(self) -> None:
+        """Test JSON schema generation using model_json_schema."""
+        schema = V1Dataset.model_json_schema()
+
+        # Should be a valid JSON schema
+        assert isinstance(schema, dict)
+        assert schema["type"] == "object"
+        assert "properties" in schema
+
+        # Should have metadata and variables properties
+        properties = schema["properties"]
+        assert "metadata" in properties
+        assert "variables" in properties
+
+        # Should have required fields
+        assert "required" in schema
+        required = schema["required"]
+        assert "metadata" in required
+        assert "variables" in required
+
+    def test_json_schema_with_mode(self) -> None:
+        """Test JSON schema generation with different modes."""
+        # Test validation mode (default)
+        validation_schema = V1Dataset.model_json_schema(mode="validation")
+        assert "properties" in validation_schema
+
+        # Test serialization mode
+        serialization_schema = V1Dataset.model_json_schema(mode="serialization")
+        assert "properties" in serialization_schema
+
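+    # Hedged sketch: if the schema is published for external validators, it must
+    # itself be JSON-serializable; a dumps/loads round trip checks that cheaply.
+    def test_json_schema_is_json_serializable(self) -> None:
+        """Sketch: model_json_schema output survives json.dumps/json.loads."""
+        schema = V1Dataset.model_json_schema()
+        assert json.loads(json.dumps(schema)) == schema
+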
+    def test_round_trip_consistency_default(self, sample_dataset: V1Dataset) -> None:
+        """Test that serialization -> deserialization preserves data (default camelCase)."""
+        # Export to JSON (default camelCase)
+        json_str = sample_dataset.model_dump_json()
+
+        # Import from JSON
+        restored_dataset = V1Dataset.model_validate_json(json_str)
+
+        # Export again
+        json_str2 = restored_dataset.model_dump_json()
+
+        # Should be identical
+        assert json_str == json_str2
+
+        # Key properties should match
+        assert sample_dataset.metadata.name == restored_dataset.metadata.name
+        assert sample_dataset.metadata.api_version == restored_dataset.metadata.api_version
+        assert len(sample_dataset.variables) == len(restored_dataset.variables)
+
+        # Variables should match pairwise; lengths were asserted equal above,
+        # so a strict zip is safe and catches accidental truncation
+        for orig_var, restored_var in zip(
+            sample_dataset.variables, restored_dataset.variables, strict=True
+        ):
+            assert orig_var.name == restored_var.name
+            assert orig_var.data_type == restored_var.data_type
+            assert orig_var.dimensions == restored_var.dimensions
+
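+    # Hedged sketch: the same round trip at the Python-dict level, using
+    # model_dump / model_validate instead of the JSON string methods.
+    def test_round_trip_via_python_dict(self, sample_dataset: V1Dataset) -> None:
+        """Sketch: model_dump -> model_validate preserves the dataset."""
+        dumped = sample_dataset.model_dump(by_alias=True)
+        restored = V1Dataset.model_validate(dumped)
+        assert restored.metadata.name == sample_dataset.metadata.name
+        assert len(restored.variables) == len(sample_dataset.variables)
+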
+    def test_round_trip_with_aliases(self, sample_dataset: V1Dataset) -> None:
+        """Test round-trip consistency when using aliases."""
+        # Export with aliases (the model's default serialization)
+        json_str = sample_dataset.model_dump_json()
+
+        # Import (aliases are handled automatically on validation)
+        restored_dataset = V1Dataset.model_validate_json(json_str)
+
+        # Should preserve data
+        assert sample_dataset.metadata.name == restored_dataset.metadata.name
+        assert len(sample_dataset.variables) == len(restored_dataset.variables)
+
+    def test_json_file_operations(self, sample_dataset: V1Dataset, tmp_path: Path) -> None:
+        """Test JSON serialization to/from files."""
+        json_file = tmp_path / "test_dataset.json"
+
+        # Write to file (using default camelCase)
+        json_str = sample_dataset.model_dump_json(indent=2)
+        json_file.write_text(json_str, encoding="utf-8")
+
+        # Verify file exists and has content
+        assert json_file.exists()
+        assert json_file.stat().st_size > 0
+
+        # Read from file
+        file_content = json_file.read_text(encoding="utf-8")
+        restored_dataset = V1Dataset.model_validate_json(file_content)
+
+        # Should match original
+        assert sample_dataset.metadata.name == restored_dataset.metadata.name
+        assert len(sample_dataset.variables) == len(restored_dataset.variables)
+
+    def test_json_validation_without_instantiation(self) -> None:
+        """Test that valid JSON passes validation (the instance is discarded)."""
+        valid_json = json.dumps(TEST_SCHEMA)
+
+        # pytest treats an unhandled ValidationError as a failure, so simply
+        # calling the validator is the assertion; no pass/fail flag is needed
+        V1Dataset.model_validate_json(valid_json)
+
+    def test_partial_json_validation(self) -> None:
+        """Test validation of partial/incomplete JSON data."""
+        # Missing required fields
+        incomplete_schema = {
+            "metadata": {
+                "name": "test_dataset",
+                # Missing apiVersion and createdOn
+            },
+            "variables": [],
+        }
+
+        with pytest.raises(ValidationError) as exc_info:
+            V1Dataset.model_validate_json(json.dumps(incomplete_schema))
+
+        # Should have errors for the missing required fields
+        errors = exc_info.value.errors()
+        error_fields = {error["loc"][-1] for error in errors}
+        assert "apiVersion" in error_fields or "api_version" in error_fields
+
+    def test_json_with_extra_fields(self) -> None:
+        """Test that JSON with extra fields is rejected."""
+        # Use a copy to avoid modifying the global TEST_SCHEMA
+        schema_with_extra = copy.deepcopy(TEST_SCHEMA)
+        schema_with_extra["extra_field"] = "unexpected"
+        schema_with_extra["metadata"]["extra_metadata"] = "also_unexpected"
+
+        # Should raise ValidationError because extra fields are forbidden
+        with pytest.raises(ValidationError) as exc_info:
+            V1Dataset.model_validate_json(json.dumps(schema_with_extra))
+
+        # At least one error should flag the forbidden extras
+        errors = exc_info.value.errors()
+        assert any(error["type"] == "extra_forbidden" for error in errors)
+
+    def test_json_schema_property_structure(self) -> None:
+        """Test that the generated JSON schema carries nested structural information."""
+        schema = V1Dataset.model_json_schema()
+
+        # Top-level properties should expose nested schema information
+        properties = schema.get("properties", {})
+        if "metadata" in properties:
+            metadata_schema = properties["metadata"]
+            assert isinstance(metadata_schema, dict)
+
+        if "variables" in properties:
+            variables_schema = properties["variables"]
+            assert isinstance(variables_schema, dict)
+            assert variables_schema.get("type") == "array"
+
+    def test_json_serialization_performance(self, sample_dataset: V1Dataset) -> None:
+        """Test that JSON serialization is reasonably performant."""
+        # Time multiple serializations; perf_counter is monotonic and
+        # appropriate for measuring short durations
+        start_time = time.perf_counter()
+        for _ in range(100):
+            json_str = sample_dataset.model_dump_json()
+        elapsed = time.perf_counter() - start_time
+
+        # Should complete 100 serializations in reasonable time (< 1 second)
+        assert elapsed < 1.0
+
+        # Verify the JSON is still valid
+        parsed = json.loads(json_str)
+        assert parsed["metadata"]["name"] == "test_dataset"
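+
+    # Hedged sketch: downstream tooling often assumes stable output; two dumps
+    # of the same instance should be byte-identical.
+    def test_model_dump_json_is_deterministic(self, sample_dataset: V1Dataset) -> None:
+        """Sketch: repeated dumps of one instance produce identical JSON."""
+        assert sample_dataset.model_dump_json() == sample_dataset.model_dump_json()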