Skip to content

Commit b6cee42

Browse files
authored
Merge pull request #1116 from benjeffery/struct-default
Make struct codec encode default values
2 parents e1eef38 + 8ccb607 commit b6cee42

File tree

4 files changed

+93
-9
lines changed

4 files changed

+93
-9
lines changed

docs/metadata.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,9 @@ This codec places extra restrictions on the schema:
8282
This sets the binary encoding used for the property.
8383

8484
#. All metadata objects must have fixed properties.
85-
This means that they can have no missing properties and have no additional
86-
properties not listed in the schema.
85+
This means that they can have no additional properties not listed in the schema. Any
86+
property that does not have a `default` specified in the schema must be present.
87+
Default values will be encoded.
8788

8889
#. Arrays must be lists of homogeneous objects.
8990
For example, this is not valid::

python/CHANGELOG.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
``metadata={}`` is no longer needed as an argument when a schema is present
1616
(:user:`benjeffery`, :issue:`1084`).
1717

18+
- ``default`` in metadata schemas is used to fill in missing values when encoding for
19+
the struct codec. (:user:`benjeffery`, :issue:`1073`, :pr:`1116`).
1820

1921
--------------------
2022
[0.3.4] - 2020-12-02

python/tests/test_metadata.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,6 +1006,23 @@ def test_null_union_top_level(self):
10061006
assert ms.decode_row(ms.validate_and_encode_row(row_data)) == row_data
10071007
assert ms.decode_row(ms.validate_and_encode_row(None)) is None
10081008

1009+
def test_default_values(self):
1010+
schema = {
1011+
"codec": "struct",
1012+
"type": "object",
1013+
"properties": {
1014+
"int": {"type": "number", "binaryFormat": "b", "default": 42},
1015+
"float": {"type": "number", "binaryFormat": "d"},
1016+
},
1017+
}
1018+
ms = metadata.MetadataSchema(schema)
1019+
row_data = {"float": 5.5}
1020+
assert ms.validate_and_encode_row(row_data) == b"\x00\x00\x00\x00\x00\x00\x16@*"
1021+
assert ms.decode_row(ms.validate_and_encode_row(row_data)) == {
1022+
"float": 5.5,
1023+
"int": 42,
1024+
}
1025+
10091026

10101027
class TestStructCodecRoundTrip:
10111028
def round_trip(self, schema, row_data):
@@ -1494,6 +1511,37 @@ def test_additional_properties(self):
14941511
):
14951512
metadata.MetadataSchema(schema)
14961513

1514+
def test_unrequired_property_needs_default(self):
1515+
schema = {
1516+
"codec": "struct",
1517+
"type": "object",
1518+
"properties": {
1519+
"int": {"type": "number", "binaryFormat": "i"},
1520+
"float": {"type": "number", "binaryFormat": "d"},
1521+
},
1522+
"required": ["float"],
1523+
}
1524+
with pytest.raises(
1525+
exceptions.MetadataSchemaValidationError,
1526+
match="Optional property 'int' must have a default value",
1527+
):
1528+
metadata.MetadataSchema(schema)
1529+
1530+
def test_no_default_implies_required(self):
1531+
schema = {
1532+
"codec": "struct",
1533+
"type": "object",
1534+
"properties": {
1535+
"int": {"type": "number", "binaryFormat": "i", "default": 5},
1536+
"float": {"type": "number", "binaryFormat": "d"},
1537+
},
1538+
}
1539+
self.encode(schema, {"float": 5.5})
1540+
with pytest.raises(
1541+
exceptions.MetadataValidationError, match="'float' is a required property"
1542+
):
1543+
self.encode(schema, {})
1544+
14971545

14981546
class TestSLiMDecoding:
14991547
"""

python/tskit/metadata.py

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,21 @@ def binary_format_validator(validator, types, instance, schema):
163163
)
164164

165165

166+
def required_validator(validator, required, instance, schema):
167+
# Do the normal validation
168+
yield from jsonschema._validators.required(validator, required, instance, schema)
169+
170+
# For struct codec if a property is not required, then it must have a default
171+
for prop, sub_schema in instance["properties"].items():
172+
if prop not in instance["required"] and "default" not in sub_schema:
173+
yield jsonschema.ValidationError(
174+
f"Optional property '{prop}' must have" f" a default value"
175+
)
176+
177+
166178
StructCodecSchemaValidator = jsonschema.validators.extend(
167-
TSKITMetadataSchemaValidator, {"type": binary_format_validator}
179+
TSKITMetadataSchemaValidator,
180+
{"type": binary_format_validator, "required": required_validator},
168181
)
169182
META_SCHEMA: Mapping[str, Any] = copy.deepcopy(StructCodecSchemaValidator.META_SCHEMA)
170183
# No union types
@@ -410,9 +423,22 @@ def make_object_encode(cls, sub_schema):
410423
key: StructCodec.make_encode(prop)
411424
for key, prop in sub_schema["properties"].items()
412425
}
413-
return lambda obj: b"".join(
414-
sub_encoder(obj[key]) for key, sub_encoder in sub_encoders.items()
415-
)
426+
defaults = {
427+
key: prop["default"]
428+
for key, prop in sub_schema["properties"].items()
429+
if "default" in prop
430+
}
431+
432+
def object_encode(obj):
433+
values = []
434+
for key, sub_encoder in sub_encoders.items():
435+
try:
436+
values.append(sub_encoder(obj[key]))
437+
except KeyError:
438+
values.append(sub_encoder(defaults[key]))
439+
return b"".join(values)
440+
441+
return object_encode
416442

417443
@classmethod
418444
def make_object_or_null_encode(cls, sub_schema):
@@ -445,7 +471,7 @@ def make_numeric_encode(cls, sub_schema):
445471

446472
@classmethod
447473
def modify_schema(cls, schema: Mapping) -> Mapping:
448-
# This codec requires that all properties are required and additional ones
474+
# This codec requires that additional properties are
449475
# not allowed. Rather than get schema authors to repeat that everywhere
450476
# we add it here, sadly we can't do this in the metaschema as "default" isn't
451477
# used by the validator.
@@ -454,12 +480,19 @@ def enforce_fixed_properties(obj):
454480
return [enforce_fixed_properties(j) for j in obj]
455481
elif type(obj) == dict:
456482
ret = {k: enforce_fixed_properties(v) for k, v in obj.items()}
457-
if ret.get("type") == "object":
483+
if "object" in ret.get("type", []):
458484
if ret.get("additional_properties"):
459485
raise ValueError(
460486
"Struct codec does not support additional_properties"
461487
)
462-
ret["required"] = list(ret.get("properties", {}).keys())
488+
# To prevent authors having to list required properties the default
489+
# is that all without a default are required.
490+
if "required" not in ret:
491+
ret["required"] = [
492+
prop
493+
for prop, sub_schema in ret.get("properties", {}).items()
494+
if "default" not in sub_schema
495+
]
463496
ret["additionalProperties"] = False
464497
return ret
465498
else:

0 commit comments

Comments
 (0)