Skip to content

Commit 8c92be2

Browse files
authored
Prevent duplicated fields from parents in child types (#556)
1 parent 9f5180b commit 8c92be2

File tree

5 files changed

+87
-16
lines changed

5 files changed

+87
-16
lines changed

schema_salad/avro/schema.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -503,15 +503,6 @@ def make_field_objects(field_data: List[PropsType], names: Names) -> List[Field]
503503
new_field = Field(
504504
atype, name, has_default, default, order, names, doc, other_props
505505
)
506-
# make sure field name has not been used yet
507-
if new_field.name in parsed_fields:
508-
old_field = parsed_fields[new_field.name]
509-
if not is_subtype(old_field["type"], field["type"]):
510-
raise SchemaParseException(
511-
f"Field name {new_field.name} already in use with "
512-
"incompatible type. "
513-
f"{field['type']} vs {old_field['type']}."
514-
)
515506
parsed_fields[new_field.name] = field
516507
else:
517508
raise SchemaParseException(f"Not a valid field: {field}")

schema_salad/schema.py

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
)
3535

3636
from . import _logger, jsonld_context, ref_resolver, validate
37-
from .avro.schema import Names, SchemaParseException, make_avsc_object
37+
from .avro.schema import Names, SchemaParseException, make_avsc_object, is_subtype
3838
from .exceptions import (
3939
ClassValidationException,
4040
SchemaSaladException,
@@ -617,7 +617,7 @@ def extend_and_specialize(
617617
for spec in aslist(stype["specialize"]):
618618
specs[spec["specializeFrom"]] = spec["specializeTo"]
619619

620-
exfields = [] # type: List[str]
620+
exfields = [] # type: List[Any]
621621
exsym = [] # type: List[str]
622622
for ex in aslist(stype["extends"]):
623623
if ex not in types:
@@ -645,8 +645,46 @@ def extend_and_specialize(
645645

646646
if stype["type"] == "record":
647647
stype = copy.copy(stype)
648-
exfields.extend(stype.get("fields", []))
649-
stype["fields"] = exfields
648+
combined_fields = []
649+
fields = stype.get("fields", [])
650+
# We use short names here so that if a type inherits a field
651+
# (e.g. Child#id) from a parent (Parent#id) we avoid adding
652+
# the same field twice (previously we had just
653+
# ``exfields.extends(stype.fields)``).
654+
sns_fields = {shortname(field["name"]): field for field in fields}
655+
sns_exfields = {
656+
shortname(exfield["name"]): exfield for exfield in exfields
657+
}
658+
659+
# N.B.: This could be simpler. We could have a single loop
660+
# to create the list of fields. The reason for this more
661+
# convoluted solution is to make sure we keep the order
662+
# of ``exfields`` first, and then the type fields. That's
663+
# because we have unit tests that rely on the order that
664+
# fields are written. Codegen output changes as well.
665+
# We are relying on the insertion order preserving
666+
# property of python dicts (i.e. relyig on Py3.5+).
667+
668+
# First pass adding the exfields.
669+
for sn_exfield, exfield in sns_exfields.items():
670+
field = sns_fields.get(sn_exfield, None)
671+
if field is None:
672+
field = exfield
673+
else:
674+
# make sure field name has not been used yet
675+
if not is_subtype(exfield["type"], field["type"]):
676+
raise SchemaParseException(
677+
f"Field name {field['name']} already in use with "
678+
"incompatible type. "
679+
f"{field['type']} vs {exfield['type']}."
680+
)
681+
combined_fields.append(field)
682+
# Second pass, now add the ones that are specific to the subtype.
683+
for field in sns_fields.values():
684+
if field not in combined_fields:
685+
combined_fields.append(field)
686+
687+
stype["fields"] = combined_fields
650688

651689
fieldnames = set() # type: Set[str]
652690
for field in stype["fields"]:
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
- name: Parent
2+
doc: |
3+
Parent record
4+
documentRoot: true
5+
docChild:
6+
- "#Child"
7+
type: record
8+
fields:
9+
- name: id
10+
jsonldPredicate:
11+
_id: "#id"
12+
type: int
13+
doc: Parent ID
14+
15+
- name: Child
16+
doc: |
17+
Child record
18+
type: record
19+
extends: Parent
20+
fields:
21+
- name: id
22+
jsonldPredicate:
23+
_id: "#id"
24+
type: int
25+
doc: Child ID

schema_salad/tests/test_makedoc.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
"""Test schema-salad makedoc"""
2+
3+
from io import StringIO
4+
5+
from schema_salad.makedoc import makedoc
6+
7+
from .util import get_data
8+
9+
10+
def test_schema_salad_inherit_docs() -> None:
11+
"""Test schema-salad-doc when types inherit and override values from parent types."""
12+
schema_path = get_data("tests/inherited-attributes.yml")
13+
assert schema_path
14+
stdout = StringIO()
15+
makedoc(stdout, schema_path)
16+
17+
# The parent ID documentation (i.e. Parent ID) must appear exactly once.
18+
assert 1 == stdout.getvalue().count("Parent ID")

schema_salad/tests/test_subtypes.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,8 @@ def test_avro_loading_subtype_bad() -> None:
101101
path = get_data("tests/test_schema/avro_subtype_bad.yml")
102102
assert path
103103
target_error = (
104-
r"Union\s+item\s+must\s+be\s+a\s+valid\s+Avro\s+schema:\s+"
105-
r"Field\s+name\s+override_me\s+already\s+in\s+use\s+with\s+incompatible\s+"
106-
r"type\.\s+org\.w3id\.cwl\.salad\.Any\s+vs\s+\['string',\s+'int'\]\."
104+
r"Field name .*\/override_me already in use with incompatible type. "
105+
r"Any vs \['string', 'int'\]\."
107106
)
108107
with pytest.raises(SchemaParseException, match=target_error):
109108
document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema(

0 commit comments

Comments
 (0)