Skip to content

Commit fa41138

Browse files
committed
#276 - Cannot deserialize from JSON Cas if child type comes before super type
- Use toposort when deserializing JSON type systems
1 parent 26d12b0 commit fa41138

3 files changed

Lines changed: 71 additions & 12 deletions

File tree

cassis/json.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,19 @@ def deserialize(self, source: Union[IO, str], typesystem: Optional[TypeSystem] =
7676

7777
embedded_typesystem = TypeSystem()
7878
json_typesystem = data.get(TYPES_FIELD)
79-
# First load all the types but no features since features of a type X might be of a later loaded type Y
79+
80+
# First, build a dependency graph to support cases where a child type is defined before its super type
81+
type_dependencies = defaultdict(set)
8082
for type_name, json_type in json_typesystem.items():
81-
self._parse_type(embedded_typesystem, type_name, json_type)
83+
type_dependencies[type_name].add(json_type[SUPER_TYPE_FIELD])
84+
85+
# Second, load all the types without their features, since a feature of a type X may have as its range a type Y that is only loaded later
86+
for type_name in toposort_flatten(type_dependencies):
87+
if is_predefined(type_name):
88+
continue
89+
90+
self._parse_type(embedded_typesystem, type_name, json_typesystem[type_name])
91+
8292
# Now we are sure we know all the types, we can create the features
8393
for type_name, json_type in json_typesystem.items():
8494
self._parse_features(embedded_typesystem, type_name, json_type)
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
{
2+
"%TYPES": {
3+
"example.ChildType": {
4+
"%NAME": "example.ChildType",
5+
"%SUPER_TYPE": "example.ParentType",
6+
"childFeature": {
7+
"%NAME": "childFeature",
8+
"%RANGE": "uima.cas.String"
9+
}
10+
},
11+
"example.ParentType": {
12+
"%NAME": "example.ParentType",
13+
"%SUPER_TYPE": "uima.tcas.Annotation",
14+
"parentFeature": {
15+
"%NAME": "parentFeature",
16+
"%RANGE": "uima.cas.String"
17+
}
18+
}
19+
},
20+
"%FEATURE_STRUCTURES": [
21+
{
22+
"%ID": 1,
23+
"%TYPE": "uima.cas.Sofa",
24+
"sofaNum": 1,
25+
"sofaID": "_InitialView"
26+
},
27+
{
28+
"%ID": 2,
29+
"%TYPE": "example.ChildType",
30+
"childFeature": "child",
31+
"parentFeature": "parent",
32+
"@sofa": 1
33+
}
34+
],
35+
"%VIEWS": {
36+
"_InitialView": {
37+
"%SOFA": 1,
38+
"%MEMBERS": [
39+
2
40+
]
41+
}
42+
}
43+
}

tests/test_json.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,16 @@
55
from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator, MultiTypeRandomCasGenerator
66
from tests.util import assert_json_equal
77

8-
FIXTURE_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test_files", "json", "fs_as_array", "ser-ref")
8+
FIXTURE_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test_files", "json")
9+
SER_REF_DIR = os.path.join(FIXTURE_DIR, "fs_as_array", "ser-ref")
910

1011
FIXTURES = [
11-
(os.path.join(FIXTURE_DIR, "casWithSofaDataArray"), []),
12-
(os.path.join(FIXTURE_DIR, "casWithSofaDataURI"), []),
13-
(os.path.join(FIXTURE_DIR, "casWithFloatingPointSpecialValues"), []),
14-
(os.path.join(FIXTURE_DIR, "casWithText"), [["uima.tcas.DocumentAnnotation", 0, 15, "This is a test."]]),
12+
(os.path.join(SER_REF_DIR, "casWithSofaDataArray"), []),
13+
(os.path.join(SER_REF_DIR, "casWithSofaDataURI"), []),
14+
(os.path.join(SER_REF_DIR, "casWithFloatingPointSpecialValues"), []),
15+
(os.path.join(SER_REF_DIR, "casWithText"), [["uima.tcas.DocumentAnnotation", 0, 15, "This is a test."]]),
1516
(
16-
os.path.join(FIXTURE_DIR, "casWithoutTextButWithAnnotations"),
17+
os.path.join(SER_REF_DIR, "casWithoutTextButWithAnnotations"),
1718
[
1819
["uima.tcas.Annotation", 0, 4, None],
1920
["uima.tcas.Annotation", 5, 7, None],
@@ -22,7 +23,7 @@
2223
],
2324
),
2425
(
25-
os.path.join(FIXTURE_DIR, "casWithTextAndAnnotations"),
26+
os.path.join(SER_REF_DIR, "casWithTextAndAnnotations"),
2627
[
2728
["uima.tcas.Annotation", 0, 4, "This"],
2829
["uima.tcas.Annotation", 5, 7, "is"],
@@ -32,7 +33,7 @@
3233
],
3334
),
3435
(
35-
os.path.join(FIXTURE_DIR, "casWithEmojiUnicodeTextAndAnnotations"),
36+
os.path.join(SER_REF_DIR, "casWithEmojiUnicodeTextAndAnnotations"),
3637
[
3738
["uima.tcas.Annotation", 0, 1, "🥳", b"\xf0\x9f\xa5\xb3"],
3839
["uima.tcas.Annotation", 2, 6, "This"],
@@ -59,15 +60,15 @@
5960
],
6061
),
6162
(
62-
os.path.join(FIXTURE_DIR, "casWithLeftToRightTextAndAnnotations"),
63+
os.path.join(SER_REF_DIR, "casWithLeftToRightTextAndAnnotations"),
6364
[
6465
["uima.tcas.Annotation", 0, 3, "هذا"],
6566
["uima.tcas.Annotation", 4, 10, "اختبار"],
6667
["uima.tcas.DocumentAnnotation", 0, 10, "هذا اختبار"],
6768
],
6869
),
6970
(
70-
os.path.join(FIXTURE_DIR, "casWithTraditionalChineseTextAndAnnotations"),
71+
os.path.join(SER_REF_DIR, "casWithTraditionalChineseTextAndAnnotations"),
7172
[
7273
["uima.tcas.Annotation", 0, 1, "這"],
7374
["uima.tcas.Annotation", 1, 2, "是"],
@@ -172,3 +173,8 @@ def test_recursive_type_system():
172173
assert target_type_a.get_feature("typeB").rangeType.name == target_type_b.name
173174
assert target_type_b is not None
174175
assert target_type_b.get_feature("typeA").rangeType.name == target_type_a.name
176+
177+
178+
def test_deserializing_type_system_if_child_type_is_defined_before_supertype():
    """Regression test for #276: a child type may precede its super type in the JSON.

    Loads the ``child_type_before_parent.json`` fixture and checks not only that
    deserialization succeeds, but also that the type hierarchy was reconstructed.
    """
    with open(os.path.join(FIXTURE_DIR, "child_type_before_parent.json"), "rb") as f:
        cas = load_cas_from_json(f)

    # NOTE(review): the type names below assume the fixture declares
    # example.ChildType (super type example.ParentType) before example.ParentType
    # (super type uima.tcas.Annotation) -- confirm against the fixture file.
    child_type = cas.typesystem.get_type("example.ChildType")
    parent_type = cas.typesystem.get_type("example.ParentType")

    assert child_type is not None
    assert parent_type is not None
    # Without dependency-ordered loading, the child could not be linked to its parent.
    assert child_type.supertype.name == "example.ParentType"
    assert parent_type.supertype.name == "uima.tcas.Annotation"

0 commit comments

Comments
 (0)