Skip to content

Commit f450c8c

Browse files
authored
fix: use same base type for all components (#10)
Signed-off-by: Michele Dolfi <[email protected]>
1 parent 3f81690 commit f450c8c

File tree

5 files changed

+559
-551
lines changed

5 files changed

+559
-551
lines changed

docling_core/types/doc/base.py

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -128,37 +128,28 @@ class GlmTableCell(TableCell):
128128
)
129129

130130

131-
class Table(AliasModel):
132-
"""Table."""
131+
class BaseCell(AliasModel):
132+
"""Base cell."""
133133

134-
num_cols: int = Field(alias="#-cols")
135-
num_rows: int = Field(alias="#-rows")
136134
bounding_box: Optional[BoundingBoxContainer] = Field(
137135
default=None, alias="bounding-box", json_schema_extra=es_field(suppress=True)
138136
)
139-
data: Optional[list[list[Union[GlmTableCell, TableCell]]]] = None
140-
model: Optional[str] = None
141137
prov: Optional[list[Prov]] = None
142138
text: Optional[str] = Field(
143139
default=None, json_schema_extra=es_field(term_vector="with_positions_offsets")
144140
)
145141
obj_type: str = Field(
146-
alias="type",
147-
json_schema_extra=es_field(type="keyword", ignore_above=8191),
142+
alias="type", json_schema_extra=es_field(type="keyword", ignore_above=8191)
148143
)
149144

150145

151-
class BaseCell(AliasModel):
152-
"""Base cell."""
146+
class Table(BaseCell):
147+
"""Table."""
153148

154-
bounding_box: Optional[BoundingBoxContainer] = Field(
155-
default=None, alias="bounding-box", json_schema_extra=es_field(suppress=True)
156-
)
157-
prov: Optional[list[Prov]] = None
158-
text: Optional[str] = None
159-
obj_type: str = Field(
160-
alias="type", json_schema_extra=es_field(type="keyword", ignore_above=8191)
161-
)
149+
num_cols: int = Field(alias="#-cols")
150+
num_rows: int = Field(alias="#-rows")
151+
data: Optional[list[list[Union[GlmTableCell, TableCell]]]] = None
152+
model: Optional[str] = None
162153

163154

164155
class BaseText(AliasModel):

docling_core/types/doc/document.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -393,14 +393,28 @@ def from_dict(cls, data):
393393

394394
return data
395395

396-
def _resolve_ref(self, item: Ref) -> Optional[Table]:
397-
"""Return the resolved reference in case of table reference, otherwise None."""
398-
result: Optional[Table] = None
396+
def _resolve_ref(self, item: Ref) -> Optional[Union[BaseCell, BaseText]]:
397+
"""Return the resolved reference.
399398
400-
# NOTE: currently only resolves table refs & makes assumptions on ref parts
399+
Resolve the Ref object within the document.
400+
If the object is not found, None is returned.
401+
"""
402+
result: Optional[Union[BaseCell, BaseText]] = None
403+
404+
# NOTE: currently only resolves refs explicitly, such that we can make
405+
# assumptions on ref parts
401406
if item.obj_type == "table" and self.tables:
402407
parts = item.ref.split("/")
403408
result = self.tables[int(parts[2])]
409+
elif item.obj_type == "figure" and self.figures:
410+
parts = item.ref.split("/")
411+
result = self.figures[int(parts[2])]
412+
elif item.obj_type == "equation" and self.equations:
413+
parts = item.ref.split("/")
414+
result = self.equations[int(parts[2])]
415+
elif item.obj_type == "footnote" and self.footnotes:
416+
parts = item.ref.split("/")
417+
result = self.footnotes[int(parts[2])]
404418

405419
return result
406420

docs/Document.json

Lines changed: 39 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,8 @@
236236
}
237237
],
238238
"default": null,
239-
"title": "Text"
239+
"title": "Text",
240+
"x-es-term_vector": "with_positions_offsets"
240241
},
241242
"type": {
242243
"title": "Type",
@@ -1571,14 +1572,6 @@
15711572
"Table": {
15721573
"description": "Table.",
15731574
"properties": {
1574-
"#-cols": {
1575-
"title": "#-Cols",
1576-
"type": "integer"
1577-
},
1578-
"#-rows": {
1579-
"title": "#-Rows",
1580-
"type": "integer"
1581-
},
15821575
"bounding-box": {
15831576
"anyOf": [
15841577
{
@@ -1591,21 +1584,11 @@
15911584
"default": null,
15921585
"x-es-suppress": true
15931586
},
1594-
"data": {
1587+
"prov": {
15951588
"anyOf": [
15961589
{
15971590
"items": {
1598-
"items": {
1599-
"anyOf": [
1600-
{
1601-
"$ref": "#/$defs/GlmTableCell"
1602-
},
1603-
{
1604-
"$ref": "#/$defs/TableCell"
1605-
}
1606-
]
1607-
},
1608-
"type": "array"
1591+
"$ref": "#/$defs/Prov"
16091592
},
16101593
"type": "array"
16111594
},
@@ -1614,9 +1597,9 @@
16141597
}
16151598
],
16161599
"default": null,
1617-
"title": "Data"
1600+
"title": "Prov"
16181601
},
1619-
"model": {
1602+
"text": {
16201603
"anyOf": [
16211604
{
16221605
"type": "string"
@@ -1626,13 +1609,38 @@
16261609
}
16271610
],
16281611
"default": null,
1629-
"title": "Model"
1612+
"title": "Text",
1613+
"x-es-term_vector": "with_positions_offsets"
16301614
},
1631-
"prov": {
1615+
"type": {
1616+
"title": "Type",
1617+
"type": "string",
1618+
"x-es-ignore_above": 8191,
1619+
"x-es-type": "keyword"
1620+
},
1621+
"#-cols": {
1622+
"title": "#-Cols",
1623+
"type": "integer"
1624+
},
1625+
"#-rows": {
1626+
"title": "#-Rows",
1627+
"type": "integer"
1628+
},
1629+
"data": {
16321630
"anyOf": [
16331631
{
16341632
"items": {
1635-
"$ref": "#/$defs/Prov"
1633+
"items": {
1634+
"anyOf": [
1635+
{
1636+
"$ref": "#/$defs/GlmTableCell"
1637+
},
1638+
{
1639+
"$ref": "#/$defs/TableCell"
1640+
}
1641+
]
1642+
},
1643+
"type": "array"
16361644
},
16371645
"type": "array"
16381646
},
@@ -1641,9 +1649,9 @@
16411649
}
16421650
],
16431651
"default": null,
1644-
"title": "Prov"
1652+
"title": "Data"
16451653
},
1646-
"text": {
1654+
"model": {
16471655
"anyOf": [
16481656
{
16491657
"type": "string"
@@ -1653,20 +1661,13 @@
16531661
}
16541662
],
16551663
"default": null,
1656-
"title": "Text",
1657-
"x-es-term_vector": "with_positions_offsets"
1658-
},
1659-
"type": {
1660-
"title": "Type",
1661-
"type": "string",
1662-
"x-es-ignore_above": 8191,
1663-
"x-es-type": "keyword"
1664+
"title": "Model"
16641665
}
16651666
},
16661667
"required": [
1668+
"type",
16671669
"#-cols",
1668-
"#-rows",
1669-
"type"
1670+
"#-rows"
16701671
],
16711672
"title": "Table",
16721673
"type": "object"

0 commit comments

Comments
 (0)