Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 1.0.28

### Enhancements

* **Improved synced block handling in the Notion connector**

## 1.0.27

### Fixes
Expand Down
2 changes: 1 addition & 1 deletion unstructured_ingest/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.0.27" # pragma: no cover
__version__ = "1.0.28" # pragma: no cover
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ def can_have_children() -> bool:

@classmethod
def from_dict(cls, data: dict):
    """Build an original synced block from its raw payload.

    Args:
        data: Mapping that must contain a ``"children"`` entry; its value is
            passed through unchanged as the ``children`` constructor argument.

    Returns:
        The instance produced by ``cls(children=data["children"])``.

    Raises:
        ValueError: If ``data`` has no ``"children"`` key.
    """
    # Original blocks carry the children content themselves, so a payload
    # without that key cannot be turned into an instance.
    if "children" not in data:
        raise ValueError(f"OriginalSyncedBlock data missing 'children': {data}")
    return cls(children=data["children"])

def get_html(self) -> Optional[HtmlTag]:
Expand All @@ -31,27 +34,56 @@ class DuplicateSyncedBlock(BlockBase):

@staticmethod
def can_have_children() -> bool:
    """Report that a duplicate synced block may be flagged as having children.

    The duplicate does not carry children of its own here; it stands in for
    content that does, so the Notion API might report ``has_children=True``
    on the parent block object. The actual children are fetched from the
    original block.

    Returns:
        Always ``True``.
    """
    return True

@classmethod
def from_dict(cls, data: dict):
    """Build a duplicate synced block from its ``synced_from`` reference.

    Args:
        data: Mapping whose ``"synced_from"`` entry is a non-empty dict
            holding both a ``"type"`` and a ``"block_id"`` key.

    Returns:
        The instance produced by ``cls(type=..., block_id=...)`` using the
        two values taken from the nested ``synced_from`` dict.

    Raises:
        ValueError: If ``"synced_from"`` is missing, falsy, or not a dict,
            or if the nested dict lacks ``"type"`` or ``"block_id"``.
    """
    # NOTE: the earlier ``return cls(**data)`` shortcut is intentionally gone.
    # It forwarded the outer payload (keyed by ``synced_from``) straight to
    # the constructor and skipped every check below.
    synced_from_data = data.get("synced_from")
    if not synced_from_data or not isinstance(synced_from_data, dict):
        raise ValueError(f"Invalid data structure for DuplicateSyncedBlock: {data}")

    # Both keys are needed to build the instance; fail with a targeted
    # message rather than a bare KeyError.
    if "type" not in synced_from_data or "block_id" not in synced_from_data:
        raise ValueError(
            f"Missing 'type' or 'block_id' in synced_from data: {synced_from_data}"
        )
    return cls(type=synced_from_data["type"], block_id=synced_from_data["block_id"])

def get_html(self) -> Optional[HtmlTag]:
    """Return no HTML for a duplicate synced block.

    Rendering might require fetching the original block's content, which
    this simple data class does not do.

    Returns:
        Always ``None``.
    """
    return None


class SyncBlock(BlockBase):
    """Dispatcher that turns a synced-block payload into the right block class.

    ``from_dict`` never returns a ``SyncBlock`` itself: it hands back either a
    ``DuplicateSyncedBlock`` or an ``OriginalSyncedBlock``.
    """

    @staticmethod
    def can_have_children() -> bool:
        """Return ``True``: both original and duplicate synced blocks can
        conceptually have children."""
        return True

    @classmethod
    def from_dict(cls, data: dict):
        """Choose the concrete synced-block class for ``data``.

        Args:
            data: Raw synced-block payload.

        Returns:
            * ``DuplicateSyncedBlock.from_dict(data)`` when ``"synced_from"``
              is present and not ``None`` (the payload is a reference).
            * ``OriginalSyncedBlock.from_dict(data)`` when ``"children"`` is
              present (the payload holds the content).
            * ``OriginalSyncedBlock(children=[])`` otherwise.

        Raises:
            ValueError: Propagated from ``DuplicateSyncedBlock.from_dict``
                when the ``synced_from`` reference is malformed.
        """
        # A non-null reference marks a duplicate. A key that is present but
        # ``None`` does not, so key presence alone is not tested.
        if data.get("synced_from") is not None:
            return DuplicateSyncedBlock.from_dict(data)

        if "children" in data:
            return OriginalSyncedBlock.from_dict(data)

        # Neither a reference nor children: the Notion API might return this
        # for an empty original synced block, so treat it as one.
        # NOTE(review): if that assumption is wrong, errors may surface later;
        # consider logging a warning here if stricter handling is needed.
        return OriginalSyncedBlock(children=[])

    def get_html(self) -> Optional[HtmlTag]:
        """Return ``None``.

        The instance returned by ``from_dict`` (original or duplicate) has
        its own ``get_html``, so this method might never be called directly.
        """
        return None
Loading