Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 1.0.36

* **Add synced block handling to the Notion connector (by teddysupercuts)**

## 1.0.35

* **Fix output path in blob storage destination connector**
Expand Down
2 changes: 1 addition & 1 deletion unstructured_ingest/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.0.35" # pragma: no cover
__version__ = "1.0.36" # pragma: no cover
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ def can_have_children() -> bool:

@classmethod
def from_dict(cls, data: dict):
    """Build an OriginalSyncedBlock from its dictionary form.

    An original synced block carries its content under the ``children`` key.

    Args:
        data: Mapping expected to contain a ``children`` entry.

    Returns:
        A new instance constructed with ``children=data["children"]``.

    Raises:
        ValueError: If ``data`` has no ``children`` key.
    """
    if "children" in data:
        return cls(children=data["children"])
    raise ValueError(f"OriginalSyncedBlock data missing 'children': {data}")

def get_html(self) -> Optional[HtmlTag]:
Expand All @@ -31,27 +37,74 @@ class DuplicateSyncedBlock(BlockBase):

@staticmethod
def can_have_children() -> bool:
    """Report that a duplicate synced block may be flagged as having children.

    A duplicate does not have children fetched directly here; it stands in
    for content that does, so the Notion API might report has_children=True
    on the parent block object. The actual children are fetched from the
    original block.

    Returns:
        Always ``True``.
    """
    return True

@classmethod
def from_dict(cls, data: dict):
    """Create a DuplicateSyncedBlock from dictionary data.

    A duplicate block carries a ``synced_from`` reference; the instance is
    built from that nested mapping's ``type`` and ``block_id`` entries, not
    from the top-level keys of ``data``.

    Args:
        data: Mapping expected to contain a non-empty ``synced_from`` dict
            with ``type`` and ``block_id`` keys.

    Returns:
        A new instance constructed with ``type`` and ``block_id`` taken from
        ``data["synced_from"]``.

    Raises:
        ValueError: If ``synced_from`` is missing, empty, or not a dict, or
            if it lacks ``type`` or ``block_id``.
    """
    # The payload is deliberately NOT splatted into the constructor
    # (``cls(**data)``): the constructor arguments live one level down,
    # inside ``synced_from``, and must be validated first.
    synced_from_data = data.get("synced_from")
    if not synced_from_data or not isinstance(synced_from_data, dict):
        raise ValueError(f"Invalid data structure for DuplicateSyncedBlock: {data}")
    # Ensure required keys are present in the nested dictionary.
    if "type" not in synced_from_data or "block_id" not in synced_from_data:
        raise ValueError(
            f"Missing 'type' or 'block_id' in synced_from data: {synced_from_data}"
        )
    return cls(type=synced_from_data["type"], block_id=synced_from_data["block_id"])

def get_html(self) -> Optional[HtmlTag]:
    """Return the HTML form of this duplicate synced block: there is none.

    Rendering might need the original block's content to be fetched, which
    is outside the scope of this simple data class.

    Returns:
        Always ``None``.
    """
    return None


class SyncBlock(BlockBase):
    """Entry point for synced-block payloads.

    ``from_dict`` does not build a plain ``SyncBlock``; it dispatches to
    ``DuplicateSyncedBlock`` or ``OriginalSyncedBlock`` depending on the
    shape of the payload.
    """

    @staticmethod
    def can_have_children() -> bool:
        """Check if synced blocks can have children.

        Synced blocks (both original and duplicate) can conceptually have
        children.

        Returns:
            Always ``True``.
        """
        return True

    @classmethod
    def from_dict(cls, data: dict):
        """Create the appropriate synced-block object from dictionary data.

        Dispatch rules, checked in order:

        1. ``synced_from`` present and not ``None``: a duplicate, built by
           ``DuplicateSyncedBlock.from_dict``.
        2. ``children`` present: an original, built by
           ``OriginalSyncedBlock.from_dict``.
        3. Neither: treated as an original with no children.

        Args:
            data: The synced-block payload.

        Returns:
            A ``DuplicateSyncedBlock`` or ``OriginalSyncedBlock`` instance.

        Raises:
            ValueError: Propagated from ``DuplicateSyncedBlock.from_dict``
                when ``synced_from`` is set but malformed.
        """
        # Test the value, not mere key presence: a payload may carry
        # ``"synced_from": None``, which must not be read as a duplicate.
        if data.get("synced_from") is not None:
            # It's a duplicate block containing a reference.
            return DuplicateSyncedBlock.from_dict(data)
        if "children" in data:
            # It's an original block containing children.
            return OriginalSyncedBlock.from_dict(data)
        # Neither 'synced_from' nor 'children' is present. The Notion API
        # might return this for an empty original synced block, so treat it
        # as an empty OriginalSyncedBlock. If this assumption is wrong, errors
        # might occur later; consider logging a warning here if strictness
        # is needed.
        return OriginalSyncedBlock(children=[])

    def get_html(self) -> Optional[HtmlTag]:
        """Get HTML representation of the synced block.

        The specific instance returned by ``from_dict`` (original or
        duplicate) handles its own ``get_html`` logic, so this method on the
        base ``SyncBlock`` might not be directly called.

        Returns:
            Always ``None``.
        """
        return None
Loading