Skip to content

Commit 4ec3834

Browse files
fix: Notion connector missing database properties fields (#490)
* Add optional description field to notion database properties * Handle unsupported button db property * Test notion db properties mappings * Update changelog and version; Fix Notion connector missing database properties fields * add htmlBuilder dependency to test dependencies * add htmlBuilder dependency to test dependencies * Remove duplicated description property * Overwrite notion integration test fixtures
1 parent 88e3b33 commit 4ec3834

File tree

32 files changed

+167
-19
lines changed

32 files changed

+167
-19
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## 1.0.22
2+
3+
* **Fix Notion connector missing database properties fields**
4+
15
## 1.0.21
26

37
* **Fix Jira connector cloud option not working issue**

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,14 +136,14 @@ test = [
136136
"deepdiff",
137137
"bs4",
138138
"pandas",
139-
140139
# Connector specific deps
141140
"cryptography",
142141
"fsspec",
143142
"vertexai",
144143
"pyiceberg",
145144
"pyarrow",
146-
"networkx"
145+
"networkx",
146+
"htmlbuilder",
147147
]
148148
# Add constraints needed for CI
149149
ci = [

test/integration/connectors/expected_results/notion_database/file_data/1722c3765a0a8082b382ebc2c62d3f4c.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
"database_id": "1722c3765a0a8082b382ebc2c62d3f4c"
1414
},
1515
"date_created": "2025-01-05T18:34:00.000Z",
16-
"date_modified": "2025-01-07T19:15:00.000Z",
17-
"date_processed": "1736277913.3980532",
16+
"date_modified": "2025-04-25T13:45:00.000Z",
17+
"date_processed": "1745588835.1260726",
1818
"permissions_data": null,
1919
"filesize_bytes": null
2020
},
@@ -24,7 +24,7 @@
2424
"object": "user"
2525
},
2626
"last_edited_by": {
27-
"id": "118d872b-594c-8171-b46f-00020d10d8b2",
27+
"id": "34c6c783-c141-44c9-930d-6f74dd8769c9",
2828
"object": "user"
2929
},
3030
"parent": {
@@ -34,6 +34,6 @@
3434
"url": "https://www.notion.so/1722c3765a0a8082b382ebc2c62d3f4c"
3535
},
3636
"reprocess": false,
37-
"local_download_path": "/private/var/folders/h7/n848df9s5yn7ml8rxb61vhyc0000gp/T/tmpxu906ary/1722c3765a0a8082b382ebc2c62d3f4c.html",
37+
"local_download_path": "/tmp/tmpopmj74lc/1722c3765a0a8082b382ebc2c62d3f4c.html",
3838
"display_name": null
3939
}

test/integration/connectors/expected_results/notion_page/downloads/1572c3765a0a806299f0dd6999f9e4c7.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
<div>
1717
testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2 testtext2
1818
</div>
19-
<img src='https://prod-files-secure.s3.us-west-2.amazonaws.com/9e97f74a-ce4a-43ae-b704-4b8501948642/902effc2-1280-4e9c-92cc-77d940b24ac0/image.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45FSPPWI6X%2F20250107%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20250107T192520Z&X-Amz-Expires=3600&X-Amz-Signature=b6f7eb5d579e9412619e94adee500e627aea915f8f5d8a9383ddd9b832fe2098&X-Amz-SignedHeaders=host&x-id=GetObject'/>
19+
<img src='https://prod-files-secure.s3.us-west-2.amazonaws.com/9e97f74a-ce4a-43ae-b704-4b8501948642/902effc2-1280-4e9c-92cc-77d940b24ac0/image.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=ASIAZI2LB466WLUW7QMG%2F20250425%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20250425T134727Z&X-Amz-Expires=3600&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEJb%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLXdlc3QtMiJHMEUCIEzyS69nxDRljZ06YqW%2FOmZ5O%2BDqX7%2BGapP7d%2BmEtS4vAiEAx3hj3IU3tEyLzKSItb%2BSBg7YbmlUpXSj9mnmCPazuJgq%2FwMILhAAGgw2Mzc0MjMxODM4MDUiDOB%2F9anCPuqKjN9CAircA%2Bj3YpyDwDTbmHqFJcdXi6kqOQnEOV%2FWRXvA0MMGE2BDpekp8c35XJrR9%2F8HgURE98viLioNEVPE1tL%2FHjggGePd1FNod5cYzW0ZAQSR6g4jvc4JFypzZUGCOKQjzRB98JfOoa0OZRscco8vdL3NxHyix9M8ULG6vPu%2BUSRZohSUtp75LUlRV%2FQv3rWUDqCfHS2yNFiaApcwFlXYuq%2B%2B2YDRWT7blDRArEnZAVlPerUb07vDpkdqI16YyP4nM%2FMAT79YcViVWsOGHtsXjqyOWKeW4T6XDx558YCudPVLXiJgg5vLqPrvI0pqnL4hHZ8urop0yTdGF2NZwyp4NORx7CUNCXiD%2B4NXsJvBWvHlQnQtM%2FLVDCjfxGYgE2k%2BHK7Jy4n%2Bm3bmnZbzPHSw5LNlXUE1Og%2Ft0sNcBaH4xLqpOJZfbCx36f8mt60zWRQXoR0t641oEoWcS9SWNFbtDs3EUTNq38M5KmBdZ18J6IGFcKSqLIhVXo9H4FAZL0gMETFvujo9ssAukwuweO4k11Eu1%2BrZpjd03Qv%2FTcRlyhhwLLeWjgTe6XslYRcMxSYsKWqgVjpf5jEvEfItcVURZmP40%2FY9Cha8LJp2eXyHL11WMPipugWGcdDIu3VS2YZGMMiZrsAGOqUBG5Id9hpzMlmW0EAc7YvSEBGxb%2BCkaR%2FkTslquCslUDdT92mTB2ss%2BDawd3afX4AlV8RX4mOcjIx41JEHEe9pGkGWOlLhkl7dmgfmiSZ0iaMEGZtSeeEu%2BAU4RDYJo4kqg%2F7vVSZDAeysJV1U%2Fn6brT9XPXwvV3lzPHda2egaX0g4OKBGzNyaT%2BPxsMjg6n2LHIfRKz%2BiXcBeDOov4gtjBvZMC2B%2B&X-Amz-Signature=45d59807b42ea635184e3f0ad03a3d659e019f1406673d062a74b3c3c4d81f1b&X-Amz-SignedHeaders=host&x-id=GetObject'/>
2020
<div>
2121
<ol style='margin-left: 0px'>
2222
<li>
@@ -137,7 +137,7 @@
137137
</td>
138138
</tr>
139139
</table>
140-
<img src='https://prod-files-secure.s3.us-west-2.amazonaws.com/9e97f74a-ce4a-43ae-b704-4b8501948642/bef64626-dfad-4bf2-9486-0fe380e90e4f/image.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45FSPPWI6X%2F20250107%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20250107T192520Z&X-Amz-Expires=3600&X-Amz-Signature=f1decc7c99b1fe97c02246260df176e49ec040f48d00e619166cc6a41e33ce4e&X-Amz-SignedHeaders=host&x-id=GetObject'/>
140+
<img src='https://prod-files-secure.s3.us-west-2.amazonaws.com/9e97f74a-ce4a-43ae-b704-4b8501948642/bef64626-dfad-4bf2-9486-0fe380e90e4f/image.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=ASIAZI2LB466WLUW7QMG%2F20250425%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20250425T134727Z&X-Amz-Expires=3600&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEJb%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLXdlc3QtMiJHMEUCIEzyS69nxDRljZ06YqW%2FOmZ5O%2BDqX7%2BGapP7d%2BmEtS4vAiEAx3hj3IU3tEyLzKSItb%2BSBg7YbmlUpXSj9mnmCPazuJgq%2FwMILhAAGgw2Mzc0MjMxODM4MDUiDOB%2F9anCPuqKjN9CAircA%2Bj3YpyDwDTbmHqFJcdXi6kqOQnEOV%2FWRXvA0MMGE2BDpekp8c35XJrR9%2F8HgURE98viLioNEVPE1tL%2FHjggGePd1FNod5cYzW0ZAQSR6g4jvc4JFypzZUGCOKQjzRB98JfOoa0OZRscco8vdL3NxHyix9M8ULG6vPu%2BUSRZohSUtp75LUlRV%2FQv3rWUDqCfHS2yNFiaApcwFlXYuq%2B%2B2YDRWT7blDRArEnZAVlPerUb07vDpkdqI16YyP4nM%2FMAT79YcViVWsOGHtsXjqyOWKeW4T6XDx558YCudPVLXiJgg5vLqPrvI0pqnL4hHZ8urop0yTdGF2NZwyp4NORx7CUNCXiD%2B4NXsJvBWvHlQnQtM%2FLVDCjfxGYgE2k%2BHK7Jy4n%2Bm3bmnZbzPHSw5LNlXUE1Og%2Ft0sNcBaH4xLqpOJZfbCx36f8mt60zWRQXoR0t641oEoWcS9SWNFbtDs3EUTNq38M5KmBdZ18J6IGFcKSqLIhVXo9H4FAZL0gMETFvujo9ssAukwuweO4k11Eu1%2BrZpjd03Qv%2FTcRlyhhwLLeWjgTe6XslYRcMxSYsKWqgVjpf5jEvEfItcVURZmP40%2FY9Cha8LJp2eXyHL11WMPipugWGcdDIu3VS2YZGMMiZrsAGOqUBG5Id9hpzMlmW0EAc7YvSEBGxb%2BCkaR%2FkTslquCslUDdT92mTB2ss%2BDawd3afX4AlV8RX4mOcjIx41JEHEe9pGkGWOlLhkl7dmgfmiSZ0iaMEGZtSeeEu%2BAU4RDYJo4kqg%2F7vVSZDAeysJV1U%2Fn6brT9XPXwvV3lzPHda2egaX0g4OKBGzNyaT%2BPxsMjg6n2LHIfRKz%2BiXcBeDOov4gtjBvZMC2B%2B&X-Amz-Signature=0e8cac29d83c138953bed589f69e160359037057306e043ecd1a5662b2087045&X-Amz-SignedHeaders=host&x-id=GetObject'/>
141141
<div>
142142
2 Columns in ColumnList
143143
</div>

test/integration/connectors/expected_results/notion_page/file_data/1572c3765a0a806299f0dd6999f9e4c7.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
},
1515
"date_created": "2024-12-09T18:13:00.000Z",
1616
"date_modified": "2025-01-07T19:24:00.000Z",
17-
"date_processed": "1736277919.434568",
17+
"date_processed": "1745588845.2252257",
1818
"permissions_data": null,
1919
"filesize_bytes": null
2020
},
@@ -34,6 +34,6 @@
3434
"url": "https://www.notion.so/test-doc1-1572c3765a0a806299f0dd6999f9e4c7"
3535
},
3636
"reprocess": false,
37-
"local_download_path": "/private/var/folders/h7/n848df9s5yn7ml8rxb61vhyc0000gp/T/tmpluf__jry/1572c3765a0a806299f0dd6999f9e4c7.html",
37+
"local_download_path": "/tmp/tmpb4ooe42f/1572c3765a0a806299f0dd6999f9e4c7.html",
3838
"display_name": null
3939
}

test/unit/connectors/notion/__init__.py

Whitespace-only changes.
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import pytest
2+
import pytest_mock
3+
4+
from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
5+
from unstructured_ingest.processes.connectors.notion.types.database_properties import (
6+
db_cell_type_mapping,
7+
db_prop_type_mapping,
8+
map_cells,
9+
map_properties,
10+
unsupported_db_prop_types,
11+
)
12+
13+
14+
def test_map_properties_success(mocker: pytest_mock.MockerFixture):
15+
mock_property = mocker.MagicMock(spec=DBPropertyBase)
16+
mock_property.from_dict = mocker.MagicMock(return_value=mock_property)
17+
mocker.patch.dict(db_prop_type_mapping, {"mock_type": mock_property})
18+
19+
props = {
20+
"property1": {"type": "mock_type", "data": "value1"},
21+
"property2": {"type": "mock_type", "data": "value2"},
22+
}
23+
result = map_properties(props)
24+
25+
assert len(result) == 2
26+
assert "property1" in result
27+
assert "property2" in result
28+
mock_property.from_dict.assert_any_call({"type": "mock_type", "data": "value1"})
29+
mock_property.from_dict.assert_any_call({"type": "mock_type", "data": "value2"})
30+
31+
32+
def test_map_properties_unsupported_type(caplog: pytest.LogCaptureFixture):
33+
props = {
34+
"property1": {"type": unsupported_db_prop_types[0], "data": "value1"},
35+
}
36+
result = map_properties(props)
37+
38+
assert result == {}
39+
assert "Unsupported property type 'button' for property 'property1'. Skipping." in caplog.text
40+
41+
42+
def test_map_properties_key_error():
43+
props = {
44+
"property1": {"data": "value1"}, # Missing "type" key
45+
}
46+
47+
with pytest.raises(KeyError, match="failed to map to associated database property"):
48+
map_properties(props)
49+
50+
51+
def test_map_properties_invalid_type():
52+
props = {
53+
"property1": {"type": "non_existent_type", "data": "value1"},
54+
}
55+
56+
with pytest.raises(KeyError, match="failed to map to associated database property"):
57+
map_properties(props)
58+
59+
60+
def test_map_cells_success(mocker: pytest_mock.MockerFixture):
61+
mock_property = mocker.MagicMock(spec=DBCellBase)
62+
mock_property.from_dict = mocker.MagicMock(return_value=mock_property)
63+
mocker.patch.dict(db_cell_type_mapping, {"mock_type": mock_property})
64+
65+
props = {
66+
"property1": {"type": "mock_type", "data": "value1"},
67+
"property2": {"type": "mock_type", "data": "value2"},
68+
}
69+
70+
result = map_cells(props)
71+
72+
assert len(result) == 2
73+
assert "property1" in result
74+
assert "property2" in result
75+
mock_property.from_dict.assert_any_call({"type": "mock_type", "data": "value1"})
76+
mock_property.from_dict.assert_any_call({"type": "mock_type", "data": "value2"})
77+
78+
79+
def test_map_cells_unsupported_property():
80+
props = {
81+
"property1": {"type": "non_existent_type", "data": "value1"},
82+
}
83+
84+
with pytest.raises(KeyError):
85+
map_cells(props)
86+
87+
88+
def test_map_cells_key_error():
89+
props = {
90+
"property1": {"value": "MissingType"},
91+
}
92+
93+
with pytest.raises(KeyError, match="failed to map to associated database property"):
94+
map_cells(props)
95+
96+
97+
def test_map_cells_logs_warning_for_unsupported_type(caplog: pytest.LogCaptureFixture):
98+
props = {
99+
"property1": {"type": "button", "value": "Unsupported"},
100+
}
101+
102+
result = map_cells(props)
103+
104+
assert result == {}
105+
assert "Unsupported property type 'button' for property 'property1'. Skipping." in caplog.text

unstructured_ingest/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.0.21" # pragma: no cover
1+
__version__ = "1.0.22" # pragma: no cover

unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from typing import Dict
22

3+
from unstructured_ingest.logger import logger
34
from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
45

56
from .checkbox import Checkbox, CheckboxCell
@@ -25,6 +26,13 @@
2526
from .url import URL, URLCell
2627
from .verification import Verification, VerificationCell
2728

29+
# It's possible to add a 'button' property to a Notion database.
30+
# However, current Notion API documentation doesn't mention it.
31+
# Buttons are only functional inside Notion UI. We can simply
32+
# ignore them so that we don't throw an error when trying
33+
# to map 'button' properties.
34+
unsupported_db_prop_types = ["button"]
35+
2836
db_prop_type_mapping = {
2937
"checkbox": Checkbox,
3038
"created_by": CreatedBy,
@@ -55,7 +63,13 @@ def map_properties(props: Dict[str, dict]) -> Dict[str, DBPropertyBase]:
5563
mapped_dict = {}
5664
for k, v in props.items():
5765
try:
58-
mapped_dict[k] = db_prop_type_mapping[v["type"]].from_dict(v) # type: ignore
66+
property_type = v["type"]
67+
if property_type in unsupported_db_prop_types:
68+
logger.warning(
69+
f"Unsupported property type '{property_type}' for property '{k}'. Skipping."
70+
)
71+
continue
72+
mapped_dict[k] = db_prop_type_mapping[property_type].from_dict(v) # type: ignore
5973
except KeyError as ke:
6074
raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
6175

@@ -92,8 +106,13 @@ def map_cells(props: Dict[str, dict]) -> Dict[str, DBCellBase]:
92106
mapped_dict = {}
93107
for k, v in props.items():
94108
try:
95-
t = v["type"]
96-
mapped_dict[k] = db_cell_type_mapping[t].from_dict(v) # type: ignore
109+
property_type = v["type"]
110+
if property_type in unsupported_db_prop_types:
111+
logger.warning(
112+
f"Unsupported property type '{property_type}' for property '{k}'. Skipping."
113+
)
114+
continue
115+
mapped_dict[k] = db_cell_type_mapping[property_type].from_dict(v) # type: ignore
97116
except KeyError as ke:
98117
raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
99118

unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class Checkbox(DBPropertyBase):
1313
id: str
1414
name: str
1515
type: str = "checkbox"
16+
description: Optional[str] = None
1617
checkbox: dict = field(default_factory=dict)
1718

1819
@classmethod

0 commit comments

Comments
 (0)