Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 75 additions & 27 deletions notion_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1039,23 +1039,34 @@ def _parse_collection(self, page_blocks: typing.List[PageBaseBlock], block):
ignore_ssl = os.environ.get('NOTION_IGNORE_SSL', 'false').lower() == 'true'
if ignore_ssl:
import httpx
http_client = httpx.Client(verify=False)

from notion_reader import NotionReader
client = NotionReader.get_client()

# Retrieve database schema and description
database = client.databases.retrieve(database_id=db_id)
properties = database.get('properties', {})
headers = list(properties.keys())

# Parse column weight configuration from description
description = database.get('description', [])
column_weights = self._parse_column_weights(description)
if column_weights:
print(f"[Database Parsing] Found property-order config: {column_weights}")

# Query database for rows
# Use httpx directly to avoid potential issues with notion-client query method
import httpx
# Prepare HTTP client
http_client_req = None
should_close_client = False

if ignore_ssl:
# Priority: if SSL ignore is requested, use our own client
http_client_req = httpx.Client(verify=False)
should_close_client = True
elif hasattr(client, 'client') and client.client:
http_client_req = client.client
else:
http_client_req = httpx.Client()
should_close_client = True

token = Config.notion_token()
headers_http = {
"Authorization": f"Bearer {token}",
Expand All @@ -1064,64 +1075,101 @@ def _parse_collection(self, page_blocks: typing.List[PageBaseBlock], block):
}

url = f"https://api.notion.com/v1/databases/{db_id}/query"

# Determine sorting logic using utility class
from utils.database_utils import DatabaseColumnOrderingUtils

# Check for page-order configuration in description first
description_text = NotionUtils.get_plain_text(description)
page_order_sorts = DatabaseColumnOrderingUtils.parse_page_order(description_text)

if page_order_sorts:
print(f"[Database Parsing] Found page-order config: {page_order_sorts}")
sorts = page_order_sorts
# Bypassing pre-query if page-order is configured
is_default_sort = False
else:
sorts = DatabaseColumnOrderingUtils.get_database_sorts(properties)
print(f"[Database Parsing] Using schema-based/default sorts: {sorts}")
# If properties are empty (common with Inline Databases), we miss schema info for sorting.
# Perform a pre-query to infer schema from the first row to check for 'Order' column.
is_default_sort = len(sorts) == 1 and sorts[0].get('timestamp') == 'created_time'

# Query with sorting by created_time to ensure consistent row order
if not properties and is_default_sort:
print(f"[Database Parsing] Schema properties empty & default sort detected. Attempting Pre-query inference...")
# Pre-query one row to check schema
pre_query_body = {
"page_size": 1,
"sorts": sorts # Default sort
}
try:
pre_response = http_client_req.post(url, headers=headers_http, json=pre_query_body)
pre_response.raise_for_status()
pre_results = pre_response.json().get('results', [])

if pre_results:
# Infer properties from row data
inferred_props = pre_results[0].get('properties', {})
print(f"[Database Parsing] Inferred properties from Pre-query: {list(inferred_props.keys())}")
# Re-calculate sorts with inferred properties
sorts = DatabaseColumnOrderingUtils.get_database_sorts(inferred_props)
print(f"[Database Parsing] Recalculated sorts after inference: {sorts}")
else:
print("[Database Parsing] Pre-query returned no results. Cannot infer schema.")
except Exception as e:
print(f"[Database Parsing] Failed to infer schema for sorting: {e}")

query_body = {
"sorts": [
{
"timestamp": "created_time",
"direction": "ascending"
}
]
"sorts": sorts
}

# Use the client's internal http client to respect SSL settings
if hasattr(client, 'client') and client.client:
# Use the existing client's httpx client (respects SSL settings)
response = client.client.post(url, headers=headers_http, json=query_body)
else:
# Fallback: create new httpx client
if ignore_ssl:
http_client_req = httpx.Client(verify=False)
else:
http_client_req = httpx.Client()
response = http_client_req.post(url, headers=headers_http, json=query_body)
response = http_client_req.post(url, headers=headers_http, json=query_body)

response.raise_for_status()
results = response.json().get('results', [])
print(f"[Database Parsing] Query returned {len(results)} rows.")

headers = list(properties.keys()) if properties else []
if not headers and results:
# Infer headers from the first row if schema properties are empty
first_row_props = results[0].get('properties', {})
headers = list(first_row_props.keys())
print(f"[Database Parsing] Inferred headers from first row: {headers}")

# Apply column ordering
if headers and column_weights:
# Only apply weighted ordering if explicitly configured
headers = self._sort_columns_by_weight(headers, column_weights)
# Otherwise, preserve the original order from Notion API
print(f"[Database Parsing] Applied column weights. Final headers: {headers}")
elif headers:
print(f"[Database Parsing] No column weights config. Using default headers: {headers}")

rows = []
for page in results:
row_data = []
page_props = page.get('properties', {})
for header in headers:
prop = page_props.get(header, {})
row_data.append(self._parse_property_value(prop))
if header in page_props:
row_data.append(self._parse_property_value(page_props[header]))
else:
row_data.append("")
rows.append(row_data)

page_block = PageTableBlock()
page_block.id = block.get('id')
page_block.set_data(headers, rows)
page_blocks.append(page_block)

except Exception as e:
print(f"Failed to parse database {db_id}: {e}")
page_block = PageBaseBlock()
page_block.id = block.get('id')
page_block.type = 'collection_view_error'
page_block.text = f"Error parsing database: {e}"
page_blocks.append(page_block)
finally:
if should_close_client and http_client_req:
http_client_req.close()

def _parse_property_value(self, prop_value):
"""Parse various property types to string"""
Expand Down
102 changes: 102 additions & 0 deletions tests/notion-sdk-py-official-apis/test_database_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,5 +198,107 @@ def test_parse_database_description_column_ordering(self):

print("✓ All column ordering tests passed")

def test_database_row_sorting(self):
    """Test database row sorting logic priority.

    Feeds several property schemas to
    ``DatabaseColumnOrderingUtils.get_database_sorts`` and checks only the
    first sort entry returned for each one:

    * empty schema -> ``timestamp`` equal to ``created_time``;
    * a property named ``Order`` -> that property;
    * a property whose ``description`` is ``order`` -> that property, even
      when another property is literally named ``Order``;
    * an upper-case ``ORDER`` description -> still that property.
    """
    print("\n[Test] Database Row Sorting Priority")

    from utils.database_utils import DatabaseColumnOrderingUtils

    resolve_sorts = DatabaseColumnOrderingUtils.get_database_sorts

    # Case 1: no properties at all, so the creation timestamp is expected.
    default_sorts = resolve_sorts({})
    print(f"Test 1 - Default: {default_sorts}")
    self.assertEqual(default_sorts[0]['timestamp'], 'created_time')

    # Case 2: a column whose name is exactly 'Order' drives the sort.
    schema_with_order_column = {
        'Name': {'type': 'title'},
        'Order': {'type': 'number'},
        'Age': {'type': 'number'},
    }
    name_based_sorts = resolve_sorts(schema_with_order_column)
    print(f"Test 2 - Name 'Order': {name_based_sorts}")
    self.assertEqual(name_based_sorts[0]['property'], 'Order')

    # Case 3: a column described as 'order' must win over the column that
    # is merely named 'Order'.
    schema_with_described_column = {
        'Name': {'type': 'title'},
        'CustomSort': {'type': 'number', 'description': 'order'},
        'Order': {'type': 'number'},
    }
    description_based_sorts = resolve_sorts(schema_with_described_column)
    print(f"Test 3 - Description 'order': {description_based_sorts}")
    self.assertEqual(description_based_sorts[0]['property'], 'CustomSort')

    # Case 4: the description match is expected to ignore letter case.
    schema_with_uppercase_description = {
        'MySort': {'type': 'number', 'description': 'ORDER'},
    }
    case_insensitive_sorts = resolve_sorts(schema_with_uppercase_description)
    print(f"Test 4 - Case-insensitive Desc: {case_insensitive_sorts}")
    self.assertEqual(case_insensitive_sorts[0]['property'], 'MySort')

    # Case 5: empty properties again, standing in for an inline database
    # that exposes no schema; the created_time fallback is expected.
    inline_db_sorts = resolve_sorts({})
    print(f"Test 5 - Empty properties: {inline_db_sorts}")
    self.assertEqual(inline_db_sorts[0]['timestamp'], 'created_time')

    print("✓ All row sorting tests passed")

def test_page_order_configuration(self):
    """Test parsing of page-order configuration.

    Passes description strings to
    ``DatabaseColumnOrderingUtils.parse_page_order`` and checks the number
    of sort entries plus the ``property`` / ``timestamp`` keys of selected
    entries. A description without a ``page-order:`` line must yield
    ``None``.
    """
    print("\n[Test] Page Order Configuration")
    from utils.database_utils import DatabaseColumnOrderingUtils

    parse_order = DatabaseColumnOrderingUtils.parse_page_order

    # Case 1: the directive sits between unrelated description lines.
    embedded_sorts = parse_order(
        "Some text\npage-order: Order1, Created\nMore text"
    )
    print(f"Test 1 - Simple: {embedded_sorts}")
    self.assertEqual(len(embedded_sorts), 2)
    self.assertEqual(embedded_sorts[0]['property'], 'Order1')
    self.assertEqual(embedded_sorts[1]['timestamp'], 'created_time')

    # Case 2: three keys on one line; the upper-case 'CREATED' keyword is
    # expected to map to the created_time timestamp as well.
    multi_key_sorts = parse_order("page-order: Priority, Name, CREATED")
    print(f"Test 2 - Multi keys: {multi_key_sorts}")
    self.assertEqual(len(multi_key_sorts), 3)
    self.assertEqual(multi_key_sorts[0]['property'], 'Priority')
    self.assertEqual(multi_key_sorts[2]['timestamp'], 'created_time')

    # Case 3: no directive present, so nothing should be parsed.
    missing_config = parse_order("Just description")
    self.assertIsNone(missing_config)

    print("✓ All page-order tests passed")

def test_column_hiding(self):
    """Test hiding columns via negative weights.

    Calls ``DatabaseColumnOrderingUtils.sort_columns_by_weight`` with four
    headers and a weight map, then checks that the negatively weighted
    column is absent, the two positively weighted columns lead in
    descending weight order, and the column without a weight is kept.
    """
    print("\n[Test] Column Hiding")
    from utils.database_utils import DatabaseColumnOrderingUtils

    all_columns = ['Title', 'Tags', 'Secret', 'Date']
    column_weights = {
        'Title': 100,
        'Secret': -1,  # negative weight: expected to be dropped
        'Tags': 50,
    }

    visible_columns = DatabaseColumnOrderingUtils.sort_columns_by_weight(
        all_columns, column_weights
    )
    print(f"Original: {all_columns}")
    print(f"Weights: {column_weights}")
    print(f"Result: {visible_columns}")

    self.assertNotIn('Secret', visible_columns)
    self.assertEqual(visible_columns[0], 'Title')
    self.assertEqual(visible_columns[1], 'Tags')
    # 'Date' has no configured weight and must still be present.
    self.assertIn('Date', visible_columns)

    print("✓ Column hiding tests passed")

# Run the test cases in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()
Loading