@@ -1039,23 +1039,34 @@ def _parse_collection(self, page_blocks: typing.List[PageBaseBlock], block):
10391039 ignore_ssl = os .environ .get ('NOTION_IGNORE_SSL' , 'false' ).lower () == 'true'
10401040 if ignore_ssl :
10411041 import httpx
1042- http_client = httpx . Client ( verify = False )
1042+
10431043 from notion_reader import NotionReader
10441044 client = NotionReader .get_client ()
10451045
10461046 # Retrieve database schema and description
10471047 database = client .databases .retrieve (database_id = db_id )
10481048 properties = database .get ('properties' , {})
1049- headers = list (properties .keys ())
10501049
10511050 # Parse column weight configuration from description
10521051 description = database .get ('description' , [])
10531052 column_weights = self ._parse_column_weights (description )
1053+ if column_weights :
1054+ print (f"[Database Parsing] Found property-order config: { column_weights } " )
10541055
1055- # Query database for rows
1056- # Use httpx directly to avoid potential issues with notion-client query method
1057- import httpx
1056+ # Prepare HTTP client
1057+ http_client_req = None
1058+ should_close_client = False
10581059
1060+ if ignore_ssl :
1061+ # Priority: if SSL ignore is requested, use our own client
1062+ http_client_req = httpx .Client (verify = False )
1063+ should_close_client = True
1064+ elif hasattr (client , 'client' ) and client .client :
1065+ http_client_req = client .client
1066+ else :
1067+ http_client_req = httpx .Client ()
1068+ should_close_client = True
1069+
10591070 token = Config .notion_token ()
10601071 headers_http = {
10611072 "Authorization" : f"Bearer { token } " ,
@@ -1064,64 +1075,101 @@ def _parse_collection(self, page_blocks: typing.List[PageBaseBlock], block):
10641075 }
10651076
10661077 url = f"https://api.notion.com/v1/databases/{ db_id } /query"
1078+
1079+ # Determine sorting logic using utility class
1080+ from utils .database_utils import DatabaseColumnOrderingUtils
1081+
1082+ # Check for page-order configuration in description first
1083+ description_text = NotionUtils .get_plain_text (description )
1084+ page_order_sorts = DatabaseColumnOrderingUtils .parse_page_order (description_text )
1085+
1086+ if page_order_sorts :
1087+ print (f"[Database Parsing] Found page-order config: { page_order_sorts } " )
1088+ sorts = page_order_sorts
1089+ # Bypassing pre-query if page-order is configured
1090+ is_default_sort = False
1091+ else :
1092+ sorts = DatabaseColumnOrderingUtils .get_database_sorts (properties )
1093+ print (f"[Database Parsing] Using schema-based/default sorts: { sorts } " )
1094+ # If properties are empty (common with Inline Databases), we miss schema info for sorting.
1095+ # Perform a pre-query to infer schema from the first row to check for 'Order' column.
1096+ is_default_sort = len (sorts ) == 1 and sorts [0 ].get ('timestamp' ) == 'created_time'
10671097
1068- # Query with sorting by created_time to ensure consistent row order
1098+ if not properties and is_default_sort :
1099+ print (f"[Database Parsing] Schema properties empty & default sort detected. Attempting Pre-query inference..." )
1100+ # Pre-query one row to check schema
1101+ pre_query_body = {
1102+ "page_size" : 1 ,
1103+ "sorts" : sorts # Default sort
1104+ }
1105+ try :
1106+ pre_response = http_client_req .post (url , headers = headers_http , json = pre_query_body )
1107+ pre_response .raise_for_status ()
1108+ pre_results = pre_response .json ().get ('results' , [])
1109+
1110+ if pre_results :
1111+ # Infer properties from row data
1112+ inferred_props = pre_results [0 ].get ('properties' , {})
1113+ print (f"[Database Parsing] Inferred properties from Pre-query: { list (inferred_props .keys ())} " )
1114+ # Re-calculate sorts with inferred properties
1115+ sorts = DatabaseColumnOrderingUtils .get_database_sorts (inferred_props )
1116+ print (f"[Database Parsing] Recalculated sorts after inference: { sorts } " )
1117+ else :
1118+ print ("[Database Parsing] Pre-query returned no results. Cannot infer schema." )
1119+ except Exception as e :
1120+ print (f"[Database Parsing] Failed to infer schema for sorting: { e } " )
1121+
10691122 query_body = {
1070- "sorts" : [
1071- {
1072- "timestamp" : "created_time" ,
1073- "direction" : "ascending"
1074- }
1075- ]
1123+ "sorts" : sorts
10761124 }
10771125
1078- # Use the client's internal http client to respect SSL settings
1079- if hasattr (client , 'client' ) and client .client :
1080- # Use the existing client's httpx client (respects SSL settings)
1081- response = client .client .post (url , headers = headers_http , json = query_body )
1082- else :
1083- # Fallback: create new httpx client
1084- if ignore_ssl :
1085- http_client_req = httpx .Client (verify = False )
1086- else :
1087- http_client_req = httpx .Client ()
1088- response = http_client_req .post (url , headers = headers_http , json = query_body )
1126+ response = http_client_req .post (url , headers = headers_http , json = query_body )
10891127
10901128 response .raise_for_status ()
10911129 results = response .json ().get ('results' , [])
1130+ print (f"[Database Parsing] Query returned { len (results )} rows." )
10921131
1132+ headers = list (properties .keys ()) if properties else []
10931133 if not headers and results :
10941134 # Infer headers from the first row if schema properties are empty
10951135 first_row_props = results [0 ].get ('properties' , {})
10961136 headers = list (first_row_props .keys ())
1137+ print (f"[Database Parsing] Inferred headers from first row: { headers } " )
10971138
10981139 # Apply column ordering
10991140 if headers and column_weights :
11001141 # Only apply weighted ordering if explicitly configured
11011142 headers = self ._sort_columns_by_weight (headers , column_weights )
1102- # Otherwise, preserve the original order from Notion API
1143+ print (f"[Database Parsing] Applied column weights. Final headers: { headers } " )
1144+ elif headers :
1145+ print (f"[Database Parsing] No column weights config. Using default headers: { headers } " )
11031146
11041147 rows = []
11051148 for page in results :
11061149 row_data = []
11071150 page_props = page .get ('properties' , {})
11081151 for header in headers :
1109- prop = page_props .get (header , {})
1110- row_data .append (self ._parse_property_value (prop ))
1152+ if header in page_props :
1153+ row_data .append (self ._parse_property_value (page_props [header ]))
1154+ else :
1155+ row_data .append ("" )
11111156 rows .append (row_data )
11121157
11131158 page_block = PageTableBlock ()
11141159 page_block .id = block .get ('id' )
11151160 page_block .set_data (headers , rows )
11161161 page_blocks .append (page_block )
1117-
1162+
11181163 except Exception as e :
11191164 print (f"Failed to parse database { db_id } : { e } " )
11201165 page_block = PageBaseBlock ()
11211166 page_block .id = block .get ('id' )
11221167 page_block .type = 'collection_view_error'
11231168 page_block .text = f"Error parsing database: { e } "
11241169 page_blocks .append (page_block )
1170+ finally :
1171+ if should_close_client and http_client_req :
1172+ http_client_req .close ()
11251173
11261174 def _parse_property_value (self , prop_value ):
11271175 """Parse various property types to string"""
0 commit comments