11import itertools
2+ import logging
23import uuid
34from typing import Optional , Union , List
45
1718)
1819from sqlalchemy .engine import make_url
1920from sqlalchemy .exc import NoSuchModuleError
20- from sqlalchemy .orm import Session
21+ from sqlalchemy .orm import Session , Query
2122
2223from ingestify .domain import File , Revision
2324from ingestify .domain .models import (
4243 task_summary_table ,
4344)
4445
46+ logger = logging .getLogger (__name__ )
47+
4548
4649def parse_value (v ):
4750 try :
@@ -199,9 +202,6 @@ def _filter_query(
199202 if not selectors :
200203 raise ValueError ("Selectors must contain at least one item" )
201204
202- attribute_keys = selectors [
203- 0
204- ].filtered_attributes .keys () # Assume all selectors have the same keys
205205 attribute_sets = {
206206 tuple (selector .filtered_attributes .items ()) for selector in selectors
207207 }
@@ -303,6 +303,12 @@ def load_datasets(self, dataset_ids: list[str]) -> list[Dataset]:
303303 )
304304 return datasets
305305
def _debug_query(self, q: Query):
    """Log the SQL text of ``q`` at DEBUG level with bound values inlined.

    The statement is compiled against the dialect of ``self.session.bind``
    using ``literal_binds`` so that the logged text shows parameter values
    in place rather than placeholders.

    Args:
        q: The query whose statement should be rendered and logged.
    """
    # Rendering with literal binds is comparatively expensive and can raise
    # for bound values that cannot be rendered inline, so skip the work
    # entirely unless the message would actually be emitted.
    if not logger.isEnabledFor(logging.DEBUG):
        return

    text_ = q.statement.compile(
        compile_kwargs={"literal_binds": True},
        dialect=self.session.bind.dialect,
    )
    # Lazy %-formatting; the message text (including the trailing space)
    # matches what was logged before.
    logger.debug("Running query: %s ", text_)
311+
306312 def get_dataset_collection (
307313 self ,
308314 bucket : str ,
@@ -326,18 +332,33 @@ def apply_query_filter(query):
326332 dataset_query = apply_query_filter (
327333 self .session .query (dataset_table .c .dataset_id )
328334 )
335+ self ._debug_query (dataset_query )
329336 dataset_ids = [row .dataset_id for row in dataset_query ]
330337 datasets = self .load_datasets (dataset_ids )
338+
339+ dataset_collection_metadata = DatasetCollectionMetadata (
340+ last_modified = max (dataset .last_modified_at for dataset in datasets )
341+ if datasets
342+ else None ,
343+ row_count = len (datasets ),
344+ )
331345 else :
332346 datasets = []
333347
334- metadata_result_row = apply_query_filter (
335- self .session .query (
336- func .max (dataset_table .c .last_modified_at ).label ("last_modified_at" ),
337- func .count ().label ("row_count" ),
348+ metadata_result_query = apply_query_filter (
349+ self .session .query (
350+ func .max (dataset_table .c .last_modified_at ).label (
351+ "last_modified_at"
352+ ),
353+ func .count ().label ("row_count" ),
354+ )
355+ )
356+
357+ self ._debug_query (metadata_result_query )
358+
359+ dataset_collection_metadata = DatasetCollectionMetadata (
360+ * metadata_result_query .first ()
338361 )
339- ).first ()
340- dataset_collection_metadata = DatasetCollectionMetadata (* metadata_result_row )
341362
342363 return DatasetCollection (dataset_collection_metadata , datasets )
343364
0 commit comments