Skip to content

Commit 89f4dcb

Browse files
committed
Improve _touch_collection_update_time_cte performance
by avoiding left outer joins. The extra subquery is a SQLAlchemy artifact that the query planner optimizes away. The improved query is:

```sql
WITH RECURSIVE collection_hierarchy(collection_id, depth_level) AS (
    SELECT dataset_collection_element.dataset_collection_id AS collection_id, 0 AS depth_level
    FROM dataset_collection_element
    JOIN history_dataset_association
        ON dataset_collection_element.hda_id = history_dataset_association.id
    WHERE history_dataset_association.dataset_id = 134996190
    UNION
    SELECT dataset_collection_element.dataset_collection_id AS collection_id, 0 AS depth_level
    FROM dataset_collection_element
    JOIN library_dataset_dataset_association
        ON dataset_collection_element.ldda_id = library_dataset_dataset_association.id
    WHERE library_dataset_dataset_association.dataset_id = 134996190
    UNION ALL
    SELECT parent_dce.dataset_collection_id AS collection_id, ch.depth_level + 1 AS depth_level
    FROM dataset_collection_element AS parent_dce
    INNER JOIN collection_hierarchy AS ch
        ON parent_dce.child_collection_id = ch.collection_id
    WHERE ch.depth_level < 50
)
SELECT collection_id FROM collection_hierarchy ORDER BY collection_id;
```

while before it was:

```sql
WITH RECURSIVE collection_hierarchy(collection_id, depth_level) AS (
    SELECT dataset_collection_element.dataset_collection_id AS collection_id, 0 AS depth_level
    FROM dataset_collection_element
    LEFT OUTER JOIN history_dataset_association
        ON dataset_collection_element.hda_id = history_dataset_association.id
    LEFT OUTER JOIN library_dataset_dataset_association
        ON dataset_collection_element.ldda_id = library_dataset_dataset_association.id
    WHERE history_dataset_association.dataset_id = 134996190
        OR library_dataset_dataset_association.dataset_id = 134996190
    UNION ALL
    SELECT parent_dce.dataset_collection_id AS collection_id, ch.depth_level + 1 AS depth_level
    FROM dataset_collection_element AS parent_dce, collection_hierarchy AS ch
    WHERE parent_dce.child_collection_id = ch.collection_id
        AND ch.depth_level < 50
)
SELECT collection_id FROM collection_hierarchy ORDER BY collection_id
```
1 parent c443d08 commit 89f4dcb

File tree

1 file changed

+18
-12
lines changed

1 file changed

+18
-12
lines changed

lib/galaxy/model/__init__.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4727,26 +4727,32 @@ def _touch_collection_update_time_cte(self, session, supports_skip_locked):
47274727
literal(0).label("depth_level"),
47284728
]
47294729

4730-
# Create a single base query that covers both HDA and LDDA cases using OR conditions
4731-
base_query = (
4730+
# Create separate base queries for HDA and LDDA cases and union them
4731+
# We need to wrap the union in a subquery to use it as the anchor for the recursive CTE
4732+
union_query = (
47324733
select(*base_columns)
47334734
.select_from(
4734-
DatasetCollectionElement.__table__.outerjoin(
4735+
DatasetCollectionElement.__table__.join(
47354736
HistoryDatasetAssociation.__table__, DatasetCollectionElement.hda_id == HistoryDatasetAssociation.id
4736-
).outerjoin(
4737-
LibraryDatasetDatasetAssociation.__table__,
4738-
DatasetCollectionElement.ldda_id == LibraryDatasetDatasetAssociation.id,
47394737
)
47404738
)
4741-
.where(
4742-
or_(
4743-
HistoryDatasetAssociation.dataset_id == self.id,
4744-
LibraryDatasetDatasetAssociation.dataset_id == self.id,
4739+
.where(HistoryDatasetAssociation.dataset_id == self.id)
4740+
.union(
4741+
select(*base_columns)
4742+
.select_from(
4743+
DatasetCollectionElement.__table__.join(
4744+
LibraryDatasetDatasetAssociation.__table__,
4745+
DatasetCollectionElement.ldda_id == LibraryDatasetDatasetAssociation.id,
4746+
)
47454747
)
4748+
.where(LibraryDatasetDatasetAssociation.dataset_id == self.id)
47464749
)
4747-
)
4750+
).subquery()
4751+
4752+
# Select from the union subquery to create a proper base query for the CTE
4753+
base_query = select(union_query.c.collection_id, union_query.c.depth_level)
47484754

4749-
# Create the recursive CTE from the single base query
4755+
# Create the recursive CTE from the base query
47504756
collection_hierarchy_cte = base_query.cte(name="collection_hierarchy", recursive=True)
47514757

47524758
# Create aliases for the recursive part

0 commit comments

Comments
 (0)