Skip to content

Commit 579b94a

Browse files
committed
Add root_id and optimize database queries
1 parent f2f92ae commit 579b94a

File tree

3 files changed

+41
-44
lines changed

3 files changed

+41
-44
lines changed

isimip_data/metadata/filters.py

Lines changed: 32 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
import logging
22

33
from django.contrib.postgres.search import SearchQuery
4-
from django.core.exceptions import FieldError, ValidationError
5-
from django.db.models import Exists, OuterRef, Q
4+
from django.core.exceptions import ValidationError
5+
from django.db.models import Q
66

77
from rest_framework.filters import BaseFilterBackend
88

@@ -65,11 +65,13 @@ def filter_queryset(self, request, queryset, view):
6565
if path_list:
6666
q = Q()
6767
for path in path_list:
68-
q |= Q(path__startswith=path)
68+
filter_kwargs = {'path__startswith': path}
69+
6970
if getattr(view, 'filter_resolve_links', True):
70-
q |= Exists(
71-
queryset.model.objects.filter(target_id=OuterRef('pk'), path__startswith=path)
72-
)
71+
subquery = queryset.model.objects.filter(**filter_kwargs).values('root_id').order_by()
72+
q |= Q(root_id__in=subquery)
73+
else:
74+
q |= Q(**filter_kwargs)
7375

7476
queryset = queryset.filter(q)
7577

@@ -99,19 +101,15 @@ def filter_queryset(self, request, queryset, view):
99101

100102
# last, perform a full text search on the search_vector field
101103
if queryset.model == File:
102-
q = Q(dataset__search__vector=search_query)
103-
if getattr(view, 'filter_resolve_links', True):
104-
q |= Exists(
105-
queryset.model.objects.filter(target_id=OuterRef('pk'), dataset__search__vector=search_query)
106-
)
104+
filter_kwargs = {'dataset__search__vector': search_query}
107105
else:
108-
q = Q(search__vector=search_query)
109-
if getattr(view, 'filter_resolve_links', True):
110-
q |= Exists(
111-
queryset.model.objects.filter(target_id=OuterRef('pk'), search__vector=search_query)
112-
)
106+
filter_kwargs = {'search__vector': search_query}
113107

114-
queryset = queryset.filter(q)
108+
if getattr(view, 'filter_resolve_links', True):
109+
subquery = queryset.model.objects.filter(**filter_kwargs).values('root_id').order_by()
110+
queryset = queryset.filter(root_id__in=subquery)
111+
else:
112+
queryset = queryset.filter(**filter_kwargs)
115113

116114
return queryset
117115

@@ -123,12 +121,10 @@ def filter_queryset(self, request, queryset, view):
123121
return queryset
124122

125123
if request.GET.get('all') != 'true':
126-
try:
127-
# datasets have a public field
128-
queryset = queryset.filter(public=True)
129-
except FieldError:
130-
# files need to check the public field of the corresponding dataset
124+
if queryset.model == File:
131125
queryset = queryset.filter(dataset__public=True)
126+
else:
127+
queryset = queryset.filter(public=True)
132128

133129
after = request.GET.get('after')
134130
if after:
@@ -152,15 +148,13 @@ def filter_queryset(self, request, queryset, view):
152148
q = Q()
153149
for value in request.GET.getlist(identifier):
154150
if value:
155-
q |= Q(specifiers__contains={identifier: value})
151+
filter_kwargs = {'specifiers__contains': {identifier: value}}
156152

157153
if getattr(view, 'filter_resolve_links', True):
158-
q |= Exists(
159-
queryset.model.objects.filter(
160-
target_id=OuterRef('pk'),
161-
specifiers__contains={identifier: value}
162-
)
163-
)
154+
subquery = queryset.model.objects.filter(**filter_kwargs).values('root_id').order_by()
155+
q |= Q(root_id__in=subquery)
156+
else:
157+
q |= Q(**filter_kwargs)
164158

165159
queryset = queryset.filter(q)
166160

@@ -178,18 +172,17 @@ def filter_queryset(self, request, queryset, view):
178172
q = Q()
179173
for tree in tree_list:
180174
tree = tree.rstrip('/') + '/'
175+
181176
if queryset.model == File:
182-
q |= Q(dataset__tree_path__startswith=tree)
183-
if getattr(view, 'filter_resolve_links', True):
184-
q |= Exists(
185-
queryset.model.objects.filter(target_id=OuterRef('pk'), dataset__tree_path__startswith=tree)
186-
)
177+
filter_kwargs = {'dataset__tree_path__startswith': tree}
178+
else:
179+
filter_kwargs = {'tree_path__startswith': tree}
180+
181+
if getattr(view, 'filter_resolve_links', True):
182+
subquery = queryset.model.objects.filter(**filter_kwargs).values('root_id').order_by()
183+
q |= Q(root_id__in=subquery)
187184
else:
188-
q |= Q(tree_path__startswith=tree)
189-
if getattr(view, 'filter_resolve_links', True):
190-
q |= Exists(
191-
queryset.model.objects.filter(target_id=OuterRef('pk'), tree_path__startswith=tree)
192-
)
185+
q |= Q(**filter_kwargs)
193186

194187
queryset = queryset.filter(q)
195188

isimip_data/metadata/managers.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,16 @@
11
from django.contrib.postgres.aggregates import ArrayAgg
22
from django.db import models
3+
from django.db.models.fields.json import KeyTextTransform
34

45

56
class DatasetQuerySet(models.QuerySet):
67

78
def histogram(self, identifier):
8-
field = f'specifiers__{identifier}'
99
return (
10-
self.annotate(distinct_id=models.functions.Coalesce('target_id', 'id'))
11-
.values_list(field)
12-
.annotate(count=models.Count('distinct_id', distinct=True))
13-
.order_by(field)
10+
self.annotate(specifier=KeyTextTransform(identifier, 'specifiers'))
11+
.values_list('specifier')
12+
.annotate(count=models.Count('root_id', distinct=True))
13+
.order_by('specifier')
1414
)
1515

1616

isimip_data/metadata/models.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,8 @@ class Dataset(models.Model):
3636
published = models.DateTimeField()
3737
archived = models.DateTimeField()
3838

39+
root_id = models.UUIDField(editable=False)
40+
3941
class Meta:
4042
db_table = 'datasets'
4143
managed = False
@@ -129,6 +131,8 @@ class File(models.Model):
129131
created = models.DateTimeField()
130132
updated = models.DateTimeField()
131133

134+
root_id = models.UUIDField(editable=False)
135+
132136
class Meta:
133137
db_table = 'files'
134138
managed = False

0 commit comments

Comments
 (0)