From 38400c3b554da9a0fb803869bec10d773d4ada6a Mon Sep 17 00:00:00 2001 From: Noa Dove Date: Mon, 15 Dec 2025 20:37:45 -0800 Subject: [PATCH 1/4] Extract methods from filter reification --- src/azul/service/__init__.py | 46 ++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/src/azul/service/__init__.py b/src/azul/service/__init__.py index a1e9aa9021..4ed806a630 100644 --- a/src/azul/service/__init__.py +++ b/src/azul/service/__init__.py @@ -361,33 +361,20 @@ def reify(self, filters = copy_json(self.explicit) special_fields = plugin.special_fields - def extract_filter(field: str, *, default: set | None) -> set | None: - filter = filters.pop(field, {}) - # Other operators are not supported on string or boolean fields - assert filter.keys() <= {'is'}, filter - try: - values = filter['is'] - except KeyError: - return default - else: - return set(values) - - explicit_sources = extract_filter(special_fields.source_id.name, - default=None) - accessible = extract_filter(special_fields.accessible.name, - default={False, True}) + explicit_sources = self._extract_filter(filters, + special_fields.source_id.name, + default=None) + accessible = self._extract_filter(filters, + special_fields.accessible.name, + default={False, True}) source_relation = 'is' if limit_access: if explicit_sources is None: sources = self.source_ids if True in accessible else [] else: - forbidden_sources = explicit_sources - self.source_ids - if forbidden_sources: - raise ForbiddenError('Cannot filter by inaccessible sources', - forbidden_sources) - else: - sources = explicit_sources if True in accessible else [] + self._forbid_explicit_inaccessible_sources(explicit_sources) + sources = explicit_sources if True in accessible else [] else: if accessible == set(): sources = [] @@ -417,6 +404,23 @@ def extract_filter(field: str, *, default: set | None) -> set | None: return filters + def _extract_filter(self, filters, field: str, *, default: set | None) -> set | None: + filter = filters.pop(field, {}) + # Other operators are not supported on string or boolean fields + assert filter.keys() <= {'is'}, filter + try: + values = filter['is'] + except KeyError: + return default + else: + return set(values) + + def _forbid_explicit_inaccessible_sources(self, explicit_sources: set[str]): + forbidden_sources = explicit_sources - self.source_ids + if forbidden_sources: + raise ForbiddenError('Cannot filter by inaccessible sources', + forbidden_sources) + class BadArgumentException(Exception): From 246d923ca554c43ff879783a8ff62d4a3e5e0b2a Mon Sep 17 00:00:00 2001 From: Noa Dove Date: Mon, 15 Dec 2025 20:39:08 -0800 Subject: [PATCH 2/4] Parameterize query preparation --- src/azul/service/elasticsearch_service.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/azul/service/elasticsearch_service.py b/src/azul/service/elasticsearch_service.py index e998dbea58..c7d8692031 100644 --- a/src/azul/service/elasticsearch_service.py +++ b/src/azul/service/elasticsearch_service.py @@ -179,7 +179,7 @@ class FilterStage(_ElasticsearchStage[Response, Response]): post_filter: bool def prepare_request(self, request: Search) -> Search: - query = self.prepare_query() + query = self.prepare_query(self.prepared_filters) if self.post_filter: request = request.post_filter(query) else: @@ -223,12 +223,15 @@ def _translate_filters(self, filters: FiltersJSON) -> TranslatedFilters: translated_filters[field] = {relation: list(values)} return translated_filters - def prepare_query(self, skip_field_paths: tuple[FieldPath] = ()) -> Query: + def prepare_query(self, + prepared_filters: TranslatedFilters, + skip_field_paths: tuple[FieldPath] = () + ) -> Query: """ Converts the given filters into an Elasticsearch DSL Query object. """ filter_list = [] - for field_path, relation_and_values in self.prepared_filters.items(): + for field_path, relation_and_values in prepared_filters.items(): if field_path not in skip_field_paths: relation, values = one(relation_and_values.items()) # Note that `is_not` is only used internally (for filtering by @@ -320,7 +323,8 @@ def _prepare_aggregation(self, *, facet: str, facet_path: FieldPath) -> Agg: """ # Create a filter agg using a query that represents all filters # except for the current facet. - query = self.filter_stage.prepare_query(skip_field_paths=(facet_path,)) + query = self.filter_stage.prepare_query(self.filter_stage.prepared_filters, + skip_field_paths=(facet_path,)) agg = A('filter', query) field_type = self.service.field_type(self.catalog, facet_path) From b5bea1f3a2ac8cf2fc0a347885ce8f05bc27083e Mon Sep 17 00:00:00 2001 From: Noa Dove Date: Mon, 15 Dec 2025 20:39:13 -0800 Subject: [PATCH 3/4] Never use post_filter for implicit source filter (#7580) --- src/azul/service/__init__.py | 19 ++++++++++++ src/azul/service/elasticsearch_service.py | 12 +++++++- test/service/test_request_builder.py | 35 ++++++++++++++++++----- 3 files changed, 58 insertions(+), 8 deletions(-) diff --git a/src/azul/service/__init__.py b/src/azul/service/__init__.py index 4ed806a630..124f4eca0f 100644 --- a/src/azul/service/__init__.py +++ b/src/azul/service/__init__.py @@ -343,6 +343,25 @@ def to_json(self) -> JSON: def update(self, filters: FiltersJSON) -> Self: return attr.evolve(self, explicit={**self.explicit, **filters}) + def reify_implicit_only(self, plugin: MetadataPlugin) -> FiltersJSON: + """ + Construct filters for *only* the implicit restriction on accessible + sources. This is conceptually equivalent to the set difference + `self.reify(limit_access=True) - self.reify(limit_access=False)`. + + :param plugin: Metadata plugin for the current request's catalog + """ + source_id_field = plugin.special_fields.source_id.name + filters = copy_json(self.explicit) + explicit_sources = self._extract_filter(filters, source_id_field, default=None) + if explicit_sources is not None: + self._forbid_explicit_inaccessible_sources(explicit_sources) + return { + source_id_field: { + 'is': sorted(self.source_ids), + } + } + def reify(self, plugin: MetadataPlugin, *, diff --git a/src/azul/service/elasticsearch_service.py b/src/azul/service/elasticsearch_service.py index c7d8692031..e4e93c672d 100644 --- a/src/azul/service/elasticsearch_service.py +++ b/src/azul/service/elasticsearch_service.py @@ -181,6 +181,9 @@ class FilterStage(_ElasticsearchStage[Response, Response]): def prepare_request(self, request: Search) -> Search: query = self.prepare_query(self.prepared_filters) if self.post_filter: + if self.service.always_limit_access or self._limit_access: + access_query = self.prepare_query(self.prepared_access_filter) + request = request.query(access_query) request = request.post_filter(query) else: request = request.query(query) @@ -191,10 +194,17 @@ def process_response(self, response: Response) -> Response: @cached_property def prepared_filters(self) -> TranslatedFilters: - limit_access = self.service.always_limit_access or self._limit_access + # The implicit source filter is always applied via a query, and would + # therefore be redundant in the post_filter + limit_access = (self.service.always_limit_access or self._limit_access) and not self.post_filter filters_json = self.filters.reify(self.plugin, limit_access=limit_access) return self._translate_filters(filters_json) + @cached_property + def prepared_access_filter(self) -> TranslatedFilters: + filters_json = self.filters.reify_implicit_only(self.plugin) + return self._translate_filters(filters_json) + @property @abstractmethod def _limit_access(self) -> bool: diff --git a/test/service/test_request_builder.py b/test/service/test_request_builder.py index 1a8d591ded..6c00310875 100644 --- a/test/service/test_request_builder.py +++ b/test/service/test_request_builder.py @@ -107,6 +107,13 @@ def test_create_request(self): Tests creation of a simple request """ expected_output = { + 'query': { + 'bool': { + 'must': [ + self.sources_filter + ] + } + }, 'post_filter': { 'bool': { 'must': [ @@ -121,7 +128,6 @@ def test_create_request(self): } } }, - self.sources_filter ] } } @@ -154,6 +160,13 @@ def test_create_request_complex(self): Tests creation of a complex request. """ expected_output = { + 'query': { + 'bool': { + 'must': [ + self.sources_filter + ] + } + }, 'post_filter': { 'bool': { 'must': [ @@ -168,7 +181,6 @@ def test_create_request_complex(self): } } }, - self.sources_filter ] } } @@ -186,6 +198,13 @@ def test_create_request_missing_values(self): Tests creation of a request for facets that do not have a value """ expected_output = { + 'query': { + 'bool': { + 'must': [ + self.sources_filter + ] + } + }, 'post_filter': { 'bool': { 'must': [ @@ -221,7 +240,6 @@ def test_create_request_missing_values(self): } } }, - self.sources_filter ] } } @@ -236,6 +254,13 @@ def test_create_request_terms_and_missing_values(self): not have a value """ expected_output = { + 'query': { + 'bool': { + 'must': [ + self.sources_filter + ] + } + }, 'post_filter': { 'bool': { 'must': [ @@ -300,7 +325,6 @@ def test_create_request_terms_and_missing_values(self): } } }, - self.sources_filter ] } } @@ -346,9 +370,6 @@ def test_create_aggregate(self): expected_output = { 'filter': { 'bool': { - 'must': [ - self.sources_filter - ] } }, 'aggs': { From 029940c89dfbbc99a41fc9144f2fc66375c203d8 Mon Sep 17 00:00:00 2001 From: Noa Dove Date: Tue, 16 Dec 2025 19:14:32 -0800 Subject: [PATCH 4/4] Remove source ID field from request aggregations (#7580) --- src/azul/plugins/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/azul/plugins/__init__.py b/src/azul/plugins/__init__.py index 3fc3af0342..dc5e5ab482 100644 --- a/src/azul/plugins/__init__.py +++ b/src/azul/plugins/__init__.py @@ -519,7 +519,7 @@ def hot_entity_types(self) -> Iterable[str]: @property def facets(self) -> Sequence[str]: - return [self.special_fields.source_id.name] + return [] @property @abstractmethod