⏪ Revert sampled facet aggregations for empty queries

simonwoerpel · simonwoerpel · commit 3858e530c9b4 · 2026-03-07T23:42:43.000+01:00
diff --git a/openaleph_search/query/base.py b/openaleph_search/query/base.py
@@ -28,8 +28,6 @@
 log = get_logger(__name__)
 settings = Settings()
 
-_FACET_SAMPLER_KEY = "facets.sampled"
-
 
 class Query:
     TEXT_FIELDS: ClassVar[list[str]] = [Field.TEXT]
@@ -207,17 +205,6 @@ def get_aggregations(self) -> dict[str, Any]:
                 else:
                     aggregations.update(facet_aggregations)
 
-        # For empty queries on large indexes, wrap facet aggregations in a
-        # sampler so ES only aggregates a subset of docs per shard. Gives
-        # approximate counts but is dramatically faster.
-        if self.is_empty_query and aggregations:
-            aggregations = {
-                _FACET_SAMPLER_KEY: {
-                    "sampler": {"shard_size": settings.facet_sampler_size},
-                    "aggs": aggregations,
-                }
-            }
-
         # Significant terms aggregations
         for facet_name in self.parser.facet_significant_names:
             facet_aggregations = {}
@@ -500,12 +487,4 @@ def search(self) -> ObjectApiResponse:
             hits=result.get("hits", {}).get("total", {}).get("value"),
         )
 
-        # Unwrap sampled facet aggregations so consumers see the same
-        # response structure regardless of whether sampling was used.
-        aggs = result.get("aggregations", {})
-        if _FACET_SAMPLER_KEY in aggs:
-            sampled = aggs.pop(_FACET_SAMPLER_KEY)
-            sampled.pop("doc_count", None)
-            aggs.update(sampled)
-
         return result
diff --git a/openaleph_search/settings.py b/openaleph_search/settings.py
@@ -61,10 +61,6 @@ class Settings(BaseSettings):
     index_boost_documents: int = 1
     index_boost_pages: int = 1
 
-    # Sampler shard_size for facet aggregations on empty queries.
-    # Uses approximate counts for faster response on large indexes.
-    facet_sampler_size: int = 5000
-
     # Sampler for significant_terms / significant_text aggregations
     significant_terms_sampler_size: int = 2000
     significant_text_sampler_size: int = 200