feedback

miguelgrinberg · miguelgrinberg · commit 0b685407b08d · 2024-10-04T16:18:08.000+01:00
diff --git a/elasticsearch_dsl/aggs.py b/elasticsearch_dsl/aggs.py
@@ -2788,6 +2788,39 @@ def __init__(
         super().__init__(path=path, **kwargs)
 
 
+class RandomSampler(Bucket[_R]):
+    """
+    A single bucket aggregation that randomly includes documents in the
+    aggregated results. Sampling provides significant speed improvement at
+    the cost of accuracy.
+
+    :arg probability: (required) The probability that a document will be
+        included in the aggregated data. Must be greater than 0, less than
+        0.5, or exactly 1. The lower the probability, the fewer documents
+        are matched.
+    :arg seed: The seed to generate the random sampling of documents. When
+        a seed is provided, the random subset of documents is the same
+        between calls.
+    :arg shard_seed: When combined with seed, setting shard_seed ensures
+        100% consistent sampling over shards where data is exactly the
+        same.
+    """
+
+    name = "random_sampler"
+
+    def __init__(
+        self,
+        *,
+        probability: Union[float, "DefaultType"] = DEFAULT,
+        seed: Union[int, "DefaultType"] = DEFAULT,
+        shard_seed: Union[int, "DefaultType"] = DEFAULT,
+        **kwargs: Any,
+    ):
+        super().__init__(
+            probability=probability, seed=seed, shard_seed=shard_seed, **kwargs
+        )
+
+
 class Sampler(Bucket[_R]):
     """
     A filtering aggregation used to limit any sub aggregations' processing
@@ -3696,7 +3729,3 @@ def __init__(
 
     def result(self, search: "SearchBase[_R]", data: Any) -> AttrDict[Any]:
         return FieldBucketData(self, search, data)
-
-
-class RandomSampler(Bucket[_R]):
-    name = "random_sampler"
diff --git a/tests/test_aggs.py b/tests/test_aggs.py
@@ -220,6 +220,7 @@ def test_filters_correctly_identifies_the_hash() -> None:
 
 
 def test_bucket_sort_agg() -> None:
+    # test the dictionary (type ignored) and fully typed alternatives
     bucket_sort_agg = aggs.BucketSort(sort=[{"total_sales": {"order": "desc"}}], size=3)  # type: ignore
     assert bucket_sort_agg.to_dict() == {
         "bucket_sort": {"sort": [{"total_sales": {"order": "desc"}}], "size": 3}
@@ -251,6 +252,7 @@ def test_bucket_sort_agg() -> None:
 
 
 def test_bucket_sort_agg_only_trnunc() -> None:
+    # test the dictionary (type ignored) and fully typed alternatives
     bucket_sort_agg = aggs.BucketSort(**{"from": 1, "size": 1, "_expand__to_dot": False})  # type: ignore
     assert bucket_sort_agg.to_dict() == {"bucket_sort": {"from": 1, "size": 1}}
     bucket_sort_agg = aggs.BucketSort(from_=1, size=1, _expand__to_dot=False)
@@ -265,20 +267,23 @@ def test_bucket_sort_agg_only_trnunc() -> None:
 
 
 def test_geohash_grid_aggregation() -> None:
+    # test the dictionary (type ignored) and fully typed alternatives
     a = aggs.GeohashGrid(**{"field": "centroid", "precision": 3})  # type: ignore
     assert {"geohash_grid": {"field": "centroid", "precision": 3}} == a.to_dict()
     a = aggs.GeohashGrid(field="centroid", precision=3)
     assert {"geohash_grid": {"field": "centroid", "precision": 3}} == a.to_dict()
 
 
 def test_geohex_grid_aggregation() -> None:
+    # test the dictionary (type ignored) and fully typed alternatives
     a = aggs.GeohexGrid(**{"field": "centroid", "precision": 3})  # type: ignore
     assert {"geohex_grid": {"field": "centroid", "precision": 3}} == a.to_dict()
     a = aggs.GeohexGrid(field="centroid", precision=3)
     assert {"geohex_grid": {"field": "centroid", "precision": 3}} == a.to_dict()
 
 
 def test_geotile_grid_aggregation() -> None:
+    # test the dictionary (type ignored) and fully typed alternatives
     a = aggs.GeotileGrid(**{"field": "centroid", "precision": 3})  # type: ignore
     assert {"geotile_grid": {"field": "centroid", "precision": 3}} == a.to_dict()
     a = aggs.GeotileGrid(field="centroid", precision=3)
@@ -318,6 +323,7 @@ def test_variable_width_histogram_aggregation() -> None:
 
 
 def test_ip_prefix_aggregation() -> None:
+    # test the dictionary (type ignored) and fully typed alternatives
     a = aggs.IPPrefix(**{"field": "ipv4", "prefix_length": 24})  # type: ignore
     assert {"ip_prefix": {"field": "ipv4", "prefix_length": 24}} == a.to_dict()
     a = aggs.IPPrefix(field="ipv4", prefix_length=24)
@@ -501,6 +507,7 @@ def test_adjancecy_matrix_aggregation() -> None:
 
 
 def test_top_metrics_aggregation() -> None:
+    # test the dictionary (type ignored) and fully typed alternatives
     a = aggs.TopMetrics(metrics={"field": "m"}, sort={"s": "desc"})  # type: ignore
     assert {
         "top_metrics": {"metrics": {"field": "m"}, "sort": {"s": "desc"}}
diff --git a/utils/templates/aggs.py.tpl b/utils/templates/aggs.py.tpl
@@ -318,6 +318,3 @@ class {{ k.name }}({{ k.parent if k.parent else parent }}[_R]):
 
     {% endif %}
 {% endfor %}
-{# the following aggregation is in technical preview and does not exist in the specification #}
-class RandomSampler(Bucket[_R]):
-    name = "random_sampler"