Use $geoNear or $collStats as the first pipeline stages and add a warning to the aggregate docstring (to emphasize the flaws of the current design in case of inheritance or skip/limit/etc., as the stages added to our pipeline will have an arbitrary ordering)

bagerard · bagerard · commit 23059d56bb7e · 2024-09-27T23:37:01.000+02:00
diff --git a/mongoengine/queryset/base.py b/mongoengine/queryset/base.py
@@ -1343,6 +1343,14 @@ def from_json(self, json_data):
     def aggregate(self, pipeline, *suppl_pipeline, **kwargs):
         """Perform an aggregate function based on your queryset params
 
+        If the queryset contains a query or skip/limit/sort or if the target Document class
+        uses inheritance, this method will add steps prior to the provided pipeline in an arbitrary order.
+        This may affect the performance or outcome of the aggregation, so use it consciously.
+
+        For complex/critical pipelines, we recommend using the aggregation framework of PyMongo directly;
+        it is available through the collection object (YourDocument._collection.aggregate) and guarantees
+        that you have full control over the pipeline.
+
         :param pipeline: list of aggregation commands,
             see: https://www.mongodb.com/docs/manual/core/aggregation-pipeline/
         :param suppl_pipeline: unpacked list of pipeline (added to support deprecation of the old interface)
@@ -1380,7 +1388,18 @@ def aggregate(self, pipeline, *suppl_pipeline, **kwargs):
         if self._skip is not None:
             initial_pipeline.append({"$skip": self._skip})
 
-        final_pipeline = initial_pipeline + user_pipeline
+        # geoNear and collStats must be the first stages in the pipeline if present
+        first_step = []
+        new_user_pipeline = []
+        for step_step in user_pipeline:
+            if "$geoNear" in step_step:
+                first_step.append(step_step)
+            elif "$collStats" in step_step:
+                first_step.append(step_step)
+            else:
+                new_user_pipeline.append(step_step)
+
+        final_pipeline = first_step + initial_pipeline + new_user_pipeline
 
         collection = self._collection
         if self._read_preference is not None or self._read_concern is not None:
diff --git a/tests/queryset/test_queryset_aggregation.py b/tests/queryset/test_queryset_aggregation.py
@@ -1,4 +1,3 @@
-import unittest
 import warnings
 
 from pymongo.read_preferences import ReadPreference
@@ -294,6 +293,44 @@ class Person(Document):
 
         assert list(data) == []
 
+    def test_aggregate_geo_near_used_as_initial_step_before_cls_implicit_step(self):
+        class BaseClass(Document):
+            meta = {"allow_inheritance": True}
 
-if __name__ == "__main__":
-    unittest.main()
+        class Aggr(BaseClass):
+            name = StringField()
+            c = PointField()
+
+        BaseClass.drop_collection()
+
+        x = Aggr(name="X", c=[10.634584, 35.8245029]).save()
+        y = Aggr(name="Y", c=[10.634584, 35.8245029]).save()
+
+        pipeline = [
+            {
+                "$geoNear": {
+                    "near": {"type": "Point", "coordinates": [10.634584, 35.8245029]},
+                    "distanceField": "c",
+                    "spherical": True,
+                }
+            }
+        ]
+        res = list(Aggr.objects.aggregate(*pipeline))
+        assert res == [
+            {"_cls": "BaseClass.Aggr", "_id": x.id, "c": 0.0, "name": "X"},
+            {"_cls": "BaseClass.Aggr", "_id": y.id, "c": 0.0, "name": "Y"},
+        ]
+
+    def test_aggregate_collstats_used_as_initial_step_before_cls_implicit_step(self):
+        class SomeDoc(Document):
+            name = StringField()
+
+        SomeDoc.drop_collection()
+
+        SomeDoc(name="X").save()
+        SomeDoc(name="Y").save()
+
+        pipeline = [{"$collStats": {"count": {}}}]
+        res = list(SomeDoc.objects.aggregate(pipeline))
+        assert len(res) == 1
+        assert res[0]["count"] == 2