Merge remote-tracking branch 'origin/claude/modern-fetch-api' into docs-2.0-migration

dimitri-yatsenko · dimitri-yatsenko · commit d5c275dcbcea · 2026-01-06T11:50:06.000-06:00
diff --git a/src/datajoint/expression.py b/src/datajoint/expression.py
@@ -306,7 +306,7 @@ def join(self, other, semantic_check=True, left=False, allow_nullable_pk=False):
         :param allow_nullable_pk: If True, bypass the left join constraint that requires
             self to determine other. When bypassed, the result PK is the union of both
             operands' PKs, and PK attributes from the right operand could be NULL.
-            Used internally by aggregation with keep_all_rows=True.
+            Used internally by aggregation when exclude_nonmatching=False.
         :return: The joined QueryExpression
 
         a * b is short for a.join(b)
@@ -538,21 +538,33 @@ def proj(self, *attributes, **named_attributes):
         )
         return result
 
-    def aggr(self, group, *attributes, keep_all_rows=False, **named_attributes):
+    def aggr(self, group, *attributes, exclude_nonmatching=False, **named_attributes):
         """
-        Aggregation of the type U('attr1','attr2').aggr(group, computation="QueryExpression")
-        has the primary key ('attr1','attr2') and performs aggregation computations for all matching elements of `group`.
+        Aggregation/grouping operation, similar to proj but with computations over a grouped relation.
 
-        :param group:  The query expression to be aggregated.
-        :param keep_all_rows: True=keep all the rows from self. False=keep only rows that match entries in group.
+        By default, keeps all rows from self (like proj). Use exclude_nonmatching=True to
+        keep only rows that have matches in group.
+
+        :param group: The query expression to be aggregated.
+        :param exclude_nonmatching: If True, exclude rows from self that have no matching
+            entries in group (INNER JOIN). Default False keeps all rows (LEFT JOIN).
         :param named_attributes: computations of the form new_attribute="sql expression on attributes of group"
         :return: The derived query expression
+
+        Example::
+
+            # Count sessions per subject (keeps all subjects; counting a Session attribute yields 0, not 1, for unmatched subjects)
+            Subject.aggr(Session, n="count(session_id)")
+
+            # Count sessions per subject (only subjects with at least one session)
+            Subject.aggr(Session, n="count(*)", exclude_nonmatching=True)
         """
         if Ellipsis in attributes:
             # expand ellipsis to include only attributes from the left table
             attributes = set(attributes)
             attributes.discard(Ellipsis)
             attributes.update(self.heading.secondary_attributes)
+        keep_all_rows = not exclude_nonmatching
         return Aggregation.create(self, group=group, keep_all_rows=keep_all_rows).proj(*attributes, **named_attributes)
 
     aggregate = aggr  # alias for aggr
@@ -1170,12 +1182,14 @@ def aggr(self, group, **named_attributes):
         Aggregation of the type U('attr1','attr2').aggr(group, computation="QueryExpression")
         has the primary key ('attr1','attr2') and performs aggregation computations for all matching elements of `group`.
 
+        Note: exclude_nonmatching is always True for dj.U (cannot keep all rows from an infinite set).
+
         :param group:  The query expression to be aggregated.
         :param named_attributes: computations of the form new_attribute="sql expression on attributes of group"
         :return: The derived query expression
         """
-        if named_attributes.get("keep_all_rows", False):
-            raise DataJointError("Cannot set keep_all_rows=True when aggregating on a universal set.")
+        if named_attributes.pop("exclude_nonmatching", True) is False:
+            raise DataJointError("Cannot set exclude_nonmatching=False when aggregating on a universal set.")
 
         if inspect.isclass(group) and issubclass(group, QueryExpression):
             group = group()
diff --git a/src/datajoint/version.py b/src/datajoint/version.py
@@ -1,4 +1,4 @@
 # version bump auto managed by Github Actions:
 # label_prs.yaml(prep), release.yaml(bump), post_release.yaml(edit)
 # manually set this version will be eventually overwritten by the above actions
-__version__ = "2.0.0a15"
+__version__ = "2.0.0a16"
diff --git a/tests/integration/test_relational_operand.py b/tests/integration/test_relational_operand.py
@@ -230,10 +230,12 @@ def test_heading_repr(schema_simp_pop):
 
 
 def test_aggregate(schema_simp_pop):
-    x = B().aggregate(B.C())
+    # With exclude_nonmatching=True, only rows with matches are kept (INNER JOIN)
+    x = B().aggregate(B.C(), exclude_nonmatching=True)
     assert len(x) == len(B() & B.C())
 
-    x = B().aggregate(B.C(), keep_all_rows=True)
+    # Default behavior now keeps all rows (LEFT JOIN)
+    x = B().aggregate(B.C())
     assert len(x) == len(B())  # test LEFT join
 
     assert len((x & "id_b=0").to_arrays()) == len(B() & "id_b=0")  # test restricted aggregation
@@ -244,7 +246,6 @@ def test_aggregate(schema_simp_pop):
         count="count(id_c)",
         mean="avg(value)",
         max="max(value)",
-        keep_all_rows=True,
     )
     assert len(x) == len(B())
     y = x & "mean>0"  # restricted aggregation
@@ -260,12 +261,14 @@ def test_aggregate(schema_simp_pop):
 
 
 def test_aggr(schema_simp_pop):
-    x = B.aggr(B.C)
+    # With exclude_nonmatching=True, only rows with matches are kept (INNER JOIN)
+    x = B.aggr(B.C, exclude_nonmatching=True)
     l1 = len(x)
     l2 = len(B & B.C)
     assert l1 == l2
 
-    x = B().aggr(B.C(), keep_all_rows=True)
+    # Default behavior now keeps all rows (LEFT JOIN)
+    x = B().aggr(B.C())
     assert len(x) == len(B())  # test LEFT join
 
     assert len((x & "id_b=0").to_arrays()) == len(B() & "id_b=0")  # test restricted aggregation
@@ -276,7 +279,6 @@ def test_aggr(schema_simp_pop):
         count="count(id_c)",
         mean="avg(value)",
         max="max(value)",
-        keep_all_rows=True,
     )
     assert len(x) == len(B())
     y = x & "mean>0"  # restricted aggregation
diff --git a/tests/integration/test_university.py b/tests/integration/test_university.py
@@ -138,23 +138,29 @@ def test_union(schema_uni):
 
 
 def test_aggr(schema_uni):
+    # Default: keeps all courses (some may have NULL avg_grade if no grades)
     avg_grade_per_course = Course.aggr(Grade * LetterGrade, avg_grade="round(avg(points), 2)")
     assert len(avg_grade_per_course) == 45
 
-    # GPA
-    student_gpa = Student.aggr(Course * Grade * LetterGrade, gpa="round(sum(points*credits)/sum(credits), 2)")
+    # GPA - use exclude_nonmatching=True to only include students with grades
+    student_gpa = Student.aggr(
+        Course * Grade * LetterGrade,
+        gpa="round(sum(points*credits)/sum(credits), 2)",
+        exclude_nonmatching=True,
+    )
     gpa = student_gpa.to_arrays("gpa")
-    assert len(gpa) == 261
+    assert len(gpa) == 261  # only students with grades
     assert 2 < gpa.mean() < 3
 
     # Sections in biology department with zero students in them
-    section = (Section & {"dept": "BIOL"}).aggr(Enroll, n="count(student_id)", keep_all_rows=True) & "n=0"
+    # aggr now keeps all rows by default (like proj), so sections with 0 enrollments are included
+    section = (Section & {"dept": "BIOL"}).aggr(Enroll, n="count(student_id)") & "n=0"
     assert len(set(section.to_arrays("dept"))) == 1
     assert len(section) == 17
     assert bool(section)
 
     # Test correct use of ellipses in a similar query
-    section = (Section & {"dept": "BIOL"}).aggr(Grade, ..., n="count(student_id)", keep_all_rows=True) & "n>1"
+    section = (Section & {"dept": "BIOL"}).aggr(Grade, ..., n="count(student_id)") & "n>1"
     assert not any(name in section.heading.names for name in Grade.heading.secondary_attributes)
     assert len(set(section.to_arrays("dept"))) == 1
     assert len(section) == 168