posit-dev
diff --git a/‎CONTRIBUTING.md‎
Lines changed: 22 additions & 0 deletions b/‎CONTRIBUTING.md‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎Makefile‎
Lines changed: 7 additions & 0 deletions b/‎Makefile‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎pointblank/_agg.py‎
Lines changed: 120 additions & 0 deletions b/‎pointblank/_agg.py‎
Lines changed: 120 additions & 0 deletions
@@ -45,6 +45,28 @@ The tests are located in the `tests` folder and we use `pytest` for running them
 
 If you create new tests involving snapshots, please ensure that the resulting snapshots are relatively small. After adding snapshots, use `make test-update` (this runs `pytest --snapshot-update`). A subsequent use of `make test` should pass without any issues.
 
+### Creating Aggregation Methods
+
+Aggregation methods are generated dynamically! This is done because they all have the same signature and they're registered on the `Validate` class in the same way. So, to add a new method, go to `pointblank/_agg.py` and add either a comparison or statistical aggregation function.
+
+Comparison functions are named `comp_*`, for example `comp_gt` for "greater than". Statistical functions are named `agg_*`, for example `agg_sum` for "sum". At build time, these are registered and a grid of all combinations is created:
+```python
+Aggregator = Callable[[nw.DataFrame], float | int]
+Comparator = Callable[[Any, Any, Any], bool]
+
+AGGREGATOR_REGISTRY: dict[str, Aggregator] = {}
+
+COMPARATOR_REGISTRY: dict[str, Comparator] = {}
+```
+
+Once you've added the new method(s), run `make pyi` to regenerate the type stubs in `pointblank/validate.pyi`, which contain the new signatures for the aggregation methods. At runtime (import time, to be precise) the methods are added to the `Validate` class and resolved internally through the registry.
+```python
+# pointblank/validate.py
+for method in load_validation_method_grid():  # -> `col_sum_*`, `col_mean_*`, etc.
+    setattr(Validate, method, make_agg_validator(method))
+```
+
+At this point, the methods exist and their docs and signatures are picked up properly by type checkers and IDEs/LSPs, which is very important for usability.
+
 ### Linting and Type Checking
 
 We use `ruff` for linting, the settings used are fairly loose and objective. Linting is run in pre-commit in CI. You can run it locally with `make lint`. Type checking is currently not enforced, but we intend on gradually typing the codebase. You can run `make type` to run Astral's new experimental type checker `ty`. Feel free to leverage type hints and occasionally type checking but it's not obligatory at this time.
@@ -1,5 +1,12 @@
 .PHONY: check
 
+.PHONY: pyi
+# Two steps: stubgen writes a stub for pointblank/validate.py (private names included) with the
+# current directory as its output root, then scripts/generate_agg_validate_pyi.py is run.
+# NOTE(review): presumably that script adds the aggregation-method signatures to the stub — confirm.
+pyi: ## Generate .pyi stub files
+	@uv run stubgen ./pointblank/validate.py \
+		--include-private \
+		-o  .
+	@uv run scripts/generate_agg_validate_pyi.py
+
 .PHONY: test
 test:
 	@uv run pytest tests \
 
@@ -0,0 +1,120 @@
+from __future__ import annotations
+
+import itertools
+from collections.abc import Callable
+from typing import Any
+
+import narwhals as nw
+
+# TODO: Should take any frame type
+# An aggregator reduces a narwhals DataFrame to a single numeric value.
+Aggregator = Callable[[nw.DataFrame], float | int]
+# A comparator takes three positional values and returns a boolean; the comparators in this
+# module name those values (real, lower, upper).
+Comparator = Callable[[Any, Any, Any], bool]
+
+# Aggregators keyed by function name with the "agg_" prefix removed (filled in by `register`).
+AGGREGATOR_REGISTRY: dict[str, Aggregator] = {}
+
+# Comparators keyed by function name with the "comp_" prefix removed (filled in by `register`).
+COMPARATOR_REGISTRY: dict[str, Comparator] = {}
+
+
+def register(fn):
+    """Register an aggregator or comparator function.
+
+    Intended for use as a decorator. The target registry is chosen from the function's
+    name: `comp_<key>` is stored in `COMPARATOR_REGISTRY` and `agg_<key>` is stored in
+    `AGGREGATOR_REGISTRY`, in both cases under `<key>` (the name with its prefix removed).
+
+    Args:
+        fn: The function to register.
+
+    Returns:
+        The same function object, unchanged.
+
+    Raises:
+        NotImplementedError: If the function name starts with neither `comp_` nor `agg_`.
+    """
+    name: str = fn.__name__
+    if name.startswith("comp_"):
+        COMPARATOR_REGISTRY[name.removeprefix("comp_")] = fn
+    elif name.startswith("agg_"):
+        AGGREGATOR_REGISTRY[name.removeprefix("agg_")] = fn
+    else:  # pragma: no cover
+        # Name the offending function so a misnamed helper is easy to track down.
+        raise NotImplementedError(
+            f"Cannot register '{name}': expected a name starting with 'comp_' or 'agg_'."
+        )
+    return fn
+
+
+## Aggregator Functions
+@register
+def agg_sum(column: nw.DataFrame) -> float:
+    """Sum the frame's columns and return the resulting single value.
+
+    NOTE(review): `.item()` is called without arguments, so this presumably expects a
+    one-column frame — confirm against the caller.
+    """
+    summed = column.select(nw.all().sum())
+    return summed.item()
+
+
+@register
+def agg_avg(column: nw.DataFrame) -> float:
+    """Take the mean of the frame's columns and return the resulting single value.
+
+    NOTE(review): `.item()` is called without arguments, so this presumably expects a
+    one-column frame — confirm against the caller.
+    """
+    averaged = column.select(nw.all().mean())
+    return averaged.item()
+
+
+@register
+def agg_sd(column: nw.DataFrame) -> float:
+    """Take the standard deviation of the frame's columns and return the single value.
+
+    `std()` is called with its default arguments.
+
+    NOTE(review): `.item()` is called without arguments, so this presumably expects a
+    one-column frame — confirm against the caller.
+    """
+    deviation = column.select(nw.all().std())
+    return deviation.item()
+
+
+## Comparator functions:
+@register
+def comp_eq(real: float, lower: float, upper: float) -> bool:
+    """Check `real` for equality, or for range membership when the bounds differ.
+
+    When `lower` equals `upper` the check is `real == lower`; otherwise `real` must lie
+    between `lower` and `upper`, both ends included.
+    """
+    bounds_coincide = lower == upper
+    if not bounds_coincide:
+        return _generic_between(real, lower, upper)
+    return bool(real == lower)
+
+
+@register
+def comp_gt(real: float, lower: float, upper: float) -> bool:
+    """Return whether `real` is strictly greater than `lower`.
+
+    `upper` is part of the three-argument comparator signature but is not used here.
+    """
+    above_lower = real > lower
+    return bool(above_lower)
+
+
+@register
+def comp_ge(real: Any, lower: float, upper: float) -> bool:
+    """Return whether `real` is greater than or equal to `lower`.
+
+    `upper` is part of the three-argument comparator signature but is not used here.
+    """
+    at_least_lower = real >= lower
+    return bool(at_least_lower)
+
+
+@register
+def comp_lt(real: float, lower: float, upper: float) -> bool:
+    """Return whether `real` is strictly less than `upper`.
+
+    `lower` is part of the three-argument comparator signature but is not used here.
+    """
+    below_upper = real < upper
+    return bool(below_upper)
+
+
+@register
+def comp_le(real: float, lower: float, upper: float) -> bool:
+    """Return whether `real` is less than or equal to `upper`.
+
+    `lower` is part of the three-argument comparator signature but is not used here.
+    """
+    at_most_upper = real <= upper
+    return bool(at_most_upper)
+
+
+def _generic_between(real: Any, lower: Any, upper: Any) -> bool:
+    """Return whether `real` lies between `lower` and `upper`, both ends included."""
+    within_bounds = lower <= real <= upper
+    return bool(within_bounds)
+
+
+def resolve_agg_registries(name: str) -> tuple[Aggregator, Comparator]:
+    """Resolve an assertion name to its aggregator and comparator functions.
+
+    The name is expected to look like `col_<aggregator>_<comparator>`, for example
+    `col_sum_gt`; a leading `col_` is stripped if present. The text after the final
+    underscore is the comparator key and everything before it is the aggregator key.
+
+    Args:
+        name (str): The name of the assertion.
+
+    Returns:
+        tuple[Aggregator, Comparator]: The aggregator and comparator functions.
+
+    Raises:
+        ValueError: If the aggregator key or the comparator key is not in its registry.
+    """
+    stem = name.removeprefix("col_")
+    # Split on the last underscore only. Taking the last two "_"-separated pieces would
+    # ignore any extra leading segments (so `col_anything_sum_gt` would resolve) and could
+    # never match an aggregator key that itself contains an underscore.
+    agg_name, _, comp_name = stem.rpartition("_")
+
+    aggregator = AGGREGATOR_REGISTRY.get(agg_name)
+    comparator = COMPARATOR_REGISTRY.get(comp_name)
+
+    if aggregator is None:  # pragma: no cover
+        raise ValueError(f"Aggregator '{agg_name}' not found in registry.")
+
+    if comparator is None:  # pragma: no cover
+        raise ValueError(f"Comparator '{comp_name}' not found in registry.")
+
+    return aggregator, comparator
+
+
+def is_valid_agg(name: str) -> bool:
+    """Report whether `name` resolves to a registered aggregator/comparator pair.
+
+    Args:
+        name (str): The name of the assertion.
+
+    Returns:
+        bool: True if `resolve_agg_registries` accepts the name, False if it raises
+            `ValueError`.
+    """
+    try:
+        resolve_agg_registries(name)
+    except ValueError:
+        return False
+    else:
+        return True
+
+
+def load_validation_method_grid() -> tuple[str, ...]:
+    """Build the name of every aggregator/comparator validation method.
+
+    Returns:
+        tuple[str, ...]: One `col_<aggregator>_<comparator>` name for each pairing of a key
+            in `AGGREGATOR_REGISTRY` with a key in `COMPARATOR_REGISTRY`.
+    """
+    pairings = itertools.product(AGGREGATOR_REGISTRY, COMPARATOR_REGISTRY)
+    return tuple(f"col_{agg_name}_{comp_name}" for agg_name, comp_name in pairings)