team-decent
diff --git a/decent_bench/benchmark/_utils.py b/decent_bench/benchmark/_utils.py
Lines changed: 16 additions & 8 deletions
diff --git a/decent_bench/costs/_base/_cost.py b/decent_bench/costs/_base/_cost.py
Lines changed: 2 additions & 2 deletions
diff --git a/decent_bench/costs/_base/_sum_cost.py b/decent_bench/costs/_base/_sum_cost.py
Lines changed: 7 additions & 2 deletions
diff --git a/decent_bench/datasets/_pytorch_handler.py b/decent_bench/datasets/_pytorch_handler.py
Lines changed: 5 additions & 3 deletions
diff --git a/decent_bench/distributed_algorithms.py b/decent_bench/distributed_algorithms.py
Lines changed: 17 additions & 35 deletions
diff --git a/decent_bench/networks.py b/decent_bench/networks.py
Lines changed: 32 additions & 0 deletions
diff --git a/decent_bench/schemes.py b/decent_bench/schemes.py
Lines changed: 1 addition & 1 deletion
diff --git a/docs/source/user.rst b/docs/source/user.rst
Lines changed: 24 additions & 0 deletions
diff --git a/test/test_cost_operators.py b/test/test_cost_operators.py
Lines changed: 46 additions & 5 deletions
@@ -75,8 +75,8 @@ def model_gen() -> torch.nn.Module:
             )
 
         # Mypy cannot infer that cost_cls is PyTorchCost here
-        costs = [
-            cost_cls(  # type: ignore[call-arg]
+        pytorch_costs: list[PyTorchCost] = [
+            PyTorchCost(
                 dataset=p,
                 model=model_gen(),
                 loss_fn=torch.nn.CrossEntropyLoss(),
@@ -86,11 +86,15 @@ def model_gen() -> torch.nn.Module:
             )
             for p in dataset.get_partitions()
         ]
+        costs: Sequence[Cost] = pytorch_costs
         x_optimal = None
     elif cost_cls is LogisticRegressionCost:
-        costs = [cost_cls(dataset=p, batch_size=batch_size) for p in dataset.get_partitions()]  # type: ignore[call-arg]
-        sum_cost = reduce(add, costs)
+        classification_costs: list[LogisticRegressionCost] = [
+            LogisticRegressionCost(dataset=p, batch_size=batch_size) for p in dataset.get_partitions()
+        ]
+        sum_cost = reduce(add, classification_costs)
         x_optimal = ca.accelerated_gradient_descent(sum_cost, x0=None, max_iter=50000, stop_tol=1e-100, max_tol=1e-16)
+        costs = classification_costs
     else:
         raise ValueError(f"Unsupported cost class: {cost_cls}")
 
@@ -158,15 +162,19 @@ def model_gen() -> torch.nn.Module:
                 output_size=1,
             )
 
-        costs = [
-            cost_cls(dataset=p, model=model_gen(), loss_fn=torch.nn.MSELoss(), batch_size=batch_size, device=device)  # type: ignore[call-arg]
+        pytorch_costs: list[PyTorchCost] = [
+            PyTorchCost(dataset=p, model=model_gen(), loss_fn=torch.nn.MSELoss(), batch_size=batch_size, device=device)
             for p in dataset.get_partitions()
         ]
+        costs: Sequence[Cost] = pytorch_costs
         x_optimal = None
     elif cost_cls is LinearRegressionCost:
-        costs = [cost_cls(dataset=p, batch_size=batch_size) for p in dataset.get_partitions()]  # type: ignore[call-arg]
-        sum_cost = reduce(add, costs)
+        regression_costs: list[LinearRegressionCost] = [
+            LinearRegressionCost(dataset=p, batch_size=batch_size) for p in dataset.get_partitions()
+        ]
+        sum_cost = reduce(add, regression_costs)
         x_optimal = ca.accelerated_gradient_descent(sum_cost, x0=None, max_iter=50000, stop_tol=1e-100, max_tol=1e-16)
+        costs = regression_costs
     else:
         raise ValueError(f"Unsupported cost class: {cost_cls}")
 
 
@@ -15,8 +15,8 @@ def _validate_cost_operation(
         self,
         other: object,
         *,
-        check_framework: bool = False,
-        check_device: bool = False,
+        check_framework: bool = True,
+        check_device: bool = True,
     ) -> None:
         """
         Validate that another object can participate in a binary cost operation.
 
@@ -25,15 +25,20 @@ class SumCost(Cost):
     """
 
     def __init__(self, costs: list[Cost]):
-        if not all(costs[0].shape == cf.shape for cf in costs):
-            raise ValueError("All cost functions must have the same domain shape")
+        if len(costs) == 0:
+            raise ValueError("SumCost must contain at least one cost function.")
+
         self.costs: list[Cost] = []
         for cf in costs:
             if isinstance(cf, SumCost):
                 self.costs.extend(cf.costs)
             else:
                 self.costs.append(cf)
 
+        first = self.costs[0]
+        for cf in self.costs[1:]:
+            first._validate_cost_operation(cf)  # noqa: SLF001
+
     @property
     def shape(self) -> tuple[int, ...]:
         return self.costs[0].shape
 
@@ -2,6 +2,7 @@
 
 import random
 from collections import defaultdict
+from collections.abc import Iterable
 from typing import TYPE_CHECKING, Any, cast
 
 import decent_bench.utils.interoperability as iop
@@ -161,9 +162,10 @@ def _heterogeneous_split(self) -> list[Dataset]:
         """
         # Group indices by class in a single pass
         class_to_indices: dict[int, list[int]] = defaultdict(list)
-        for idx, (_, label) in enumerate(self.torch_dataset):  # type: ignore[misc, arg-type]
-            if label in class_to_indices or len(class_to_indices) < (self.n_partitions * self.targets_per_partition):  # type: ignore[has-type]
-                class_to_indices[label].append(idx)  # type: ignore[has-type]
+        for idx, sample in enumerate(cast("Iterable[Any]", self.torch_dataset)):
+            _, label = cast("tuple[Any, int]", sample)
+            if label in class_to_indices or len(class_to_indices) < (self.n_partitions * self.targets_per_partition):
+                class_to_indices[label].append(idx)
 
         # Create partitions from class-grouped indices
         idx_partitions = []
 
@@ -1,12 +1,10 @@
-import random
 from abc import ABC, abstractmethod
 from collections.abc import Callable, Iterable, Sequence
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Final, cast, final
 
 import decent_bench.utils.algorithm_helpers as alg_helpers
 import decent_bench.utils.interoperability as iop
-from decent_bench.costs import EmpiricalRiskCost
 from decent_bench.networks import FedNetwork, Network, P2PNetwork
 from decent_bench.schemes import ClientSelectionScheme, UniformClientSelection
 from decent_bench.utils._tags import tags
@@ -179,6 +177,10 @@ class FedAlgorithm(Algorithm[FedNetwork]):
     def _cleanup_agents(self, network: FedNetwork) -> Iterable["Agent"]:
         return [network.server(), *network.clients()]
 
+    def server_broadcast(self, network: FedNetwork, selected_clients: Sequence["Agent"]) -> None:
+        """Send the current server model to the selected clients."""
+        network.send(sender=network.server(), receiver=selected_clients, msg=network.server().x)
+
     def select_clients(
         self,
         clients: Sequence["Agent"],
@@ -281,10 +283,10 @@ class FedAvg(FedAlgorithm):
     round :math:`k`. In FedAvg, each selected client performs ``num_local_epochs`` local SGD epochs, then the server
     aggregates the final local models to form :math:`\mathbf{x}_{k+1}`. The aggregation uses client weights, defaulting
     to data-size weights when ``client_weights`` is not provided. Client selection (subsampling) defaults to uniform
-    sampling with fraction 1.0 (all active clients) and can be customized via ``selection_scheme``. For
-    :class:`~decent_bench.costs.EmpiricalRiskCost`, local updates use mini-batches of size
-    :attr:`EmpiricalRiskCost.batch_size <decent_bench.costs.EmpiricalRiskCost.batch_size>`; for generic costs, local
-    updates use full-batch gradients.
+    sampling with fraction 1.0 (all active clients) and can be customized via ``selection_scheme``. Costs that
+    preserve the :class:`~decent_bench.costs.EmpiricalRiskCost` abstraction use client-side mini-batches of size
+    :attr:`EmpiricalRiskCost.batch_size <decent_bench.costs.EmpiricalRiskCost.batch_size>`; generic cost wrappers
+    fall back to full-gradient local updates.
     """
 
     # C=0.1; batch size= inf/10/50 (dataset sizes are bigger; normally 1/10 of the total dataset).
@@ -323,46 +325,28 @@ def step(self, network: FedNetwork, iteration: int) -> None:  # noqa: D102
         if not selected_clients:
             return
 
-        self._sync_server_to_clients(network, selected_clients)
+        self.server_broadcast(network, selected_clients)
         self._run_local_updates(network, selected_clients)
         self.aggregate(network, selected_clients)
 
-    def _sync_server_to_clients(self, network: FedNetwork, selected_clients: Sequence["Agent"]) -> None:
-        network.send(sender=network.server(), receiver=selected_clients, msg=network.server().x)
-
     def _run_local_updates(self, network: FedNetwork, selected_clients: Sequence["Agent"]) -> None:
         for client in selected_clients:
             client.x = self._compute_local_update(client, network.server())
             network.send(sender=client, receiver=network.server(), msg=client.x)
 
     def _compute_local_update(self, client: "Agent", server: "Agent") -> "Array":
-        local_x = iop.copy(client.messages[server]) if server in client.messages else iop.copy(client.x)
-        if isinstance(client.cost, EmpiricalRiskCost):
-            cost = client.cost
-            n_samples = cost.n_samples
-            return self._epoch_minibatch_update(cost, local_x, cost.batch_size, n_samples)
+        """
+        Run local gradient steps using the batching semantics of ``client.cost.gradient``.
 
+        Each of the ``num_local_epochs`` iterations takes one gradient step. Costs preserving the empirical-risk
+        abstraction default ``gradient`` to ``indices="batch"`` (a mini-batch step); generic costs use full gradients.
+        """
+        local_x = iop.copy(client.messages[server]) if server in client.messages else iop.copy(client.x)
         for _ in range(self.num_local_epochs):
             grad = client.cost.gradient(local_x)
             local_x -= self.step_size * grad
         return local_x
 
-    def _epoch_minibatch_update(
-        self,
-        cost: EmpiricalRiskCost,
-        local_x: "Array",
-        per_client_batch: int,
-        n_samples: int,
-    ) -> "Array":
-        for _ in range(self.num_local_epochs):
-            indices = list(range(n_samples))
-            random.shuffle(indices)
-            for start in range(0, n_samples, per_client_batch):
-                batch_indices = indices[start : start + per_client_batch]
-                grad = cost.gradient(local_x, indices=batch_indices)
-                local_x -= self.step_size * grad
-        return local_x
-
 
 @tags("federated")
 @dataclass(eq=False)
@@ -1314,10 +1298,8 @@ def step(self, network: P2PNetwork, _: int) -> None:  # noqa: D102
                 network.send(i, j, s)
         for i in network.active_agents():
             for j, msg in i.messages.items():
-                i.aux_vars["z_y"][j] = (1 - self.alpha) * i.aux_vars["z_y"][j] \
-                                        + self.alpha * msg[0]  # fmt: skip
-                i.aux_vars["z_s"][j] = (1 - self.alpha) * i.aux_vars["z_s"][j] \
-                                        + self.alpha * msg[1]  # fmt: skip
+                i.aux_vars["z_y"][j] = (1 - self.alpha) * i.aux_vars["z_y"][j] + self.alpha * msg[0]
+                i.aux_vars["z_s"][j] = (1 - self.alpha) * i.aux_vars["z_s"][j] + self.alpha * msg[1]
 
 
 ADMMTracking = ATG  # alias
 
@@ -72,6 +72,7 @@ def __init__(
         agent_ids = [agent.id for agent in graph.nodes()]
         if len(agent_ids) != len(set(agent_ids)):
             raise ValueError("Agent IDs must be unique")
+        self._validate_agent_cost_compatibility(graph)
 
         self._graph = graph
         self._message_noise = self._initialize_message_schemes(message_noise, "noise", NoiseScheme, NoNoise)
@@ -84,6 +85,37 @@ def __init__(
         self._buffer_messages = buffer_messages
         self._iteration = 0  # Current iteration, updated by the algorithm
 
+    @staticmethod
+    def _validate_agent_cost_compatibility(graph: AgentGraph) -> None:
+        """
+        Validate that all agents' costs share the same shape, framework, and device.
+
+        Raises:
+            ValueError: If agents in the graph have mismatching cost shape, framework, or device.
+
+        """
+        agents = list(graph.nodes())
+        if len(agents) <= 1:
+            return
+
+        first_cost = agents[0].cost
+        first_signature = (first_cost.shape, first_cost.framework, first_cost.device)
+        mismatches: list[str] = []
+        for agent in agents[1:]:
+            signature = (agent.cost.shape, agent.cost.framework, agent.cost.device)
+            if signature != first_signature:
+                mismatches.append(
+                    f"agent {agent.id}: shape={agent.cost.shape}, framework={agent.cost.framework}, "
+                    f"device={agent.cost.device}"
+                )
+
+        if mismatches:
+            raise ValueError(
+                "All agents in a network must have costs with the same shape, framework, and device. "
+                f"Expected shape={first_cost.shape}, framework={first_cost.framework}, "
+                f"device={first_cost.device}; mismatches: {'; '.join(mismatches)}"
+            )
+
     def _initialize_message_schemes(
         self,
         scheme: object,
 
@@ -178,7 +178,7 @@ def __init__(self, n_significant_digits: int):
         self.n_significant_digits = n_significant_digits
 
     def compress(self, msg: Array) -> Array:  # noqa: D102
-        res = np.vectorize(lambda x: float(f"%.{self.n_significant_digits - 1}e" % x))(iop.to_numpy(msg))  # noqa: RUF073
+        res = np.vectorize(lambda x: float(format(x, f".{self.n_significant_digits - 1}e")))(iop.to_numpy(msg))
         return iop.to_array_like(res, msg)
 
 
 
@@ -128,6 +128,30 @@ Classification
            :module: decent_bench.costs
 
 
+PyTorchCost regularization
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+When combining :class:`~decent_bench.costs.PyTorchCost` with one of the
+built-in regularizers, instantiate the regularizer with the same framework
+and device as the empirical cost (a mismatch raises ``ValueError``):
+
+.. code-block:: python
+
+    from decent_bench.costs import L2RegularizerCost
+    from decent_bench.utils.types import SupportedFrameworks
+
+    reg = L2RegularizerCost(
+        shape=cost.shape,
+        framework=SupportedFrameworks.PYTORCH,
+        device=cost.device,
+    )
+    objective = cost + reg
+
+This preserves compatibility with the PyTorch empirical objective and keeps
+the resulting objective in the empirical, batch-compatible abstraction.
+It is convenient for composition, but it is not necessarily the most
+efficient option compared with native framework-specific regularization.
+
+
 Execution settings
 ------------------
 Configure settings for metrics, trials, statistical confidence level, logging, and multiprocessing.
 
@@ -3,6 +3,7 @@
 
 import decent_bench.utils.interoperability as iop
 from decent_bench.costs import Cost, L1RegularizerCost, L2RegularizerCost, QuadraticCost, SumCost
+from decent_bench.utils.types import SupportedDevices, SupportedFrameworks
 
 
 def _simple_quadratic(A_scale: float, b_scale: float, c: float = 0.0) -> QuadraticCost:
@@ -12,20 +13,28 @@ def _simple_quadratic(A_scale: float, b_scale: float, c: float = 0.0) -> Quadrat
 
 
 class _SimpleCost(Cost):
-    def __init__(self, scale: float):
+    def __init__(
+        self,
+        scale: float,
+        *,
+        framework: SupportedFrameworks = SupportedFrameworks.NUMPY,
+        device: SupportedDevices = SupportedDevices.CPU,
+    ):
         self.scale = scale
+        self._framework = framework
+        self._device = device
 
     @property
     def shape(self) -> tuple[int, ...]:
         return (2,)
 
     @property
-    def framework(self) -> str:
-        return "numpy"
+    def framework(self) -> SupportedFrameworks:
+        return self._framework
 
     @property
-    def device(self) -> str | None:
-        return "cpu"
+    def device(self) -> SupportedDevices:
+        return self._device
 
     @property
     def m_smooth(self) -> float:
@@ -182,3 +191,35 @@ def test_cost_scalar_ops_reject_invalid_inputs() -> None:
         _ = cost / 0.0
     with pytest.raises(TypeError):
         _ = 0.0 / cost
+
+
+def test_cost_addition_rejects_mismatched_frameworks() -> None:
+    cost_a = _SimpleCost(scale=1.0, framework=SupportedFrameworks.NUMPY)
+    cost_b = _SimpleCost(scale=2.0, framework=SupportedFrameworks.PYTORCH)
+
+    with pytest.raises(ValueError, match="Mismatching frameworks"):
+        _ = cost_a + cost_b
+
+
+def test_cost_addition_rejects_mismatched_devices() -> None:
+    cost_a = _SimpleCost(scale=1.0, device=SupportedDevices.CPU)
+    cost_b = _SimpleCost(scale=2.0, device=SupportedDevices.GPU)
+
+    with pytest.raises(ValueError, match="Mismatching devices"):
+        _ = cost_a + cost_b
+
+
+def test_sum_cost_rejects_mismatched_frameworks() -> None:
+    cost_a = _SimpleCost(scale=1.0, framework=SupportedFrameworks.NUMPY)
+    cost_b = _SimpleCost(scale=2.0, framework=SupportedFrameworks.PYTORCH)
+
+    with pytest.raises(ValueError, match="Mismatching frameworks"):
+        SumCost([cost_a, cost_b])
+
+
+def test_sum_cost_rejects_mismatched_devices() -> None:
+    cost_a = _SimpleCost(scale=1.0, device=SupportedDevices.CPU)
+    cost_b = _SimpleCost(scale=2.0, device=SupportedDevices.GPU)
+
+    with pytest.raises(ValueError, match="Mismatching devices"):
+        SumCost([cost_a, cost_b])