facebookresearch
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 27 additions & 1 deletion
diff --git a/balance/__init__.py b/balance/__init__.py
Lines changed: 3 additions & 2 deletions
diff --git a/balance/adjustment.py b/balance/adjustment.py
Lines changed: 16 additions & 18 deletions
diff --git a/balance/balancedf_class.py b/balance/balancedf_class.py
Lines changed: 31 additions & 33 deletions
@@ -1,4 +1,4 @@
-# 0.12.x (2025-11-16)
+# 0.12.x (2025-11-21)
 
 > TODO: update 0.12.x to 0.13.0 before release.
 
@@ -65,6 +65,25 @@
     (`weighting_methods/cbps.py`, `weighting_methods/ipw.py`,
     `weighting_methods/poststratify.py`, `weighting_methods/rake.py`), and
     datasets module (`datasets/__init__.py`)
+  - **Modernized type hints to PEP 604 syntax**: Updated all type annotations
+    across 11 files to use the newer PEP 604 union syntax (`X | Y` instead of
+    `Union[X, Y]` and `X | None` instead of `Optional[X]`), improving code
+    readability and aligning with Python 3.10+ typing conventions. Added
+    `from __future__ import annotations` to these files, replacing the older
+    `absolute_import, division, print_function, unicode_literals` imports
+    where present. Removed the now-unused `Union`, `Optional`, and `List`
+    imports from `typing`. Files updated: `__init__.py`, `adjustment.py`,
+    `balancedf_class.py`, `cli.py`, `datasets/__init__.py`, `sample_class.py`,
+    `stats_and_plots/weighted_comparisons_stats.py`,
+    `stats_and_plots/weighted_stats.py`, `stats_and_plots/weights_stats.py`,
+    `util.py`, `weighting_methods/ipw.py`.
+  - **Important compatibility note**:
+    Type alias definitions in `typing.py` retain `Union` syntax for Python 3.9
+    compatibility: an alias is an ordinary assignment evaluated at import time,
+    and `X | Y` between types raises `TypeError` before Python 3.10. Added
+    inline documentation of this limitation and of the distinction between
+    annotations (left unevaluated by `from __future__ import annotations`, so
+    `|` is safe) and alias assignments (evaluated, so they require `Union`).
   - Fixed missing `Any` import in `weighted_comparisons_plots.py` to resolve
     pyre-fixme[10] error
   - Added comprehensive type annotations for previously untyped parameters and
@@ -79,6 +98,13 @@
   - Improved `quantize` function: preserves column ordering and replaces
     assertions with proper TypeError exceptions
     ([#133](https://github.com/facebookresearch/balance/pull/133)).
+- **Statistical Functions**
+  - **Fixed division by zero in `asmd_improvement()`**: Added safety check to
+    prevent RuntimeWarning when `asmd_mean_before` is zero or very close to zero
+    (< 1e-10). The function now returns `0.0` (representing 0% improvement) when
+    the sample was already perfectly matched to the target before adjustment,
+    which is the semantically correct result. This eliminates the "invalid value
+    encountered in scalar divide" warning that appeared in test runs.
 - **Weighting Methods**
   - `rake()` and `poststratify()` now honour `weight_trimming_mean_ratio` and
     `weight_trimming_percentile`, trimming and renormalising weights through the
 
@@ -5,8 +5,9 @@
 
 # pyre-strict
 
+from __future__ import annotations
+
 import logging
-from typing import Optional
 
 from balance.balancedf_class import (  # noqa
     BalanceCovarsDF,  # noqa
@@ -41,7 +42,7 @@ def help() -> None:
 
 
 def setup_logging(
-    logger_name: Optional[str] = __package__,
+    logger_name: str | None = __package__,
     level: str = "INFO",
     removeHandler: bool = True,
 ) -> logging.Logger:
 
@@ -5,11 +5,11 @@
 
 # pyre-strict
 
-from __future__ import absolute_import, division, print_function, unicode_literals
+from __future__ import annotations
 
 import logging
 
-from typing import Any, Callable, Dict, List, Literal, Tuple, Union
+from typing import Any, Callable, Dict, Literal, Tuple
 
 import numpy as np
 import numpy.typing as npt
@@ -38,9 +38,7 @@
 }
 
 
-def _validate_limit(
-    limit: Union[float, int, None], n_weights: int
-) -> Union[float, None]:
+def _validate_limit(limit: float | int | None, n_weights: int) -> float | None:
     """Validate and adjust a percentile limit for use with scipy.stats.mstats.winsorize.
 
     This function prepares percentile limits for winsorization by:
@@ -88,13 +86,13 @@ def _validate_limit(
 
 
 def trim_weights(
-    weights: Union[pd.Series, npt.NDArray],
+    weights: pd.Series | npt.NDArray,
     # TODO: add support to more types of input weights? (e.g. list? other?)
-    weight_trimming_mean_ratio: Union[float, int, None] = None,
-    weight_trimming_percentile: Union[float, None] = None,
+    weight_trimming_mean_ratio: float | int | None = None,
+    weight_trimming_percentile: float | None = None,
     verbose: bool = False,
     keep_sum_of_weights: bool = True,
-    target_sum_weights: Union[float, int, np.floating, None] = None,
+    target_sum_weights: float | int | np.floating | None = None,
 ) -> pd.Series:
     """Trim extreme weights using mean ratio clipping or percentile-based winsorization.
 
@@ -132,22 +130,22 @@ def trim_weights(
     desired total.
 
     Args:
-        weights (Union[pd.Series, np.ndarray]): Weights to trim. np.ndarray will be
+        weights (pd.Series | np.ndarray): Weights to trim. np.ndarray will be
             converted to pd.Series internally.
-        weight_trimming_mean_ratio (Union[float, int], optional): Ratio for upper bound
+        weight_trimming_mean_ratio (float | int | None, optional): Ratio for upper bound
             clipping as mean(weights) * ratio. Mutually exclusive with
             weight_trimming_percentile. Defaults to None.
-        weight_trimming_percentile (Union[float, Tuple[float, float]], optional):
+        weight_trimming_percentile (float | tuple[float, float] | None, optional):
             Percentile limits for winsorization. Value(s) must be between 0 and 1.
             - Single float: Symmetric winsorization on both tails
-            - Tuple[float, float]: (lower_percentile, upper_percentile) for
+            - tuple[float, float]: (lower_percentile, upper_percentile) for
               independent control of each tail
             Mutually exclusive with weight_trimming_mean_ratio. Defaults to None.
         verbose (bool, optional): Whether to log details about the trimming process.
             Defaults to False.
         keep_sum_of_weights (bool, optional): Whether to rescale weights after trimming
             to preserve the original sum of weights. Defaults to True.
-        target_sum_weights (Union[float, int, np.floating, None], optional): If
+        target_sum_weights (float | int | np.floating | None, optional): If
             provided, rescale the trimmed weights so their sum equals this
             target. ``None`` (default) leaves the post-trimming sum unchanged.
 
@@ -309,14 +307,14 @@ def trim_weights(
 
 
 def default_transformations(
-    dfs: Union[Tuple[pd.DataFrame, ...], List[pd.DataFrame]],
+    dfs: tuple[pd.DataFrame, ...] | list[pd.DataFrame],
 ) -> Dict[str, Callable[..., Any]]:
     """
     Apply default transformations to dfs, i.e.
     quantize to numeric columns and fct_lump to non-numeric and boolean
 
     Args:
-        dfs (Union[Tuple[pd.DataFrame, ...], List[pd.DataFrame]]): A list or tuple of dataframes
+        dfs (tuple[pd.DataFrame, ...] | list[pd.DataFrame]): A list or tuple of dataframes
 
     Returns:
         Dict[str, Callable]: Dict of transformations
@@ -339,7 +337,7 @@ def default_transformations(
 
 def apply_transformations(
     dfs: Tuple[pd.DataFrame, ...],
-    transformations: Union[Dict[str, Callable[..., Any]], str, None],
+    transformations: Dict[str, Callable[..., Any]] | str | None,
     drop: bool = True,
 ) -> Tuple[pd.DataFrame, ...]:
     """Apply the transformations specified in transformations to all of the dfs
@@ -357,7 +355,7 @@ def apply_transformations(
 
     Args:
         dfs (Tuple[pd.DataFrame, ...]): The DataFrames on which to operate
-        transformations (Union[Dict[str, Callable], str, None]): Mapping from column name to function to apply.
+        transformations (Dict[str, Callable[..., Any]] | str | None): Mapping from column name to function to apply.
             Transformations of existing columns should be specified as functions
             of those columns (e.g. `lambda x: x*2`), whereas additions of new
             columns should be specified as functions of the DataFrame
 
@@ -5,8 +5,10 @@
 
 # pyre-strict
 
+from __future__ import annotations
+
 import logging
-from typing import Any, Dict, Literal, Optional, Tuple, Union
+from typing import Any, Dict, Literal, Tuple
 
 import numpy as np
 import numpy.typing as npt
@@ -106,14 +108,14 @@ def _sample(self: "BalanceDF") -> "Sample":
     @property
     def _weights(
         self: "BalanceDF",
-    ) -> Optional[pd.DataFrame]:
+    ) -> pd.DataFrame | None:
         """Access the weight_column in __sample.
 
         Args:
             self (BalanceDF): Object
 
         Returns:
-            Optional[pd.DataFrame]: The weights (with no column name)
+            pd.DataFrame | None: The weights (with no column name)
         """
         w = self._sample.weight_column
         return w.rename(None)
@@ -123,13 +125,11 @@ def _BalanceDF_child_from_linked_samples(
         self: "BalanceDF",
     ) -> Dict[
         str,
-        Union[
-            "BalanceDF",
-            "BalanceCovarsDF",
-            "BalanceWeightsDF",
-            "BalanceOutcomesDF",
-            None,
-        ],
+        "BalanceDF"
+        | "BalanceCovarsDF"
+        | "BalanceWeightsDF"
+        | "BalanceOutcomesDF"
+        | None,
     ]:
         """Returns a dict with self and the same type of BalanceDF_child when created from the linked samples.
 
@@ -270,13 +270,11 @@ def _BalanceDF_child_from_linked_samples(
         BalanceDF_child_method = self.__name
         d: Dict[
             str,
-            Union[
-                "BalanceDF",
-                "BalanceCovarsDF",
-                "BalanceWeightsDF",
-                "BalanceOutcomesDF",
-                None,
-            ],
+            "BalanceDF"
+            | "BalanceCovarsDF"
+            | "BalanceWeightsDF"
+            | "BalanceOutcomesDF"
+            | None,
         ] = {"self": self}
         d.update(
             {
@@ -492,7 +490,7 @@ def _descriptive_stats(
         )
         return wdf
 
-    def to_download(self: "BalanceDF", tempdir: Optional[str] = None) -> FileLink:
+    def to_download(self: "BalanceDF", tempdir: str | None = None) -> FileLink:
         """Creates a downloadable link of the DataFrame, with ids, of the BalanceDF object.
 
         File name starts with tmp_balance_out_, and some random file name (using :func:`uuid.uuid4`).
@@ -1012,7 +1010,7 @@ def mean_with_ci(
     # NOTE: Summary could return also an str in case it is overridden in other children's methods.
     def summary(
         self: "BalanceDF", on_linked_samples: bool = True
-    ) -> Union[pd.DataFrame, str]:
+    ) -> pd.DataFrame | str:
         """
         Returns a summary of the BalanceDF object.
 
@@ -1038,14 +1036,14 @@ def summary(
 
     def _get_df_and_weights(
         self: "BalanceDF",
-    ) -> Tuple[pd.DataFrame, Optional[npt.NDArray]]:
+    ) -> Tuple[pd.DataFrame, npt.NDArray | None]:
         """Extract covars df (after using model_matrix) and weights from a BalanceDF object.
 
         Args:
             self (BalanceDF): Object
 
         Returns:
-            Tuple[pd.DataFrame, Optional[np.ndarray]]:
+            Tuple[pd.DataFrame, np.ndarray | None]:
                 A pd.DataFrame output from running :func:`model_matrix`, and
                 A np.ndarray of weights from :func:`_weights`, or just None (if there are no weights).
         """
@@ -1119,7 +1117,7 @@ def _asmd_BalanceDF(
     def asmd(
         self: "BalanceDF",
         on_linked_samples: bool = True,
-        target: Optional["BalanceDF"] = None,
+        target: "BalanceDF" | None = None,
         aggregate_by_main_covar: bool = False,
         **kwargs: Any,
     ) -> pd.DataFrame:
@@ -1246,8 +1244,8 @@ def asmd(
 
     def asmd_improvement(
         self: "BalanceDF",
-        unadjusted: Optional["BalanceDF"] = None,
-        target: Optional["BalanceDF"] = None,
+        unadjusted: "BalanceDF" | None = None,
+        target: "BalanceDF" | None = None,
     ) -> np.float64:
         """Calculates the improvement in mean(asmd) from before to after applying some weight adjustment.
 
@@ -1374,10 +1372,10 @@ def _df_with_ids(self: "BalanceDF") -> pd.DataFrame:
 
     def to_csv(
         self: "BalanceDF",
-        path_or_buf: Optional[FilePathOrBuffer] = None,
+        path_or_buf: FilePathOrBuffer | None = None,
         *args: Any,
         **kwargs: Any,
-    ) -> Optional[str]:
+    ) -> str | None:
         """Write df with ids from BalanceDF to a comma-separated values (csv) file.
 
         Uses :func:`pd.DataFrame.to_csv`.
@@ -1414,9 +1412,9 @@ def __init__(self: "BalanceOutcomesDF", sample: Sample) -> None:
     #       this will also require to update _relative_response_rates a bit.
     def relative_response_rates(
         self: "BalanceOutcomesDF",
-        target: Union[bool, pd.DataFrame] = False,
+        target: bool | pd.DataFrame = False,
         per_column: bool = False,
-    ) -> Optional[pd.DataFrame]:
+    ) -> pd.DataFrame | None:
         """Produces a summary table of number of responses and proportion of completed responses.
 
         See :func:`general_stats.relative_response_rates`.
@@ -1513,7 +1511,7 @@ def relative_response_rates(
             self.df, df_target, per_column=per_column
         )
 
-    def target_response_rates(self: "BalanceOutcomesDF") -> Optional[pd.DataFrame]:
+    def target_response_rates(self: "BalanceOutcomesDF") -> pd.DataFrame | None:
         """Calculates relative_response_rates for the target in a Sample object.
 
         See :func:`general_stats.relative_response_rates`.
@@ -1569,7 +1567,7 @@ def target_response_rates(self: "BalanceOutcomesDF") -> Optional[pd.DataFrame]:
     #       The BalanceDF.summary method only returns a DataFrame. So it's a question
     #       what is the best way to structure this more generally.
     def summary(
-        self: "BalanceOutcomesDF", on_linked_samples: Optional[bool] = None
+        self: "BalanceOutcomesDF", on_linked_samples: bool | None = None
     ) -> str:
         """Produces summary printable string of a BalanceOutcomesDF object.
 
@@ -1831,8 +1829,8 @@ def _weights(self: "BalanceWeightsDF") -> None:
 
     def trim(
         self: "BalanceWeightsDF",
-        ratio: Optional[Union[float, int]] = None,
-        percentile: Optional[float] = None,
+        ratio: float | int | None = None,
+        percentile: float | None = None,
         keep_sum_of_weights: bool = True,
     ) -> None:
         """Trim weights in the sample object.
@@ -1859,7 +1857,7 @@ def trim(
         )
 
     def summary(
-        self: "BalanceWeightsDF", on_linked_samples: Optional[bool] = None
+        self: "BalanceWeightsDF", on_linked_samples: bool | None = None
     ) -> pd.DataFrame:
         """
         Generates a summary of a BalanceWeightsDF object.