def simplify(self) -> LinearExpression:
    """
    Simplify the linear expression by combining terms with the same variable.

    For every coordinate of the expression, all terms that reference the
    same variable are merged into a single term whose coefficient is the sum
    of the original coefficients. Terms with variable label -1, with a zero
    or NaN coefficient, or whose summed coefficient is zero are dropped.
    Only ``vars`` and ``coeffs`` are replaced; the remaining data of the
    expression (e.g. the constant) is copied unchanged.

    Returns
    -------
    LinearExpression
        A new LinearExpression with combined terms. Term slots that are
        unused at every coordinate are removed from the term dimension.

    Examples
    --------
    >>> from linopy import Model
    >>> m = Model()
    >>> x = m.add_variables(name="x")
    >>> expr = 2 * x + 3 * x  # Creates two terms
    >>> simplified = expr.simplify()  # Combines into one term: 5 * x
    """

    def _simplify_row(vars_row: np.ndarray, coeffs_row: np.ndarray) -> np.ndarray:
        """
        Combine duplicate variables along the term axis of one coordinate.

        Returns a float array of shape ``(2, len(vars_row))``: row 0 holds
        the variable labels in ascending order (padded with -1), row 1 the
        summed coefficients (padded with 0).
        """
        n_terms = len(vars_row)

        # Labels are stored as floats only so that they can share a single
        # array with the coefficients; the caller casts them back to int.
        result = np.vstack([np.full(n_terms, -1.0), np.zeros(n_terms)])

        # Ignore masked variables and coefficients that contribute nothing.
        keep = (vars_row != -1) & (coeffs_row != 0) & ~np.isnan(coeffs_row)
        if not keep.any():
            return result

        # Group by the labels that are actually present instead of
        # allocating a bin for every label up to the maximum: the cost per
        # row then depends on the number of terms, not on the model size.
        labels, inverse = np.unique(vars_row[keep], return_inverse=True)
        summed = np.bincount(inverse, weights=coeffs_row[keep])

        # Drop variables whose coefficients cancelled out.
        nonzero = summed != 0
        n_unique = int(nonzero.sum())
        result[0, :n_unique] = labels[nonzero]
        result[1, :n_unique] = summed[nonzero]
        return result

    # np.vectorize (used by apply_ufunc below) has to call the function at
    # least once to learn the output dtype and the new CV_DIM dimension, so
    # it raises on size-0 inputs. There is nothing to combine in that case.
    if self.vars.size == 0:
        return LinearExpression(self.data.copy(), self.model)

    # vars and coeffs have dimensions (.., TERM_DIM) where .. are the
    # coordinate dimensions of the expression. The row function is applied
    # per coordinate and returns labels and coefficients stacked along the
    # helper dimension CV_DIM, so `combined` is (.., CV_DIM, TERM_DIM).
    combined: xr.DataArray = xr.apply_ufunc(
        _simplify_row,
        self.vars,
        self.coeffs,
        input_core_dims=[[TERM_DIM], [TERM_DIM]],
        output_core_dims=[[CV_DIM, TERM_DIM]],
        vectorize=True,
    )

    # Every row packs its remaining terms to the front, so a term slot that
    # is -1 at all coordinates is unused and can be dropped entirely.
    labels = combined.isel({CV_DIM: 0}).astype(int)
    coord_dims = [d for d in labels.dims if d != TERM_DIM]
    used_terms = (labels != -1).any(dim=coord_dims)
    combined = combined.isel({TERM_DIM: used_terms})

    # Split the stacked result back into labels and coefficients.
    simplified_vars = combined.isel({CV_DIM: 0}).astype(int)
    simplified_coeffs = combined.isel({CV_DIM: 1})

    # Keep everything else of the expression and swap in the new terms.
    new_data = self.data.copy()
    new_data = assign_multiindex_safe(
        new_data, vars=simplified_vars, coeffs=simplified_coeffs
    )

    return LinearExpression(new_data, self.model)
def test_simplify_multiple_dimensions() -> None:
    """Test simplification of duplicate terms on a two-dimensional variable."""
    model = Model()
    a_index = pd.Index([0, 1, 2, 3], name="a")
    b_index = pd.Index([0, 1, 2], name="b")
    coords = [a_index, b_index]
    x = model.add_variables(name="x", coords=coords)

    expr = 2 * x + 3 * x + x
    simplified = expr.simplify()

    assert simplified.nterm == 1, f"Expected 1 term, got {simplified.nterm}"
    assert simplified.ndim == 2, f"Expected 2 dimensions, got {simplified.ndim}"
    # Reduce with ndarray.all(): the builtin all() iterates over rows of a
    # multi-dimensional array and only works by accident for length-1 rows.
    assert (simplified.coeffs.values == 6).all(), (
        f"Expected coefficients of 6, got {simplified.coeffs.values}"
    )


def test_simplify_with_different_variables(x: Variable, y: Variable) -> None:
    """Test that different variables are kept separate."""
    # Create expression: 2*x + 3*x + 4*y
    expr = 2 * x + 3 * x + 4 * y

    simplified = expr.simplify()

    # Should have 2 terms (one for x with coeff 5, one for y with coeff 4)
    assert simplified.nterm == 2, f"Expected 2 terms, got {simplified.nterm}"

    coeffs: list[float] = simplified.coeffs.values.flatten().tolist()
    assert set(coeffs) == {5.0, 4.0}, (
        f"Expected coefficients {{5.0, 4.0}}, got {set(coeffs)}"
    )


def test_simplify_with_constant(x: Variable) -> None:
    """Test that constants are preserved."""
    expr = 2 * x + 3 * x + 10

    simplified = expr.simplify()

    # The two x terms must collapse into one; checking this first gives a
    # clear failure message instead of a shape-dependent error below.
    assert simplified.nterm == 1, f"Expected 1 term, got {simplified.nterm}"

    # Check constant is preserved
    assert (simplified.const.values == 10.0).all(), (
        f"Expected constant 10.0, got {simplified.const.values}"
    )

    # Check coefficients (2-D array: coordinates x terms)
    assert (simplified.coeffs.values == 5.0).all(), (
        f"Expected coefficient 5.0, got {simplified.coeffs.values}"
    )


def test_simplify_cancellation(x: Variable) -> None:
    """Test that terms cancel out correctly when coefficients sum to zero."""
    expr = x - x
    simplified = expr.simplify()

    assert simplified.nterm == 0, f"Expected 0 terms, got {simplified.nterm}"
    assert simplified.coeffs.values.size == 0
    assert simplified.vars.values.size == 0
def test_simplify_partial_cancellation(x: Variable, y: Variable) -> None:
    """Test partial cancellation where some terms cancel but others remain."""
    expr = 2 * x - 2 * x + 3 * y
    simplified = expr.simplify()

    assert simplified.nterm == 1, f"Expected 1 term, got {simplified.nterm}"
    # Reduce with ndarray.all(): coeffs is 2-D (coordinates x terms), and the
    # builtin all() would iterate over its rows instead of its elements.
    assert (simplified.coeffs.values == 3.0).all(), (
        f"Expected coefficient 3.0, got {simplified.coeffs.values}"
    )