Skip to content

Commit 0ea5ed5

Browse files
fix: update consumption calculation, update energy_flows dataframe creation
Signed-off-by: Mohammad Tayyab <[email protected]>
1 parent f7e0f1e commit 0ea5ed5

File tree

3 files changed

+124
-49
lines changed

3 files changed

+124
-49
lines changed

RELEASE_NOTES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,7 @@
2020
- Published a locale-aware `ColumnMapper` utility that reads the YAML schema so notebooks can seamlessly move between raw API headers, canonical identifiers, and localized display labels.
2121

2222
## Bug Fixes
23+
- `frequenz.lib.notebooks.reporting.utils.helpers.add_energy_flows()` now infers consumption totals from existing data when explicit consumption columns are missing, preventing inconsistent outputs in notebook pipelines that only provide grid and production inputs.
24+
- `frequenz.lib.notebooks.reporting.metrics.consumption()` reindexes optional production/battery inputs onto the grid index and emits a `UserWarning` when inferred consumption turns negative, so sign-convention issues are surfaced immediately.
2325

2426
<!-- Here go notable bug fixes that are worth a special mention or explanation -->

src/frequenz/lib/notebooks/reporting/metrics/reporting_metrics.py

Lines changed: 46 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,11 @@ def production_excess_in_bat(
9696
production_excess_series = production_excess(
9797
production, consumption, production_is_positive=production_is_positive
9898
)
99-
battery = battery.astype("float64").clip(lower=0)
99+
battery = (
100+
battery.astype("float64")
101+
.reindex(production_excess_series.index, fill_value=0.0)
102+
.clip(lower=0)
103+
)
100104
return pd.concat([production_excess_series, battery], axis=1).min(axis=1)
101105

102106

@@ -199,44 +203,60 @@ def production_self_share(
199203

200204

201205
def consumption(
202-
df: pd.DataFrame, production_cols: list[str] | None, grid_cols: list[str]
206+
grid: pd.Series,
207+
production: pd.Series | None = None,
208+
battery: pd.Series | None = None,
203209
) -> pd.Series:
204-
"""Infer the consumption column from grid and production data if missing.
210+
"""Infer total consumption from grid import, on-site production, and battery power.
205211
206-
If a 'consumption' column is not present, it is computed as the total grid import
207-
(sum of all grid columns) minus total production. Safely handles missing or
208-
empty production columns by treating them as zero.
212+
Computes: consumption = grid_import - production - battery, using raw signed
213+
values (production is usually negative; battery may be positive or negative).
209214
210215
Args:
211-
df: Input DataFrame containing grid and optional production columns.
212-
production_cols: List of production column names (e.g., "pv", "chp", "battery" or "ev").
213-
Can be None or empty if no on-site generation is present.
214-
grid_cols: List of one or more grid column names.
216+
grid: Series of grid import values (e.g., kW or MW).
217+
production: Optional Series of on-site production values.
218+
If None, production is treated as zero.
219+
battery: Optional Series representing battery discharge/charge power.
220+
Positive values decrease inferred consumption (battery charging),
221+
while negative values increase it (battery discharging). If None, the
222+
battery contribution is treated as zero.
215223
216224
Returns:
217225
A Series representing inferred total consumption, named `"consumption"`.
218226
219227
Raises:
220-
ValueError: If `grid_cols` is empty.
228+
ValueError: If `grid` is None.
229+
230+
Warns:
231+
UserWarning: If negative inferred consumption values are detected,
232+
which may indicate times of net export or a sign-convention mismatch.
221233
"""
222-
if "consumption" in df.columns:
223-
return df["consumption"]
234+
if grid is None:
235+
raise ValueError("`grid` must be provided as a pandas Series.")
224236

225-
if not grid_cols:
226-
raise ValueError("At least one grid column must be specified in grid_cols.")
237+
grid_s = grid.astype("float64").fillna(0)
227238

228-
# Compute total grid import and total production
229-
grid_total = df[grid_cols].sum(axis=1)
239+
# Ensure raw production values are used (usually negative for production)
240+
if production is None:
241+
prod_s = pd.Series(0.0, index=grid_s.index)
242+
else:
243+
prod_s = production.astype("float64").reindex(grid_s.index, fill_value=0.0)
230244

231-
# Handle empty production columns safely
232-
if production_cols:
233-
production_total = df[production_cols].sum(axis=1)
245+
if battery is None:
246+
battery_s = pd.Series(0.0, index=grid_s.index)
234247
else:
235-
# No production → production_total = 0
236-
production_total = pd.Series(0, index=df.index)
248+
battery_s = battery.astype("float64")
249+
battery_s = battery_s.reindex(grid_s.index, fill_value=0.0)
237250

238-
# Compute inferred consumption (Series)
239-
consumption = grid_total - production_total
240-
consumption.name = "consumption"
251+
result = (grid_s - prod_s - battery_s).astype("float64")
252+
result.name = "consumption"
241253

242-
return consumption
254+
if (result < 0).any():
255+
warnings.warn(
256+
"Negative inferred consumption detected. This can occur during net export "
257+
"or due to a sign-convention mismatch between grid and production.",
258+
UserWarning,
259+
stacklevel=2,
260+
)
261+
262+
return result

src/frequenz/lib/notebooks/reporting/utils/helpers.py

Lines changed: 76 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import pandas as pd
2424

2525
from frequenz.lib.notebooks.reporting.metrics.reporting_metrics import (
26+
asset_production,
2627
grid_feed_in,
2728
production_excess,
2829
production_excess_in_bat,
@@ -43,11 +44,14 @@ def _get_numeric_series(df: pd.DataFrame, col: str | None) -> pd.Series:
4344
col: Column name to retrieve. If None or missing, zeros are returned.
4445
4546
Returns:
46-
A float64 Series with non-negative values, matching the input index.
47+
A float64 Series aligned to the input index.
4748
"""
4849
if col is None:
49-
return pd.Series(0.0, index=df.index, dtype="float64")
50-
return df.reindex(columns=[col], fill_value=0)[col].astype("float64").clip(lower=0)
50+
series = pd.Series(0.0, index=df.index, dtype="float64")
51+
else:
52+
raw = df.reindex(columns=[col], fill_value=0)[col]
53+
series = pd.to_numeric(raw, errors="coerce").fillna(0.0).astype("float64")
54+
return series
5155

5256

5357
def _sum_cols(df: pd.DataFrame, cols: list[str] | None) -> pd.Series:
@@ -67,32 +71,45 @@ def _sum_cols(df: pd.DataFrame, cols: list[str] | None) -> pd.Series:
6771
if not cols:
6872
return pd.Series(0.0, index=df.index, dtype="float64")
6973

70-
# Safely extract each column as a numeric, non-negative Series, then sum row-wise
74+
# Safely extract each column as a numeric Series then sum row-wise
7175
series_list = [_get_numeric_series(df, c) for c in cols]
7276
return pd.concat(series_list, axis=1).sum(axis=1).astype("float64")
7377

7478

75-
# pylint: disable=too-many-arguments, too-many-locals
79+
def _column_has_data(df: pd.DataFrame, col: str | None) -> bool:
80+
"""Return True when the column exists and has at least one non-zero value."""
81+
if col is None or col not in df.columns:
82+
return False
83+
84+
series = pd.to_numeric(df[col], errors="coerce").fillna(0.0).astype("float64")
85+
if series.empty or not series.notna().any():
86+
return False
87+
88+
return not series.fillna(0).eq(0).all()
89+
90+
91+
# pylint: disable=too-many-arguments, too-many-locals, too-many-positional-arguments
7692
def add_energy_flows(
7793
df: pd.DataFrame,
7894
production_cols: list[str] | None = None,
7995
consumption_cols: list[str] | None = None,
80-
battery_charge_col: str | None = None,
96+
battery_cols: list[str] | None = None,
8197
production_is_positive: bool = False,
8298
) -> pd.DataFrame:
8399
"""Compute and add derived energy flow metrics to the DataFrame.
84100
85101
This function aggregates production and consumption data, derives energy flow
86102
relationships such as grid feed-in, battery charging, and self-consumption,
87-
and appends these computed columns to the given DataFrame.
103+
and appends these computed columns to the given DataFrame. Columns that are
104+
specified but missing or contain only null/zero values are ignored.
88105
89106
Args:
90107
df: Input DataFrame containing production, consumption, and optionally
91-
battery charge data.
108+
battery power data.
92109
production_cols: list of column names representing production sources.
93110
consumption_cols: list of column names representing consumption sources.
94-
battery_charge_col: optional column name for battery charging power. If None,
95-
battery-related flows are set to zero.
111+
battery_cols: optional column names representing signed battery power.
112+
Positive values indicate charging, negative values indicate discharging.
96113
production_is_positive: Whether production values are already positive.
97114
If False, `production` is inverted before clipping.
98115
@@ -106,47 +123,83 @@ def add_energy_flows(
106123
"""
107124
df_flows = df.copy()
108125

109-
# Total production and consumption (returns pandas series with 0.0 for missing cols)
110-
df_flows["production_total"] = _sum_cols(df_flows, production_cols)
111-
df_flows["consumption_total"] = _sum_cols(df_flows, consumption_cols)
126+
# Keep only the production, consumption and battery columns that exist and contain non-zero data
127+
resolved_production_cols = [
128+
col for col in (production_cols or []) if _column_has_data(df_flows, col)
129+
]
130+
resolved_consumption_cols = [
131+
col for col in (consumption_cols or []) if _column_has_data(df_flows, col)
132+
]
133+
resolved_battery_cols = [
134+
col for col in (battery_cols or []) if _column_has_data(df_flows, col)
135+
]
136+
137+
battery_power_series = _sum_cols(df_flows, resolved_battery_cols)
138+
battery_charge_series = (
139+
battery_power_series.reindex(df_flows.index).fillna(0.0).clip(lower=0.0)
140+
)
141+
142+
# Compute total asset production
143+
asset_production_cols: list[str] = []
144+
for col in resolved_production_cols:
145+
series = _get_numeric_series(
146+
df_flows,
147+
col,
148+
)
149+
asset_series = asset_production(
150+
series,
151+
production_is_positive=production_is_positive,
152+
)
153+
asset_col_name = f"{col}_asset_production"
154+
df_flows[asset_col_name] = asset_series
155+
asset_production_cols.append(asset_col_name)
156+
157+
df_flows["production_total"] = _sum_cols(df_flows, asset_production_cols)
112158

113-
# Surplus vs. consumption
159+
# Compute total consumption
160+
consumption_series_cols: list[str] = []
161+
for col in resolved_consumption_cols:
162+
df_flows[col] = _get_numeric_series(df_flows, col)
163+
consumption_series_cols.append(col)
164+
165+
df_flows["consumption_total"] = _sum_cols(df_flows, consumption_series_cols)
166+
167+
# Surplus vs. consumption (production is already positive because of the above cleaning)
114168
df_flows["production_excess"] = production_excess(
115169
df_flows["production_total"],
116170
df_flows["consumption_total"],
117-
production_is_positive=production_is_positive,
171+
production_is_positive=True,
118172
)
119173

120174
# Battery charging power (optional)
121-
bat_in = _get_numeric_series(df_flows, battery_charge_col)
122175
df_flows["production_excess_in_bat"] = production_excess_in_bat(
123176
df_flows["production_total"],
124177
df_flows["consumption_total"],
125-
bat_in,
126-
production_is_positive=production_is_positive,
178+
battery=battery_charge_series,
179+
production_is_positive=True,
127180
)
128181

129182
# Split excess into battery vs. grid
130183
df_flows["grid_feed_in"] = grid_feed_in(
131184
df_flows["production_total"],
132185
df_flows["consumption_total"],
133-
bat_in,
134-
production_is_positive=production_is_positive,
186+
battery=battery_charge_series,
187+
production_is_positive=True,
135188
)
136189

137190
# If no production columns exist, set self-consumption metrics to zero
138-
if production_cols:
191+
if asset_production_cols:
139192
# Use total production for self-consumption instead of asset_production
140193
# (which may not exist)
141194
df_flows["production_self_use"] = production_self_consumption(
142195
df_flows["production_total"],
143196
df_flows["consumption_total"],
144-
production_is_positive=production_is_positive,
197+
production_is_positive=True,
145198
)
146199
df_flows["production_self_share"] = production_self_share(
147200
df_flows["production_total"],
148201
df_flows["consumption_total"],
149-
production_is_positive=production_is_positive,
202+
production_is_positive=True,
150203
)
151204
else:
152205
df_flows["production_self_use"] = 0.0

0 commit comments

Comments
 (0)