Skip to content

Commit bc99cfb

Browse files
authored
Xarray patch: Check array dimensions (#647)
* Check array dimensions after multiplication * Fix tests in costs module * Fix objectives tests * Fix demand_share tests * Fix quantities tests, delete gross_margin * Fix most remaining tests * Remove some unnecessary casting * Doctest, hardcode "asset" * Better docstring * Fix remaining test (?) * Error message and better docstring * Fix doctest
1 parent f39f640 commit bc99cfb

File tree

10 files changed

+221
-301
lines changed

10 files changed

+221
-301
lines changed

src/muse/__main__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,22 @@ def patched_broadcast_compat_data(self, other):
102102
self_data = self.data
103103
other_data = other
104104
dims = self.dims
105+
106+
# Check output dimensions
107+
if "asset" in dims and any(
108+
dim in dims for dim in ["region", "technology", "installed"]
109+
):
110+
raise ValueError(
111+
"DataArrays with an 'asset' dimension cannot be broadcasted along "
112+
"'region', 'technology', or 'installed' dimensions. "
113+
"This error is usually raised when attempting to combine asset-level data "
114+
"(e.g. a capacity dataset with an 'asset' dimension) with a fully explicit "
115+
"technology dataset (e.g. a technology dataset with 'region' and "
116+
"'technology' dimensions). "
117+
"Please use `broadcast_techs` on the latter object before performing this "
118+
"operation."
119+
)
120+
105121
return self_data, other_data, dims
106122

107123

src/muse/objectives.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def decorated_objective(
179179
demand, ["asset", "timeslice", "commodity"], optional=["region"]
180180
)
181181
check_dimensions(
182-
technologies, ["replacement", "commodity"], optional=["timeslice"]
182+
technologies, ["replacement", "commodity"], optional=["timeslice", "asset"]
183183
)
184184

185185
# Calculate objective

src/muse/quantities.py

Lines changed: 6 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99

1010
from __future__ import annotations
1111

12-
from typing import cast
13-
1412
import numpy as np
1513
import xarray as xr
1614

@@ -143,98 +141,15 @@ def emission(
143141
from muse.utilities import broadcast_techs
144142

145143
# just in case we are passed a technologies dataset, like in other functions
146-
fouts = broadcast_techs(
147-
getattr(fixed_outputs, "fixed_outputs", fixed_outputs), production
148-
)
144+
fixed_outputs = getattr(fixed_outputs, "fixed_outputs", fixed_outputs)
145+
fouts = broadcast_techs(fixed_outputs, production)
149146
envs = is_pollutant(fouts.comm_usage)
150147
enduses = is_enduse(fouts.comm_usage)
151148
return production.sel(commodity=enduses).sum("commodity") * broadcast_timeslice(
152149
fouts.sel(commodity=envs), level=timeslice_level
153150
)
154151

155152

156-
def gross_margin(
157-
technologies: xr.Dataset,
158-
capacity: xr.DataArray,
159-
prices: xr.Dataset,
160-
timeslice_level: str | None = None,
161-
) -> xr.DataArray:
162-
"""The percentage of revenue after direct expenses have been subtracted.
163-
164-
.. _reference:
165-
https://www.investopedia.com/terms/g/grossmargin.asp
166-
We first calculate the revenues, which depend on prices
167-
We then deduct the direct expenses
168-
- energy commodities INPUTS are related to fuel costs
169-
- environmental commodities OUTPUTS are related to environmental costs
170-
- variable costs is given as technodata inputs
171-
- non-environmental commodities OUTPUTS are related to revenues.
172-
"""
173-
from muse.commodities import is_enduse, is_pollutant
174-
from muse.utilities import broadcast_techs
175-
176-
tech = broadcast_techs( # type: ignore
177-
cast(
178-
xr.Dataset,
179-
technologies[
180-
[
181-
"technical_life",
182-
"interest_rate",
183-
"var_par",
184-
"var_exp",
185-
"fixed_outputs",
186-
"fixed_inputs",
187-
]
188-
],
189-
),
190-
capacity,
191-
)
192-
193-
var_par = tech.var_par
194-
var_exp = tech.var_exp
195-
fixed_outputs = tech.fixed_outputs
196-
fixed_inputs = tech.fixed_inputs
197-
# We separate the case where we have one or more regions
198-
caparegions = np.array(capacity.region.values).reshape(-1)
199-
if len(caparegions) > 1:
200-
prices.sel(region=capacity.region)
201-
else:
202-
prices = prices.where(prices.region == capacity.region, drop=True)
203-
prices = prices.interp(year=capacity.year.values)
204-
205-
# Filters for pollutants and output commodities
206-
environmentals = is_pollutant(technologies.comm_usage)
207-
enduses = is_enduse(technologies.comm_usage)
208-
209-
# Variable costs depend on factors such as labour
210-
variable_costs = distribute_timeslice(
211-
var_par * ((fixed_outputs.sel(commodity=enduses)).sum("commodity")) ** var_exp,
212-
level=timeslice_level,
213-
)
214-
215-
# The individual prices are selected
216-
# costs due to consumables, direct inputs
217-
consumption_costs = (
218-
prices * distribute_timeslice(fixed_inputs, level=timeslice_level)
219-
).sum("commodity")
220-
# costs due to pollutants
221-
production_costs = prices * distribute_timeslice(
222-
fixed_outputs, level=timeslice_level
223-
)
224-
environmental_costs = (production_costs.sel(commodity=environmentals)).sum(
225-
"commodity"
226-
)
227-
# revenues due to product sales
228-
revenues = (production_costs.sel(commodity=enduses)).sum("commodity")
229-
230-
# Gross margin is the net between revenues and all costs
231-
result = revenues - environmental_costs - variable_costs - consumption_costs
232-
233-
# Gross margin is defined as a ratio on revenues and as a percentage
234-
result *= 100 / revenues
235-
return result
236-
237-
238153
def consumption(
239154
technologies: xr.Dataset,
240155
production: xr.DataArray,
@@ -352,8 +267,8 @@ def maximum_production(
352267
capa = filter_input(
353268
capacity, **{k: v for k, v in filters.items() if k in capacity.dims}
354269
)
355-
btechs = broadcast_techs( # type: ignore
356-
cast(xr.Dataset, technologies[["fixed_outputs", "utilization_factor"]]), capa
270+
btechs = broadcast_techs(
271+
technologies[["fixed_outputs", "utilization_factor"]], capa
357272
)
358273
ftechs = filter_input(
359274
btechs, **{k: v for k, v in filters.items() if k in btechs.dims}
@@ -470,12 +385,8 @@ def minimum_production(
470385
if "minimum_service_factor" not in technologies:
471386
return broadcast_timeslice(xr.zeros_like(capa), level=timeslice_level)
472387

473-
btechs = broadcast_techs( # type: ignore
474-
cast(
475-
xr.Dataset,
476-
technologies[["fixed_outputs", "minimum_service_factor"]],
477-
),
478-
capa,
388+
btechs = broadcast_techs(
389+
technologies[["fixed_outputs", "minimum_service_factor"]], capa
479390
)
480391
ftechs = filter_input(
481392
btechs, **{k: v for k, v in filters.items() if k in btechs.dims}

src/muse/sectors/sector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ def market_variables(self, market: xr.Dataset, technologies: xr.Dataset) -> Any:
314314

315315
# Calculate LCOE
316316
# We select data for the second year, which corresponds to the investment year
317-
technodata = cast(xr.Dataset, broadcast_techs(technologies, supply))
317+
technodata = broadcast_techs(technologies, supply)
318318
lcoe = levelized_cost_of_energy(
319319
prices=market.prices.sel(region=supply.region).isel(year=1),
320320
technologies=technodata,

src/muse/utilities.py

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,6 @@ def operation(x):
182182
def broadcast_techs(
183183
technologies: xr.Dataset | xr.DataArray,
184184
template: xr.DataArray | xr.Dataset,
185-
dimension: str = "asset",
186185
interpolation: str = "linear",
187186
installed_as_year: bool = True,
188187
**kwargs,
@@ -204,18 +203,57 @@ def broadcast_techs(
204203
Arguments:
205204
technologies: The dataset to broadcast
206205
template: the dataset or data-array to use as a template
207-
dimension: the name of the dimensiom from `template` over which to
208-
broadcast
209206
interpolation: interpolation method used across `year`
210207
installed_as_year: if the coordinate `installed` exists, then it is
211208
applied to the `year` dimension of the technologies dataset
212209
kwargs: further arguments are used initial filters over the
213210
`technologies` dataset.
211+
212+
Example:
213+
Define the technology array:
214+
>>> import xarray as xr
215+
>>> technologies = xr.DataArray(
216+
... data=[[1, 2, 3], [4, 5, 6]],
217+
... dims=['technology', 'region'],
218+
... coords={'technology': ['gasboiler', 'heatpump'],
219+
... 'region': ['R1', 'R2', 'R3']},
220+
... )
221+
222+
This array contains a value for every combination of technology and region (e.g.
223+
this could refer to the efficiency of each technology in each region).
224+
225+
Define the assets template:
226+
>>> assets = xr.DataArray(
227+
... data=[10, 50],
228+
... dims=["asset"],
229+
... coords={
230+
... "region": (["asset"], ["R1", "R2"]),
231+
... "technology": (["asset"], ["gasboiler", "heatpump"])},
232+
... )
233+
234+
We have two assets: a gas boiler in region R1 and a heat pump in region R2. In
235+
this case the values don't matter, but could correspond to the installed
236+
capacity of each asset, for example.
237+
238+
We want to select the values from the technology array that correspond to each
239+
asset in the template. To do this, we perform `broadcast_techs` on
240+
`technologies` using `assets` as a template:
241+
>>> broadcast_techs(technologies, assets)
242+
<xarray.DataArray (asset: 2)> Size: 16B
243+
array([1, 5])
244+
Coordinates:
245+
technology (asset) <U9 72B 'gasboiler' 'heatpump'
246+
region (asset) <U2 16B 'R1' 'R2'
247+
Dimensions without coordinates: asset
248+
249+
The output array has the same shape as the assets template. Each value in the
250+
output is the value in the original technology array that matches the
251+
technology & region of each asset.
214252
"""
215253
# this assert will trigger if 'year' is changed to 'installed' in
216254
# technologies, because then this function should be modified.
217255
assert "installed" not in technologies.dims
218-
names = [u for u in template.coords if template[u].dims == (dimension,)]
256+
names = [u for u in template.coords if template[u].dims == ("asset",)]
219257
# the first selection reduces the size of technologies without affecting the
220258
# dimensions.
221259
first_sel = {
@@ -293,7 +331,6 @@ def filter_input(
293331
def filter_with_template(
294332
data: xr.Dataset | xr.DataArray,
295333
template: xr.DataArray | xr.Dataset,
296-
asset_dimension: str = "asset",
297334
**kwargs,
298335
):
299336
"""Filters data to match template.
@@ -313,8 +350,8 @@ def filter_with_template(
313350
Returns:
314351
`data` transformed to match the form of `template`
315352
"""
316-
if asset_dimension in template.dims:
317-
return broadcast_techs(data, template, dimension=asset_dimension, **kwargs)
353+
if "asset" in template.dims:
354+
return broadcast_techs(data, template)
318355

319356
match_indices = set(data.dims).intersection(template.dims) - set(kwargs)
320357
match = {d: template[d].isin(data[d]).values for d in match_indices if d != "year"}

0 commit comments

Comments
 (0)