Skip to content

Commit f8196f0

Browse files
authored
Merge branch 'main' into string-dtype
2 parents 8848129 + 49ec516 commit f8196f0

File tree

18 files changed

+911
-58
lines changed

18 files changed

+911
-58
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ New Features
3636
of the same name are compared for potential conflicts when performing binary operations. The default for it is
3737
``arithmetic_compat='minimal'`` which matches the existing behaviour.
3838
By `Matthew Willson <https://github.com/mjwillson>`_.
39+
- Better ordering of coordinates when displaying Xarray objects (:pull:`11098`).
40+
By `Ian Hunt-Isaak <https://github.com/ianhi>`_, `Julia Signell <https://github.com/jsignell>`_.
3941

4042
Breaking Changes
4143
~~~~~~~~~~~~~~~~

pixi.toml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,13 @@ cytoolz = "*"
286286
[feature.release.tasks]
287287
release-contributors = "python ci/release_contributors.py"
288288

289+
[feature.dev.dependencies]
290+
ipython = ">=9.8.0,<10"
291+
black = ">=25.1.0,<26"
292+
293+
[feature.dev.pypi-dependencies]
294+
pytest-accept = ">=0.2.2, <0.3"
295+
289296
[feature.policy.pypi-dependencies]
290297
xarray-minimum-dependency-policy = "*"
291298

@@ -422,4 +429,15 @@ doc = { features = [
422429
] }
423430
pre-commit = { features = ["pre-commit"], no-default-feature = true }
424431
release = { features = ["release"], no-default-feature = true }
432+
default = { features = [
433+
"py313",
434+
"test",
435+
"backends",
436+
"accel",
437+
"numba",
438+
"dask",
439+
"viz",
440+
"extras",
441+
"dev",
442+
] }
425443
policy = { features = ["policy"], no-default-feature = true }

xarray/backends/zarr.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ def extract_zarr_variable_encoding(
491491
# The only change is to raise an error for object dtypes.
492492
def encode_zarr_variable(var, needs_copy=True, name=None):
493493
"""
494-
Converts an Variable into an Variable which follows some
494+
Converts a Variable into another Variable which follows some
495495
of the CF conventions:
496496
497497
- Nans are masked using _FillValue (or the deprecated missing_value)

xarray/compat/npcompat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def isdtype( # type: ignore[misc]
6464
)
6565
if unknown_kinds := {k for k in str_kinds if k not in kind_mapping}:
6666
raise ValueError(
67-
f"unknown kind: {unknown_kinds}, must be a np.dtype or one of {list(kind_mapping)}"
67+
f"unknown kind: {unknown_kinds}, must be an np.dtype or one of {list(kind_mapping)}"
6868
)
6969

7070
# verified the dtypes already, no need to check again

xarray/computation/rolling.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ def __init__(
279279
Object to window.
280280
windows : mapping of hashable to int
281281
A mapping from the name of the dimension to create the rolling
282-
exponential window along (e.g. `time`) to the size of the moving window.
282+
window along (e.g. `time`) to the size of the moving window.
283283
min_periods : int, default: None
284284
Minimum number of observations in window required to have a value
285285
(otherwise result is NA). The default, None, is equivalent to
@@ -791,7 +791,7 @@ def __init__(
791791
Object to window.
792792
windows : mapping of hashable to int
793793
A mapping from the name of the dimension to create the rolling
794-
exponential window along (e.g. `time`) to the size of the moving window.
794+
window along (e.g. `time`) to the size of the moving window.
795795
min_periods : int, default: None
796796
Minimum number of observations in window required to have a value
797797
(otherwise result is NA). The default, None, is equivalent to
@@ -1061,7 +1061,7 @@ def __init__(
10611061
Object to window.
10621062
windows : mapping of hashable to int
10631063
A mapping from the name of the dimension to create the rolling
1064-
exponential window along (e.g. `time`) to the size of the moving window.
1064+
window along (e.g. `time`) to the size of the moving window.
10651065
boundary : {"exact", "trim", "pad"}
10661066
If 'exact', a ValueError will be raised if dimension size is not a
10671067
multiple of window size. If 'trim', the excess indexes are trimmed.

xarray/core/common.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -619,9 +619,9 @@ def assign_coords(
619619
<xarray.Dataset> Size: 360B
620620
Dimensions: (x: 2, y: 2, time: 4)
621621
Coordinates:
622-
* time (time) datetime64[ns] 32B 2014-09-06 ... 2014-09-09
623622
lon (x, y) float64 32B 260.2 260.7 260.2 260.8
624623
lat (x, y) float64 32B 42.25 42.21 42.63 42.59
624+
* time (time) datetime64[ns] 32B 2014-09-06 ... 2014-09-09
625625
reference_time datetime64[ns] 8B 2014-09-05
626626
Dimensions without coordinates: x, y
627627
Data variables:
@@ -633,9 +633,9 @@ def assign_coords(
633633
<xarray.Dataset> Size: 360B
634634
Dimensions: (x: 2, y: 2, time: 4)
635635
Coordinates:
636-
* time (time) datetime64[ns] 32B 2014-09-06 ... 2014-09-09
637636
lon (x, y) float64 32B -99.83 -99.32 -99.79 -99.23
638637
lat (x, y) float64 32B 42.25 42.21 42.63 42.59
638+
* time (time) datetime64[ns] 32B 2014-09-06 ... 2014-09-09
639639
reference_time datetime64[ns] 8B 2014-09-05
640640
Dimensions without coordinates: x, y
641641
Data variables:

xarray/core/dataarray.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -369,9 +369,9 @@ class DataArray(
369369
[[22.60070734, 13.78914233, 14.17424919],
370370
[18.28478802, 16.15234857, 26.63418806]]])
371371
Coordinates:
372-
* time (time) datetime64[ns] 24B 2014-09-06 2014-09-07 2014-09-08
373372
lon (x, y) float64 32B -99.83 -99.32 -99.79 -99.23
374373
lat (x, y) float64 32B 42.25 42.21 42.63 42.59
374+
* time (time) datetime64[ns] 24B 2014-09-06 2014-09-07 2014-09-08
375375
reference_time datetime64[ns] 8B 2014-09-05
376376
Dimensions without coordinates: x, y
377377
Attributes:
@@ -2807,8 +2807,8 @@ def set_index(
28072807
[1., 1., 1.]])
28082808
Coordinates:
28092809
* x (x) int64 16B 0 1
2810-
* y (y) int64 24B 0 1 2
28112810
a (x) int64 16B 3 4
2811+
* y (y) int64 24B 0 1 2
28122812
>>> arr.set_index(x="a")
28132813
<xarray.DataArray (x: 2, y: 3)> Size: 48B
28142814
array([[1., 1., 1.],
@@ -5964,8 +5964,8 @@ def pad(
59645964
[nan, nan, nan, nan]])
59655965
Coordinates:
59665966
* x (x) float64 32B nan 0.0 1.0 nan
5967-
* y (y) int64 32B 10 20 30 40
59685967
z (x) float64 32B nan 100.0 200.0 nan
5968+
* y (y) int64 32B 10 20 30 40
59695969
59705970
Careful, ``constant_values`` are coerced to the data type of the array which may
59715971
lead to a loss of precision:
@@ -5978,8 +5978,8 @@ def pad(
59785978
[ 1, 1, 1, 1]])
59795979
Coordinates:
59805980
* x (x) float64 32B nan 0.0 1.0 nan
5981-
* y (y) int64 32B 10 20 30 40
59825981
z (x) float64 32B nan 100.0 200.0 nan
5982+
* y (y) int64 32B 10 20 30 40
59835983
"""
59845984
ds = self._to_temp_dataset().pad(
59855985
pad_width=pad_width,

xarray/core/dataset.py

Lines changed: 98 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from __future__ import annotations
22

3+
import ast
34
import asyncio
5+
import builtins
46
import copy
57
import datetime
68
import io
@@ -51,6 +53,11 @@
5153
from xarray.core.dataset_utils import _get_virtual_variable, _LocIndexer
5254
from xarray.core.dataset_variables import DataVariables
5355
from xarray.core.duck_array_ops import datetime_to_numeric
56+
from xarray.core.eval import (
57+
EVAL_BUILTINS,
58+
LogicalOperatorTransformer,
59+
validate_expression,
60+
)
5461
from xarray.core.indexes import (
5562
Index,
5663
Indexes,
@@ -72,7 +79,6 @@
7279
Self,
7380
T_ChunkDim,
7481
T_ChunksFreq,
75-
T_DataArray,
7682
T_DataArrayOrSet,
7783
ZarrWriteModes,
7884
)
@@ -316,10 +322,10 @@ class Dataset(
316322
<xarray.Dataset> Size: 552B
317323
Dimensions: (loc: 2, instrument: 3, time: 4)
318324
Coordinates:
319-
* instrument (instrument) <U8 96B 'manufac1' 'manufac2' 'manufac3'
320-
* time (time) datetime64[ns] 32B 2014-09-06 ... 2014-09-09
321325
lon (loc) float64 16B -99.83 -99.32
322326
lat (loc) float64 16B 42.25 42.21
327+
* instrument (instrument) <U8 96B 'manufac1' 'manufac2' 'manufac3'
328+
* time (time) datetime64[ns] 32B 2014-09-06 ... 2014-09-09
323329
reference_time datetime64[ns] 8B 2014-09-05
324330
Dimensions without coordinates: loc
325331
Data variables:
@@ -4727,8 +4733,8 @@ def set_index(
47274733
Dimensions: (x: 2, y: 3)
47284734
Coordinates:
47294735
* x (x) int64 16B 0 1
4730-
* y (y) int64 24B 0 1 2
47314736
a (x) int64 16B 3 4
4737+
* y (y) int64 24B 0 1 2
47324738
Data variables:
47334739
v (x, y) float64 48B 1.0 1.0 1.0 1.0 1.0 1.0
47344740
>>> ds.set_index(x="a")
@@ -8707,9 +8713,9 @@ def filter_by_attrs(self, **kwargs) -> Self:
87078713
<xarray.Dataset> Size: 192B
87088714
Dimensions: (x: 2, y: 2, time: 3)
87098715
Coordinates:
8710-
* time (time) datetime64[ns] 24B 2014-09-06 2014-09-07 2014-09-08
87118716
lon (x, y) float64 32B -99.83 -99.32 -99.79 -99.23
87128717
lat (x, y) float64 32B 42.25 42.21 42.63 42.59
8718+
* time (time) datetime64[ns] 24B 2014-09-06 2014-09-07 2014-09-08
87138719
reference_time datetime64[ns] 8B 2014-09-05
87148720
Dimensions without coordinates: x, y
87158721
Data variables:
@@ -8722,9 +8728,9 @@ def filter_by_attrs(self, **kwargs) -> Self:
87228728
<xarray.Dataset> Size: 288B
87238729
Dimensions: (x: 2, y: 2, time: 3)
87248730
Coordinates:
8725-
* time (time) datetime64[ns] 24B 2014-09-06 2014-09-07 2014-09-08
87268731
lon (x, y) float64 32B -99.83 -99.32 -99.79 -99.23
87278732
lat (x, y) float64 32B 42.25 42.21 42.63 42.59
8733+
* time (time) datetime64[ns] 24B 2014-09-06 2014-09-07 2014-09-08
87288734
reference_time datetime64[ns] 8B 2014-09-05
87298735
Dimensions without coordinates: x, y
87308736
Data variables:
@@ -9533,19 +9539,48 @@ def argmax(self, dim: Hashable | None = None, **kwargs) -> Self:
95339539
"Dataset.argmin() with a sequence or ... for dim"
95349540
)
95359541

9542+
def _eval_expression(self, expr: str) -> DataArray:
9543+
"""Evaluate an expression string using xarray's native operations."""
9544+
try:
9545+
tree = ast.parse(expr, mode="eval")
9546+
except SyntaxError as e:
9547+
raise ValueError(f"Invalid expression syntax: {expr}") from e
9548+
9549+
# Transform logical operators for consistency with query().
9550+
# See LogicalOperatorTransformer docstring for details.
9551+
tree = LogicalOperatorTransformer().visit(tree)
9552+
ast.fix_missing_locations(tree)
9553+
9554+
validate_expression(tree)
9555+
9556+
# Build namespace: data variables, coordinates, modules, and safe builtins.
9557+
# Empty __builtins__ blocks dangerous functions like __import__, exec, open.
9558+
# Priority order (highest to lowest): data variables > coordinates > modules > builtins
9559+
# This ensures user data always wins when names collide with builtins.
9560+
import xarray as xr # Lazy import to avoid circular dependency
9561+
9562+
namespace: dict[str, Any] = dict(EVAL_BUILTINS)
9563+
namespace.update({"np": np, "pd": pd, "xr": xr})
9564+
namespace.update({str(name): self.coords[name] for name in self.coords})
9565+
namespace.update({str(name): self[name] for name in self.data_vars})
9566+
9567+
code = compile(tree, "<xarray.eval>", "eval")
9568+
return builtins.eval(code, {"__builtins__": {}}, namespace)
9569+
95369570
def eval(
95379571
self,
95389572
statement: str,
95399573
*,
9540-
parser: QueryParserOptions = "pandas",
9541-
) -> Self | T_DataArray:
9574+
parser: QueryParserOptions | Default = _default,
9575+
) -> Self | DataArray:
95429576
"""
95439577
Calculate an expression supplied as a string in the context of the dataset.
95449578
95459579
This is currently experimental; the API may change particularly around
95469580
assignments, which currently return a ``Dataset`` with the additional variable.
9547-
Currently only the ``python`` engine is supported, which has the same
9548-
performance as executing in python.
9581+
9582+
Logical operators (``and``, ``or``, ``not``) are automatically transformed
9583+
to bitwise operators (``&``, ``|``, ``~``) which work element-wise on arrays.
95499584
95509585
Parameters
95519586
----------
@@ -9555,7 +9590,11 @@ def eval(
95559590
Returns
95569591
-------
95579592
result : Dataset or DataArray, depending on whether ``statement`` contains an
9558-
assignment.
9593+
assignment.
9594+
9595+
Warnings
9596+
--------
9597+
Like ``pd.eval()``, this method should not be used with untrusted input.
95599598
95609599
Examples
95619600
--------
@@ -9584,16 +9623,55 @@ def eval(
95849623
b (x) float64 40B 0.0 0.25 0.5 0.75 1.0
95859624
c (x) float64 40B 0.0 1.25 2.5 3.75 5.0
95869625
"""
9626+
if parser is not _default:
9627+
emit_user_level_warning(
9628+
"The 'parser' argument to Dataset.eval() is deprecated and will be "
9629+
"removed in a future version. Logical operators (and/or/not) are now "
9630+
"always transformed to bitwise operators (&/|/~) for array compatibility.",
9631+
FutureWarning,
9632+
)
95879633

9588-
return pd.eval( # type: ignore[return-value]
9589-
statement,
9590-
resolvers=[self],
9591-
target=self,
9592-
parser=parser,
9593-
# Because numexpr returns a numpy array, using that engine results in
9594-
# different behavior. We'd be very open to a contribution handling this.
9595-
engine="python",
9596-
)
9634+
statement = statement.strip()
9635+
9636+
# Check for assignment: "target = expr"
9637+
# Must not mistake comparison operators that contain "=" (==, !=, <=, >=) for assignment
9638+
# Use ast to detect assignment properly
9639+
try:
9640+
tree = ast.parse(statement, mode="exec")
9641+
except SyntaxError as e:
9642+
raise ValueError(f"Invalid statement syntax: {statement}") from e
9643+
9644+
if len(tree.body) != 1:
9645+
raise ValueError("Only single statements are supported")
9646+
9647+
stmt = tree.body[0]
9648+
9649+
if isinstance(stmt, ast.Assign):
9650+
# Assignment: "c = a + b"
9651+
if len(stmt.targets) != 1:
9652+
raise ValueError("Only single assignment targets are supported")
9653+
target = stmt.targets[0]
9654+
if not isinstance(target, ast.Name):
9655+
raise ValueError(
9656+
f"Assignment target must be a simple name, got {type(target).__name__}"
9657+
)
9658+
target_name = target.id
9659+
9660+
# Get the expression source
9661+
expr_source = ast.unparse(stmt.value)
9662+
result: DataArray = self._eval_expression(expr_source)
9663+
return self.assign({target_name: result})
9664+
9665+
elif isinstance(stmt, ast.Expr):
9666+
# Expression: "a + b"
9667+
expr_source = ast.unparse(stmt.value)
9668+
return self._eval_expression(expr_source)
9669+
9670+
else:
9671+
raise ValueError(
9672+
f"Unsupported statement type: {type(stmt).__name__}. "
9673+
f"Only expressions and assignments are supported."
9674+
)
95979675

95989676
def query(
95999677
self,

0 commit comments

Comments
 (0)