11from __future__ import annotations
22
3+ import ast
34import asyncio
5+ import builtins
46import copy
57import datetime
68import io
5153from xarray .core .dataset_utils import _get_virtual_variable , _LocIndexer
5254from xarray .core .dataset_variables import DataVariables
5355from xarray .core .duck_array_ops import datetime_to_numeric
56+ from xarray .core .eval import (
57+ EVAL_BUILTINS ,
58+ LogicalOperatorTransformer ,
59+ validate_expression ,
60+ )
5461from xarray .core .indexes import (
5562 Index ,
5663 Indexes ,
7279 Self ,
7380 T_ChunkDim ,
7481 T_ChunksFreq ,
75- T_DataArray ,
7682 T_DataArrayOrSet ,
7783 ZarrWriteModes ,
7884)
@@ -9533,19 +9539,48 @@ def argmax(self, dim: Hashable | None = None, **kwargs) -> Self:
95339539 "Dataset.argmin() with a sequence or ... for dim"
95349540 )
95359541
9542+ def _eval_expression (self , expr : str ) -> DataArray :
9543+ """Evaluate an expression string using xarray's native operations."""
9544+ try :
9545+ tree = ast .parse (expr , mode = "eval" )
9546+ except SyntaxError as e :
9547+ raise ValueError (f"Invalid expression syntax: { expr } " ) from e
9548+
9549+ # Transform logical operators for consistency with query().
9550+ # See LogicalOperatorTransformer docstring for details.
9551+ tree = LogicalOperatorTransformer ().visit (tree )
9552+ ast .fix_missing_locations (tree )
9553+
9554+ validate_expression (tree )
9555+
9556+ # Build namespace: data variables, coordinates, modules, and safe builtins.
9557+ # Empty __builtins__ blocks dangerous functions like __import__, exec, open.
9558+ # Priority order (highest to lowest): data variables > coordinates > modules > builtins
9559+ # This ensures user data always wins when names collide with builtins.
9560+ import xarray as xr # Lazy import to avoid circular dependency
9561+
9562+ namespace : dict [str , Any ] = dict (EVAL_BUILTINS )
9563+ namespace .update ({"np" : np , "pd" : pd , "xr" : xr })
9564+ namespace .update ({str (name ): self .coords [name ] for name in self .coords })
9565+ namespace .update ({str (name ): self [name ] for name in self .data_vars })
9566+
9567+ code = compile (tree , "<xarray.eval>" , "eval" )
9568+ return builtins .eval (code , {"__builtins__" : {}}, namespace )
9569+
95369570 def eval (
95379571 self ,
95389572 statement : str ,
95399573 * ,
9540- parser : QueryParserOptions = "pandas" ,
9541- ) -> Self | T_DataArray :
9574+ parser : QueryParserOptions | Default = _default ,
9575+ ) -> Self | DataArray :
95429576 """
95439577 Calculate an expression supplied as a string in the context of the dataset.
95449578
95459579 This is currently experimental; the API may change particularly around
95469580 assignments, which currently return a ``Dataset`` with the additional variable.
9547- Currently only the ``python`` engine is supported, which has the same
9548- performance as executing in python.
9581+
9582+ Logical operators (``and``, ``or``, ``not``) are automatically transformed
9583+ to bitwise operators (``&``, ``|``, ``~``) which work element-wise on arrays.
95499584
95509585 Parameters
95519586 ----------
@@ -9555,7 +9590,11 @@ def eval(
95559590 Returns
95569591 -------
95579592 result : Dataset or DataArray, depending on whether ``statement`` contains an
9558- assignment.
9593+ assignment.
9594+
9595+ Warning
9596+ -------
9597+ Like ``pd.eval()``, this method should not be used with untrusted input.
95599598
95609599 Examples
95619600 --------
@@ -9584,16 +9623,55 @@ def eval(
95849623 b (x) float64 40B 0.0 0.25 0.5 0.75 1.0
95859624 c (x) float64 40B 0.0 1.25 2.5 3.75 5.0
95869625 """
9626+ if parser is not _default :
9627+ emit_user_level_warning (
9628+ "The 'parser' argument to Dataset.eval() is deprecated and will be "
9629+ "removed in a future version. Logical operators (and/or/not) are now "
9630+ "always transformed to bitwise operators (&/|/~) for array compatibility." ,
9631+ FutureWarning ,
9632+ )
95879633
9588- return pd .eval ( # type: ignore[return-value]
9589- statement ,
9590- resolvers = [self ],
9591- target = self ,
9592- parser = parser ,
9593- # Because numexpr returns a numpy array, using that engine results in
9594- # different behavior. We'd be very open to a contribution handling this.
9595- engine = "python" ,
9596- )
9634+ statement = statement .strip ()
9635+
9636+ # Check for assignment: "target = expr"
9637+ # Must handle compound operators like ==, !=, <=, >=
9638+ # Use ast to detect assignment properly
9639+ try :
9640+ tree = ast .parse (statement , mode = "exec" )
9641+ except SyntaxError as e :
9642+ raise ValueError (f"Invalid statement syntax: { statement } " ) from e
9643+
9644+ if len (tree .body ) != 1 :
9645+ raise ValueError ("Only single statements are supported" )
9646+
9647+ stmt = tree .body [0 ]
9648+
9649+ if isinstance (stmt , ast .Assign ):
9650+ # Assignment: "c = a + b"
9651+ if len (stmt .targets ) != 1 :
9652+ raise ValueError ("Only single assignment targets are supported" )
9653+ target = stmt .targets [0 ]
9654+ if not isinstance (target , ast .Name ):
9655+ raise ValueError (
9656+ f"Assignment target must be a simple name, got { type (target ).__name__ } "
9657+ )
9658+ target_name = target .id
9659+
9660+ # Get the expression source
9661+ expr_source = ast .unparse (stmt .value )
9662+ result : DataArray = self ._eval_expression (expr_source )
9663+ return self .assign ({target_name : result })
9664+
9665+ elif isinstance (stmt , ast .Expr ):
9666+ # Expression: "a + b"
9667+ expr_source = ast .unparse (stmt .value )
9668+ return self ._eval_expression (expr_source )
9669+
9670+ else :
9671+ raise ValueError (
9672+ f"Unsupported statement type: { type (stmt ).__name__ } . "
9673+ f"Only expressions and assignments are supported."
9674+ )
95979675
95989676 def query (
95999677 self ,
0 commit comments