Skip to content

Commit 31bc3f6

Browse files
authored
Patch small items (#63)
* permit numpy >= 2
* add iloc to DataArray
* fix from_named_objects when using Series to init
* test_from_named_objects
* test_dataarray_iloc
* ruffen
1 parent b491fca commit 31bc3f6

File tree

3 files changed

+82
-4
lines changed

3 files changed

+82
-4
lines changed

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ name = "sharrow"
1010
requires-python = ">=3.9"
1111
dynamic = ["version"]
1212
dependencies = [
13-
"numpy >= 1.19, <2",
13+
"numpy >= 1.19",
1414
"pandas >= 1.2",
1515
"pyarrow",
1616
"xarray",
@@ -59,7 +59,6 @@ select = [
5959
"B", # flake8-bugbear
6060
]
6161
ignore = ["B905", "D1"]
62-
ignore-init-module-imports = true
6362
per-file-ignores = { "*.ipynb" = [
6463
"E402", # allow imports to appear anywhere in Jupyter Notebooks
6564
"E501", # allow long lines in Jupyter Notebooks

sharrow/dataset.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def construct(source):
9191
Parameters
9292
----------
9393
source : pandas.DataFrame, pyarrow.Table, xarray.Dataset, or Sequence[str]
94-
The source from which to create a Dataset. DataFrames and Tables
94+
The source from which to create a Dataset. DataFrame and Table objects
9595
are converted to Datasets that have one dimension (the rows) and
9696
separate variables for each of the columns. A list of strings
9797
creates a dataset with those named empty variables.
@@ -1065,6 +1065,38 @@ def __getitem__(self, key: Mapping[Hashable, Any]) -> Dataset:
10651065
return self.dataset.isel(key)
10661066

10671067

1068+
@xr.register_dataarray_accessor("iloc")
1069+
class _iLocArrayIndexer:
1070+
"""
1071+
Purely integer-location based indexing for selection by position on 1-d DataArrays.
1072+
1073+
In many ways, a dataset with a single dimension is like a pandas DataFrame,
1074+
with the one dimension giving the rows, and the variables as columns. This
1075+
analogy eventually breaks down (DataFrame columns are ordered, Dataset
1076+
variables are not) but the similarities are enough that it’s sometimes
1077+
convenient to have iloc functionality enabled. This only works for indexing
1078+
on the rows, but if there’s only the one dimension the complexity of isel
1079+
is not needed.
1080+
"""
1081+
1082+
__slots__ = ("dataarray",)
1083+
1084+
def __init__(self, dataarray: DataArray):
1085+
self.dataarray = dataarray
1086+
1087+
def __getitem__(self, key: Mapping[Hashable, Any]) -> DataArray:
1088+
if not is_dict_like(key):
1089+
if len(self.dataarray.dims) == 1:
1090+
dim_name = self.dataarray.dims.__iter__().__next__()
1091+
key = {dim_name: key}
1092+
else:
1093+
raise TypeError(
1094+
"can only lookup dictionaries from DataArray.iloc, "
1095+
"unless there is only one dimension"
1096+
)
1097+
return self.dataarray.isel(key)
1098+
1099+
10681100
xr.Dataset.rename_dims_and_coords = xr.Dataset.rename
10691101

10701102

@@ -1182,6 +1214,8 @@ def _to_ast_literal(x):
11821214
return _to_ast_literal(x.to_list())
11831215
elif isinstance(x, np.ndarray):
11841216
return _to_ast_literal(list(x))
1217+
elif isinstance(x, np.str_):
1218+
return repr(str(x))
11851219
else:
11861220
return repr(x)
11871221

@@ -1448,7 +1482,7 @@ def from_named_objects(*args):
14481482
raise ValueError(f"argument {n} has no name") from None
14491483
if name is None:
14501484
raise ValueError(f"the name for argument {n} is None")
1451-
objs[name] = a
1485+
objs[name] = np.asarray(a)
14521486
return xr.Dataset(objs)
14531487

14541488

sharrow/tests/test_datasets.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import numpy as np
66
import openmatrix
77
import pandas as pd
8+
import pytest
89
import xarray as xr
910
from pytest import approx
1011

@@ -133,3 +134,47 @@ def test_deferred_load_to_shared_memory():
133134
xr.testing.assert_equal(d0, d1)
134135
d2 = xr.Dataset.shm.from_shared_memory(token)
135136
xr.testing.assert_equal(d0, d2)
137+
138+
139+
def test_from_named_objects():
140+
from sharrow.dataset import from_named_objects
141+
142+
s1 = pd.Series([1, 4, 9, 16], name="Squares")
143+
s2 = pd.Series([2, 3, 5, 7, 11], name="Primes")
144+
i1 = pd.Index([1, 4, 9, 16], name="Squares")
145+
a1 = xr.DataArray([1, 4, 9, 16], name="Squares")
146+
147+
for obj in [s1, i1, a1]:
148+
ds = from_named_objects(obj, s2)
149+
assert "Squares" in ds.dims
150+
assert "Primes" in ds.dims
151+
assert ds.sizes == {"Squares": 4, "Primes": 5}
152+
153+
with pytest.raises(ValueError):
154+
from_named_objects([1, 4, 9, 16], s2)
155+
156+
157+
def test_dataarray_iloc():
158+
arr = xr.DataArray([1, 4, 9, 16, 25, 36], name="Squares", dims="s")
159+
160+
assert arr.iloc[1] == 4
161+
xr.testing.assert_equal(arr.iloc[1:], xr.DataArray([4, 9, 16, 25, 36], dims="s"))
162+
xr.testing.assert_equal(arr.iloc[:2], xr.DataArray([1, 4], dims="s"))
163+
xr.testing.assert_equal(arr.iloc[2:4], xr.DataArray([9, 16], dims="s"))
164+
xr.testing.assert_equal(arr.iloc[:-2], xr.DataArray([1, 4, 9, 16], dims="s"))
165+
xr.testing.assert_equal(arr.iloc[-2:], xr.DataArray([25, 36], dims="s"))
166+
167+
with pytest.raises(TypeError):
168+
arr.iloc[1] = 5 # assignment not allowed
169+
170+
arr2 = xr.DataArray([2, 3, 5, 7, 11], name="Primes", dims="p")
171+
arr2d = arr * arr2
172+
173+
with pytest.raises(TypeError):
174+
_tmp = arr2d.iloc[1] # not allowed for 2D arrays
175+
176+
assert arr2d.iloc[dict(s=1, p=2)] == 20
177+
178+
z = arr2d.iloc[dict(s=slice(1, 2), p=slice(2, 4))]
179+
180+
xr.testing.assert_equal(z, xr.DataArray([[20, 28]], dims=["s", "p"]))

0 commit comments

Comments
 (0)