Skip to content

Commit 52da874

Browse files
committed
new configuration setting append_step
1 parent fb096d5 commit 52da874

File tree

8 files changed

+290
-42
lines changed

8 files changed

+290
-42
lines changed

CHANGES.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,16 @@
22

33
### Enhancements
44

5+
* A new configuration setting `append_step` can be used to validate
6+
the step sizes between the labels of a coordinate variable associated with
7+
the append dimension. Its value can be a number for numerical labels
8+
or a timedelta value such as `8h` (8 hours) or `2D` (two days) for
9+
date/time labels. The value can also be negative. [#21]
10+
11+
* The configuration setting `append_step` can take the special values
12+
`"+"` and `"-"`, which are used to verify that the labels are monotonically
13+
increasing or decreasing, respectively. [#20]
14+
515
* It is now possible to reference environment variables
616
in configuration files using the syntax `${ENV_VAR}`. [#36]
717

docs/guide.md

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,27 @@ specify its name:
9797
}
9898
```
9999

100-
All other non-variadic dimensions can and should be specified using the
101-
`fixed_dims` setting which is a mapping from dimension name to the
102-
fixed dimension size, e.g.:
100+
The configuration setting `append_step` can be used to validate the step sizes
101+
between the labels of a coordinate variable associated with the append dimension.
102+
Its value can be a number for numerical labels or a timedelta value of the form
103+
`<count><unit>` for date/time labels. In the latter case `<count>` is an integer
104+
and `<unit>` is one of the possible
105+
[numpy datetime units](https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-units),
106+
for example, `8h` (8 hours) or `2D` (two days). Numerical and timedelta values
107+
may be negative. `append_step` can also take the two special values `"+"` and
108+
`"-"`. In this case, it is only verified that the append labels are monotonically
109+
increasing or decreasing, respectively.
110+
111+
```json
112+
{
113+
"append_dim": "time",
114+
"append_step": "2D"
115+
}
116+
```
117+
118+
Other, non-variadic dimensions besides the append dimension can and should
119+
be specified using the `fixed_dims` setting which is a mapping from dimension
120+
name to the fixed dimension size, e.g.:
103121

104122
```json
105123
{

tests/test_api.py

Lines changed: 94 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import unittest
77

88
import numpy as np
9+
import pytest
910
import xarray as xr
1011

1112
from zappend.api import FileObj
@@ -82,7 +83,7 @@ def test_some_slices_with_inc_append_step(self):
8283
make_test_dataset(index=1, shape=(1, 50, 100)),
8384
make_test_dataset(index=2, shape=(1, 50, 100)),
8485
]
85-
zappend(slices, target_dir=target_dir, append_step="1d")
86+
zappend(slices, target_dir=target_dir, append_step="1D")
8687
ds = xr.open_zarr(target_dir)
8788
np.testing.assert_array_equal(
8889
ds.time.values,
@@ -96,57 +97,118 @@ def test_some_slices_with_dec_append_step(self):
9697
make_test_dataset(index=1, shape=(1, 50, 100)),
9798
make_test_dataset(index=0, shape=(1, 50, 100)),
9899
]
99-
zappend(slices, target_dir=target_dir, append_step="-1d")
100+
zappend(slices, target_dir=target_dir, append_step="-1D")
100101
ds = xr.open_zarr(target_dir)
101102
np.testing.assert_array_equal(
102103
ds.time.values,
103104
np.array(["2024-01-03", "2024-01-02", "2024-01-01"], dtype=np.datetime64),
104105
)
105106

106-
def test_some_slices_with_one_missing_append_step(self):
107+
# # See https://github.com/bcdev/zappend/issues/21
108+
#
109+
# def test_some_slices_with_one_missing_append_step(self):
110+
# target_dir = "memory://target.zarr"
111+
# slices = [
112+
# make_test_dataset(index=0, shape=(1, 50, 100)),
113+
# make_test_dataset(index=2, shape=(1, 50, 100)),
114+
# ]
115+
# zappend(slices, target_dir=target_dir, append_step="1D")
116+
# ds = xr.open_zarr(target_dir)
117+
# np.testing.assert_array_equal(
118+
# ds.time.values,
119+
# np.array(
120+
# ["2024-01-01", "2024-01-02", "2024-01-03"], dtype="datetime64[ns]"
121+
# ),
122+
# )
123+
124+
# # See https://github.com/bcdev/zappend/issues/21
125+
#
126+
# def test_some_slices_with_three_missing_append_steps(self):
127+
# target_dir = "memory://target.zarr"
128+
# slices = [
129+
# make_test_dataset(index=0, shape=(1, 50, 100)),
130+
# make_test_dataset(index=4, shape=(1, 50, 100)),
131+
# ]
132+
# zappend(slices, target_dir=target_dir, append_step="1D")
133+
# ds = xr.open_zarr(target_dir)
134+
# np.testing.assert_array_equal(
135+
# ds.time.values,
136+
# np.array(
137+
# [
138+
# "2024-01-01",
139+
# "2024-01-02",
140+
# "2024-01-03",
141+
# "2024-01-04",
142+
# "2024-01-05",
143+
# ],
144+
# dtype="datetime64[ns]",
145+
# ),
146+
# )
147+
148+
def test_it_raises_for_wrong_append_step(self):
149+
target_dir = "memory://target.zarr"
150+
slices = [
151+
make_test_dataset(index=0, shape=(1, 50, 100)),
152+
make_test_dataset(index=1, shape=(1, 50, 100)),
153+
]
154+
with pytest.raises(
155+
ValueError,
156+
match=(
157+
"Cannot append slice because this would result in"
158+
" an invalid step size."
159+
),
160+
):
161+
zappend(slices, target_dir=target_dir, append_step="2D")
162+
163+
def test_some_slices_with_inc_append_labels(self):
164+
append_step = "+"
165+
107166
target_dir = "memory://target.zarr"
108167
slices = [
109168
make_test_dataset(index=0, shape=(1, 50, 100)),
169+
make_test_dataset(index=1, shape=(1, 50, 100)),
110170
make_test_dataset(index=2, shape=(1, 50, 100)),
111171
]
112-
zappend(slices, target_dir=target_dir, append_step="1d")
113-
ds = xr.open_zarr(target_dir)
114-
np.testing.assert_array_equal(
115-
ds.time.values,
116-
np.array(["2024-01-01", "2024-01-02", "2024-01-03"], dtype=np.datetime64),
117-
)
172+
# OK!
173+
zappend(slices, target_dir=target_dir, append_step=append_step)
118174

119-
def test_some_slices_with_three_missing_append_steps(self):
120175
target_dir = "memory://target.zarr"
121176
slices = [
177+
make_test_dataset(index=1, shape=(1, 50, 100)),
122178
make_test_dataset(index=0, shape=(1, 50, 100)),
123-
make_test_dataset(index=4, shape=(1, 50, 100)),
124179
]
125-
zappend(slices, target_dir=target_dir, append_step="1d")
126-
ds = xr.open_zarr(target_dir)
127-
np.testing.assert_array_equal(
128-
ds.time.values,
129-
np.array(
130-
["2024-01-01", "2024-01-02", "2024-01-03", "2024-01-04", "2024-01-05"],
131-
dtype=np.datetime64,
180+
with pytest.raises(
181+
ValueError,
182+
match=(
183+
"Cannot append slice because labels must be monotonically increasing"
132184
),
133-
)
134-
135-
def test_it_raises_for_wrong_append_step(self):
136-
# TODO: implement me
137-
pass
138-
139-
def test_some_slices_with_inc_append_labels(self):
140-
# TODO: implement me
141-
pass
185+
):
186+
zappend(slices, target_dir=target_dir, append_step=append_step)
142187

143188
def test_some_slices_with_dec_append_labels(self):
144-
# TODO: implement me
145-
pass
189+
append_step = "-"
146190

147-
def test_it_raises_for_none_inc_append_labels(self):
148-
# TODO: implement me
149-
pass
191+
target_dir = "memory://target.zarr"
192+
slices = [
193+
make_test_dataset(index=2, shape=(1, 50, 100)),
194+
make_test_dataset(index=1, shape=(1, 50, 100)),
195+
make_test_dataset(index=0, shape=(1, 50, 100)),
196+
]
197+
# OK!
198+
zappend(slices, target_dir=target_dir, append_step=append_step)
199+
200+
target_dir = "memory://target.zarr"
201+
slices = [
202+
make_test_dataset(index=0, shape=(1, 50, 100)),
203+
make_test_dataset(index=1, shape=(1, 50, 100)),
204+
]
205+
with pytest.raises(
206+
ValueError,
207+
match=(
208+
"Cannot append slice because labels must be monotonically decreasing"
209+
),
210+
):
211+
zappend(slices, target_dir=target_dir, append_step=append_step)
150212

151213
def test_some_slices_with_profiling(self):
152214
target_dir = "memory://target.zarr"

tests/test_context.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,16 @@ def test_append_step(self):
4545
make_test_dataset(uri="memory://target.zarr")
4646
ctx = Context({"target_dir": "memory://target.zarr"})
4747
self.assertEqual(None, ctx.append_step_size)
48-
ctx = Context({"target_dir": "memory://target.zarr", "append_step": "1d"})
49-
self.assertEqual("1d", ctx.append_step_size)
48+
ctx = Context({"target_dir": "memory://target.zarr", "append_step": "1D"})
49+
self.assertEqual("1D", ctx.append_step_size)
5050

5151
def test_last_append_label(self):
5252
make_test_dataset(uri="memory://target.zarr")
5353
ctx = Context({"target_dir": "memory://target.zarr"})
5454
self.assertEqual(None, ctx.last_append_label)
55-
ctx = Context({"target_dir": "memory://TARGET.zarr", "append_step": "1d"})
55+
ctx = Context({"target_dir": "memory://TARGET.zarr", "append_step": "1D"})
5656
self.assertEqual(None, ctx.last_append_label)
57-
ctx = Context({"target_dir": "memory://target.zarr", "append_step": "1d"})
57+
ctx = Context({"target_dir": "memory://target.zarr", "append_step": "1D"})
5858
self.assertEqual(np.datetime64("2024-01-03"), ctx.last_append_label)
5959

6060
def test_slice_polling(self):

tests/test_processor.py

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,21 @@
33
# https://opensource.org/licenses/MIT.
44

55
import unittest
6+
7+
import numpy as np
8+
import pytest
69
import xarray as xr
710

811
from zappend.fsutil.fileobj import FileObj
12+
from zappend.context import Context
913
from zappend.processor import Processor
14+
from zappend.processor import to_timedelta
15+
from zappend.processor import verify_append_labels
1016
from .helpers import clear_memory_fs
1117
from .helpers import make_test_dataset
1218

1319

14-
class TestProcessor(unittest.TestCase):
20+
class ProcessorTest(unittest.TestCase):
1521
def setUp(self):
1622
clear_memory_fs()
1723

@@ -142,3 +148,83 @@ def test_process_two_slices_with_chunk_overlap(self):
142148
self.assertEqual(None, ds.time.chunks)
143149
self.assertEqual(((3, 1), (5, 5), (10, 10)), ds.chl.chunks)
144150
self.assertEqual(((3, 1), (5, 5), (10, 10)), ds.tsm.chunks)
151+
152+
153+
# noinspection PyMethodMayBeStatic
154+
class AppendLabelValidationTest(unittest.TestCase):
155+
def setUp(self):
156+
clear_memory_fs()
157+
158+
def test_verify_append_labels_succeeds(self):
159+
ctx = Context({"target_dir": "memory://target.zarr", "append_step": "1D"})
160+
161+
# Ok, because we have no delta
162+
slice_ds = make_test_dataset(shape=(1, 50, 100))
163+
verify_append_labels(ctx, slice_ds)
164+
165+
# Ok, because we have 4 deltas that are 1D
166+
slice_ds = make_test_dataset(shape=(5, 50, 100))
167+
verify_append_labels(ctx, slice_ds)
168+
169+
# Ok, because after removing "time" coordinate variable,
170+
# xarray will use numerical labels
171+
ctx = Context({"target_dir": "memory://target.zarr", "append_step": 1})
172+
slice_ds = make_test_dataset(shape=(3, 50, 100)).drop_vars(["time"])
173+
verify_append_labels(ctx, slice_ds)
174+
175+
# Ok, because "foo" has no labels
176+
ctx = Context(
177+
{
178+
"target_dir": "memory://target.zarr",
179+
"append_dim": "foo",
180+
"append_step": "1D",
181+
}
182+
)
183+
slice_ds = make_test_dataset(shape=(3, 50, 100))
184+
verify_append_labels(ctx, slice_ds)
185+
186+
def test_verify_append_labels_fails(self):
187+
ctx = Context({"target_dir": "memory://target.zarr", "append_step": "2D"})
188+
slice_ds = make_test_dataset(shape=(3, 50, 100))
189+
with pytest.raises(
190+
ValueError,
191+
match="Cannot append slice because this would result in an invalid step size.",
192+
):
193+
verify_append_labels(ctx, slice_ds)
194+
195+
ctx = Context({"target_dir": "memory://target.zarr", "append_step": "-"})
196+
slice_ds = make_test_dataset(shape=(3, 50, 100))
197+
with pytest.raises(
198+
ValueError,
199+
match="Cannot append slice because labels must be monotonically decreasing.",
200+
):
201+
verify_append_labels(ctx, slice_ds)
202+
203+
ctx = Context({"target_dir": "memory://target.zarr", "append_step": "+"})
204+
slice_ds = make_test_dataset(shape=(3, 50, 100))
205+
time = slice_ds["time"]
206+
slice_ds["time"] = xr.DataArray(
207+
list(reversed(time.values)), dims=time.dims, attrs=time.attrs
208+
)
209+
with pytest.raises(
210+
ValueError,
211+
match="Cannot append slice because labels must be monotonically increasing.",
212+
):
213+
verify_append_labels(ctx, slice_ds)
214+
215+
216+
class ToTimedeltaTest(unittest.TestCase):
217+
def test_it(self):
218+
self.assertEqual(np.timedelta64(1, "s"), to_timedelta("s"))
219+
self.assertEqual(np.timedelta64(1, "m"), to_timedelta("m"))
220+
self.assertEqual(np.timedelta64(1, "h"), to_timedelta("h"))
221+
self.assertEqual(np.timedelta64(1, "h"), to_timedelta("1h"))
222+
self.assertEqual(np.timedelta64(24, "h"), to_timedelta("24h"))
223+
self.assertEqual(np.timedelta64(1, "D"), to_timedelta("24h"))
224+
self.assertEqual(np.timedelta64(1, "D"), to_timedelta("D"))
225+
self.assertEqual(np.timedelta64(1, "D"), to_timedelta("1D"))
226+
self.assertEqual(np.timedelta64(7, "D"), to_timedelta("7D"))
227+
self.assertEqual(np.timedelta64(1, "W"), to_timedelta("7D"))
228+
self.assertEqual(np.timedelta64(12, "D"), to_timedelta("12D"))
229+
self.assertEqual(np.timedelta64(60 * 60 * 24, "s"), to_timedelta(60 * 60 * 24))
230+
self.assertEqual(np.timedelta64(1, "D"), to_timedelta(60 * 60 * 24))

zappend/config/markdown.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def _schema_to_md(
4141
if description:
4242
lines.append(description)
4343

44-
one_of = schema.get("oneOf")
44+
one_of = schema.get("anyOf") or schema.get("oneOf")
4545
if one_of:
4646
if sequence_name:
4747
lines.append(f"The {sequence_name} must be one of the following.")

zappend/config/schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -479,7 +479,7 @@
479479
"If set, enforces a step size in the append dimension between two"
480480
" slices or just enforces a direction."
481481
),
482-
"oneOf": [
482+
"anyOf": [
483483
{
484484
"description": "Arbitrary step size or not applicable.",
485485
"const": None,

0 commit comments

Comments
 (0)