Skip to content

Commit 83e1616

Browse files
committed
wip
1 parent afd2eef commit 83e1616

File tree

2 files changed

+166
-2
lines changed

2 files changed

+166
-2
lines changed

zarr/convenience.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,27 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None,
399399
because it avoids de-compressing and re-compressing data, rather the compressed
400400
chunk data for each array are copied directly between stores.
401401
402+
Parameters
403+
----------
404+
source : Mapping
405+
Store to copy data from.
406+
dest : MutableMapping
407+
Store to copy data into.
408+
source_path : str, optional
409+
Only copy data from under this path in the source store.
410+
dest_path : str, optional
411+
Copy data into this path in the destination store.
412+
excludes : sequence of str, optional
413+
One or more regular expressions which will be matched against keys in the
414+
source store. Any matching key will not be copied.
415+
includes : sequence of str, optional
416+
One or more regular expressions which will be matched against keys in the
417+
source store and will override any excludes also matching.
418+
flags : int, optional
419+
Regular expression flags used for matching excludes and includes.
420+
log : callable, file path or file-like object, optional
421+
If provided, will be used to log progress information.
422+
402423
Examples
403424
--------
404425
>>> import zarr
@@ -484,3 +505,68 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None,
484505
# retrieve and copy data
485506
log('{} -> {}'.format(source_key, dest_key))
486507
dest[dest_key] = source[source_key]
508+
509+
510+
def copy(source, dest, name=None, without_attrs=False, log=None, **create_kws):
    """Copy `source` into `dest`, optionally logging progress.

    Public entry point: wraps `log` in a ``_LogWriter`` for the duration of
    the call and delegates the actual work to ``_copy``, forwarding all other
    arguments unchanged.

    Parameters
    ----------
    source : group or array/dataset
        Object to copy from. NOTE(review): presumably a zarr or h5py object;
        confirm against `_copy`.
    dest : group
        Object to copy into.
    name : str, optional
        Name to copy to; forwarded to `_copy`.
    without_attrs : bool, optional
        Forwarded to `_copy`; presumably skips copying of attributes when
        True.
    log : optional
        Passed to ``_LogWriter``. NOTE(review): presumably accepts the same
        values as the `log` parameter of `copy_store` - confirm.
    **create_kws
        Extra keyword arguments forwarded to `_copy`.

    """

    # keep the log writer open for exactly as long as the copy runs
    with _LogWriter(log) as writer:
        _copy(writer, source, dest, name=name, without_attrs=without_attrs,
              **create_kws)
516+
517+
518+
def _copy(log, source, dest, name=None, without_attrs=False, **create_kws):
519+
"""TODO"""
520+
521+
# are we copying to/from h5py?
522+
source_h5py = source.__module__.startswith('h5py.')
523+
dest_h5py = dest.__module__.startswith('h5py.')
524+
525+
# determine name to copy to
526+
if name is None:
527+
name = source.name.split('/')[-1]
528+
if not name:
529+
raise TypeError('source has no name, please provide the `name` '
530+
'parameter to indicate a name to copy to')
531+
532+
if hasattr(source, 'shape'):
533+
# copy a dataset/array
534+
535+
# setup creation keyword arguments
536+
kws = create_kws.copy()
537+
538+
# setup chunks option, preserve by default
539+
kws.setdefault('chunks', source.chunks)
540+
541+
# setup compression options
542+
if source_h5py:
543+
if dest_h5py:
544+
# h5py -> h5py; preserve compression options by default
545+
kws.setdefault('compression', source.compression)
546+
kws.setdefault('compression_opts', source.compression_opts)
547+
kws.setdefault('shuffle', source.shuffle)
548+
else:
549+
# h5py -> zarr; use zarr default compression options
550+
pass
551+
else:
552+
if dest_h5py:
553+
# zarr -> h5py; use some vaguely sensible defaults
554+
kws.setdefault('compression', 'gzip')
555+
kws.setdefault('compression_opts', 1)
556+
kws.setdefault('shuffle', True)
557+
else:
558+
# zarr -> zarr; preserve compression options by default
559+
kws.setdefault('compressor', source.compressor)
560+
561+
# create new dataset in destination
562+
ds = dest.create_dataset(name, shape=source.shape, dtype=source.dtype, **kws)
563+
564+
# copy data - N.B., if dest is h5py this will load all data into memory
565+
log('{} -> {}'.format(source.name, ds.name))
566+
ds[:] = source
567+
568+
# copy attributes
569+
if not without_attrs:
570+
ds.attrs.update(source.attrs)
571+
572+
else:

zarr/tests/test_convenience.py

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@
77
from nose.tools import assert_raises
88
import numpy as np
99
from numpy.testing import assert_array_equal
10+
from numcodecs import Zlib
1011

1112

12-
from zarr.convenience import open, save, save_group, load, copy_store
13+
from zarr.convenience import open, save, save_group, load, copy_store, copy
1314
from zarr.storage import atexit_rmtree
1415
from zarr.core import Array
15-
from zarr.hierarchy import Group
16+
from zarr.hierarchy import Group, group
1617

1718

1819
def test_open_array():
@@ -177,3 +178,80 @@ def test_copy_store():
177178
assert 'foo' in dest
178179
assert 'bar/baz' not in dest
179180
assert 'bar/qux' in dest
181+
182+
183+
def _assert_same_array_content(actual, expected):
    """Assert that `actual` has the same dtype, shape and data as `expected`."""
    assert actual.dtype == expected.dtype
    assert actual.shape == expected.shape
    assert_array_equal(actual[:], expected[:])


def test_copy():
    """Exercise `copy` on arrays and groups, with default and custom options.

    Each scenario copies into a freshly created destination group so that
    no scenario can be affected by what an earlier one left behind.
    """

    # source hierarchy: group 'foo' (with attrs) containing array 'bar/baz'
    # (with attrs)
    source = group()
    foo = source.create_group('foo')
    foo.attrs['experiment'] = 'weird science'
    baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=50)
    baz.attrs['units'] = 'metres'

    # copy array with default options
    dest = group()
    copy(source['foo/bar/baz'], dest)
    a = dest['baz']  # defaults to use source name
    assert isinstance(a, Array)
    _assert_same_array_content(a, baz)
    assert a.chunks == baz.chunks
    assert a.compressor == baz.compressor
    assert a.attrs['units'] == 'metres'

    # copy array with name
    dest = group()
    copy(source['foo/bar/baz'], dest, name='qux')
    assert 'baz' not in dest
    a = dest['qux']
    assert isinstance(a, Array)
    _assert_same_array_content(a, baz)
    assert a.chunks == baz.chunks
    assert a.compressor == baz.compressor
    assert a.attrs['units'] == 'metres'

    # copy array, provide creation options
    dest = group()  # fresh destination, consistent with the other scenarios
    compressor = Zlib(1)
    chunks = True
    copy(source['foo/bar/baz'], dest, without_attrs=True,
         compressor=compressor, chunks=chunks)
    a = dest['baz']
    assert isinstance(a, Array)
    _assert_same_array_content(a, baz)
    assert a.chunks != baz.chunks  # autochunking was requested
    assert a.compressor == compressor
    assert 'units' not in a.attrs

    # copy group, default options
    dest = group()
    copy(source['foo'], dest)
    g = dest['foo']  # defaults to use source name
    assert isinstance(g, Group)
    assert g.attrs['experiment'] == 'weird science'
    a = g['bar/baz']
    _assert_same_array_content(a, baz)
    assert a.chunks == baz.chunks
    assert a.compressor == baz.compressor
    assert a.attrs['units'] == 'metres'

    # copy group, non-default options
    dest = group()
    copy(source['foo'], dest, name='qux', without_attrs=True)
    assert 'foo' not in dest
    g = dest['qux']
    assert isinstance(g, Group)
    assert 'experiment' not in g.attrs
    a = g['bar/baz']
    _assert_same_array_content(a, baz)
    assert a.chunks == baz.chunks
    assert a.compressor == baz.compressor
    assert 'units' not in a.attrs

0 commit comments

Comments
 (0)