Skip to content

Commit 85859d6

Browse files
committed
implement copy functions
1 parent 83e1616 commit 85859d6

File tree

4 files changed

+250
-29
lines changed

4 files changed

+250
-29
lines changed

docs/api/convenience.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,7 @@ Convenience functions (``zarr.convenience``)
66
.. autofunction:: load
77
.. autofunction:: save_array
88
.. autofunction:: save_group
9+
.. autofunction:: copy
10+
.. autofunction:: copy_all
911
.. autofunction:: copy_store
12+
.. autofunction:: tree

zarr/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,6 @@
1111
from zarr.hierarchy import group, open_group, Group
1212
from zarr.sync import ThreadSynchronizer, ProcessSynchronizer
1313
from zarr.codecs import *
14-
from zarr.convenience import open, save, save_array, save_group, load, copy_store
14+
from zarr.convenience import (open, save, save_array, save_group, load, copy_store,
15+
copy, copy_all, tree)
1516
from zarr.version import version as __version__

zarr/convenience.py

Lines changed: 165 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from zarr.hierarchy import open_group, group as _create_group, Group
1212
from zarr.storage import contains_array, contains_group
1313
from zarr.errors import err_path_not_found
14-
from zarr.util import normalize_storage_path
14+
from zarr.util import normalize_storage_path, TreeViewer
1515

1616

1717
# noinspection PyShadowingBuiltins
@@ -507,16 +507,59 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None,
507507
dest[dest_key] = source[source_key]
508508

509509

510-
def copy(source, dest, name=None, without_attrs=False, log=None, **create_kws):
511-
"""TODO"""
510+
def copy(source, dest, name=None, shallow=False, without_attrs=False, log=None,
511+
**create_kws):
512+
"""Copy the source object into the given destination.
513+
514+
Parameters
515+
----------
516+
source : group or array/dataset
517+
A zarr group or array, or an h5py group or dataset.
518+
dest : group
519+
A zarr or h5py group.
520+
name : str, optional
521+
Name to copy the object to.
522+
shallow : bool, optional
523+
If True, only copy immediate children of `source`.
524+
without_attrs : bool, optional
525+
Do not copy user attributes.
526+
log : callable, file path or file-like object, optional
527+
If provided, will be used to log progress information.
528+
**create_kws
529+
Passed through to the create_dataset method when copying an array/dataset.
530+
531+
Examples
532+
--------
533+
>>> import h5py
534+
>>> import zarr
535+
>>> import numpy as np
536+
>>> source = h5py.File('data/example.h5', mode='w')
537+
>>> foo = source.create_group('foo')
538+
>>> baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50,))
539+
>>> spam = source.create_dataset('spam', data=np.arange(100, 200), chunks=(30,))
540+
>>> zarr.tree(source)
541+
/
542+
├── foo
543+
│ └── bar
544+
│ └── baz (100,) int64
545+
└── spam (100,) int64
546+
>>> dest = zarr.group()
547+
>>> zarr.copy(source['foo'], dest)
548+
>>> dest.tree() # N.B., no spam
549+
/
550+
└── foo
551+
└── bar
552+
└── baz (100,) int64
553+
554+
"""
512555

513556
# setup logging
514557
with _LogWriter(log) as log:
515-
_copy(log, source, dest, name=name, without_attrs=without_attrs, **create_kws)
558+
_copy(log, source, dest, name=name, root=True, shallow=shallow,
559+
without_attrs=without_attrs, **create_kws)
516560

517561

518-
def _copy(log, source, dest, name=None, without_attrs=False, **create_kws):
519-
"""TODO"""
562+
def _copy(log, source, dest, name, root, shallow, without_attrs, **create_kws):
520563

521564
# are we copying to/from h5py?
522565
source_h5py = source.__module__.startswith('h5py.')
@@ -569,4 +612,119 @@ def _copy(log, source, dest, name=None, without_attrs=False, **create_kws):
569612
if not without_attrs:
570613
ds.attrs.update(source.attrs)
571614

572-
else:
615+
elif root or not shallow:
616+
# copy a group
617+
618+
# create new group in destination
619+
grp = dest.create_group(name)
620+
621+
# copy attributes
622+
if not without_attrs:
623+
grp.attrs.update(source.attrs)
624+
625+
# recurse
626+
for k in source.keys():
627+
_copy(log, source[k], grp, name=k, root=False, shallow=shallow,
628+
without_attrs=without_attrs, **create_kws)
629+
630+
631+
def tree(grp, expand=False, level=None):
    """Build a ``print``-able view of a group hierarchy.

    Zarr groups already expose a ``.tree()`` method; this function is mainly a
    convenience for getting the same kind of view of an h5py group.

    Parameters
    ----------
    grp : Group
        Zarr or h5py group.
    expand : bool, optional
        Only relevant for HTML representation. If True, tree will be fully expanded.
    level : int, optional
        Maximum depth to descend into hierarchy.

    Returns
    -------
    viewer : TreeViewer
        The viewer object wrapping `grp`.

    Examples
    --------
    >>> import zarr
    >>> g1 = zarr.group()
    >>> g2 = g1.create_group('foo')
    >>> g3 = g1.create_group('bar')
    >>> g4 = g3.create_group('baz')
    >>> g5 = g3.create_group('qux')
    >>> d1 = g5.create_dataset('baz', shape=100, chunks=10)
    >>> g1.tree()
    /
     ├── bar
     │   ├── baz
     │   └── qux
     │       └── baz (100,) float64
     └── foo
    >>> import h5py
    >>> h5f = h5py.File('data/example.h5', mode='w')
    >>> zarr.copy_all(g1, h5f)
    >>> zarr.tree(h5f)
    /
     ├── bar
     │   ├── baz
     │   └── qux
     │       └── baz (100,) float64
     └── foo

    See Also
    --------
    zarr.hierarchy.Group.tree

    """

    # all arguments are forwarded unchanged to the viewer
    viewer = TreeViewer(grp, expand=expand, level=level)
    return viewer
679+
680+
681+
def copy_all(source, dest, shallow=False, without_attrs=False, log=None, **create_kws):
    """Copy every child of the source group into the destination group.

    Parameters
    ----------
    source : group
        A zarr or h5py group. Its children are enumerated via ``source.keys()``,
        so an array/dataset is not a valid source here.
    dest : group
        A zarr or h5py group.
    shallow : bool, optional
        If True, only copy immediate children of `source`.
    without_attrs : bool, optional
        Do not copy user attributes.
    log : callable, file path or file-like object, optional
        If provided, will be used to log progress information.
    **create_kws
        Passed through to the create_dataset method when copying an array/dataset.

    Examples
    --------
    >>> import h5py
    >>> import zarr
    >>> import numpy as np
    >>> source = h5py.File('data/example.h5', mode='w')
    >>> foo = source.create_group('foo')
    >>> baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50,))
    >>> spam = source.create_dataset('spam', data=np.arange(100, 200), chunks=(30,))
    >>> zarr.tree(source)
    /
     ├── foo
     │   └── bar
     │       └── baz (100,) int64
     └── spam (100,) int64
    >>> dest = zarr.group()
    >>> zarr.copy_all(source, dest)
    >>> dest.tree()
    /
     ├── foo
     │   └── bar
     │       └── baz (100,) int64
     └── spam (100,) int64

    """

    # setup logging
    with _LogWriter(log) as log:
        # each child is copied as a non-root object under its own name
        for child_name in source.keys():
            child = source[child_name]
            _copy(log, child, dest, name=child_name, root=False,
                  shallow=shallow, without_attrs=without_attrs, **create_kws)
730+

zarr/tests/test_convenience.py

Lines changed: 80 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22
from __future__ import absolute_import, print_function, division
33
import tempfile
44
import atexit
5+
import os
56

67

78
from nose.tools import assert_raises
89
import numpy as np
910
from numpy.testing import assert_array_equal
1011
from numcodecs import Zlib
12+
import pytest
1113

1214

1315
from zarr.convenience import open, save, save_group, load, copy_store, copy
@@ -180,78 +182,135 @@ def test_copy_store():
180182
assert 'bar/qux' in dest
181183

182184

183-
def test_copy():
185+
def _test_copy(new_source, new_dest):
184186

185-
source = group()
187+
source = new_source()
186188
foo = source.create_group('foo')
187189
foo.attrs['experiment'] = 'weird science'
188-
baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=50)
190+
baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50,))
189191
baz.attrs['units'] = 'metres'
192+
spam = source.create_dataset('spam', data=np.arange(100, 200), chunks=(30,))
190193

191194
# copy array with default options
192-
dest = group()
195+
dest = new_dest()
193196
copy(source['foo/bar/baz'], dest)
194197
a = dest['baz'] # defaults to use source name
195-
assert isinstance(a, Array)
196198
assert a.dtype == baz.dtype
197199
assert a.shape == baz.shape
198200
assert a.chunks == baz.chunks
199-
assert a.compressor == baz.compressor
201+
if hasattr(a, 'compressor') and hasattr(baz, 'compressor'):
202+
assert a.compressor == baz.compressor
200203
assert_array_equal(a[:], baz[:])
201204
assert a.attrs['units'] == 'metres'
202205

203206
# copy array with name
204-
dest = group()
207+
dest = new_dest()
205208
copy(source['foo/bar/baz'], dest, name='qux')
206209
assert 'baz' not in dest
207210
a = dest['qux']
208-
assert isinstance(a, Array)
209211
assert a.dtype == baz.dtype
210212
assert a.shape == baz.shape
211213
assert a.chunks == baz.chunks
212-
assert a.compressor == baz.compressor
214+
if hasattr(a, 'compressor') and hasattr(baz, 'compressor'):
215+
assert a.compressor == baz.compressor
213216
assert_array_equal(a[:], baz[:])
214217
assert a.attrs['units'] == 'metres'
215218

216219
# copy array, provide creation options
217-
compressor = Zlib(1)
218-
chunks = True
219-
copy(source['foo/bar/baz'], dest, without_attrs=True, compressor=compressor,
220-
chunks=chunks)
220+
dest = new_dest()
221+
compressor = Zlib(9)
222+
if isinstance(dest, Group):
223+
copy(source['foo/bar/baz'], dest, without_attrs=True, compressor=compressor,
224+
chunks=True)
225+
else:
226+
copy(source['foo/bar/baz'], dest, without_attrs=True, compression='gzip',
227+
compression_opts=9, chunks=True)
221228
a = dest['baz']
222-
assert isinstance(a, Array)
223229
assert a.dtype == baz.dtype
224230
assert a.shape == baz.shape
225231
assert a.chunks != baz.chunks # autochunking was requested
226-
assert a.compressor == compressor
232+
if hasattr(a, 'compressor'):
233+
assert compressor == a.compressor
234+
if hasattr(baz, 'compressor'):
235+
assert a.compressor != baz.compressor
236+
else:
237+
assert a.compression == 'gzip'
238+
assert a.compression_opts == 9
227239
assert_array_equal(a[:], baz[:])
228240
assert 'units' not in a.attrs
229241

230242
# copy group, default options
231-
dest = group()
243+
dest = new_dest()
232244
copy(source['foo'], dest)
233245
g = dest['foo'] # defaults to use source name
234-
assert isinstance(g, Group)
235246
assert g.attrs['experiment'] == 'weird science'
236247
a = g['bar/baz']
237248
assert a.dtype == baz.dtype
238249
assert a.shape == baz.shape
239250
assert a.chunks == baz.chunks
240-
assert a.compressor == baz.compressor
251+
if hasattr(a, 'compressor') and hasattr(baz, 'compressor'):
252+
assert a.compressor == baz.compressor
241253
assert_array_equal(a[:], baz[:])
242254
assert a.attrs['units'] == 'metres'
243255

244256
# copy group, non-default options
245-
dest = group()
257+
dest = new_dest()
246258
copy(source['foo'], dest, name='qux', without_attrs=True)
247259
assert 'foo' not in dest
248260
g = dest['qux']
249-
assert isinstance(g, Group)
250261
assert 'experiment' not in g.attrs
251262
a = g['bar/baz']
252263
assert a.dtype == baz.dtype
253264
assert a.shape == baz.shape
254265
assert a.chunks == baz.chunks
255-
assert a.compressor == baz.compressor
266+
if hasattr(a, 'compressor') and hasattr(baz, 'compressor'):
267+
assert a.compressor == baz.compressor
256268
assert_array_equal(a[:], baz[:])
257269
assert 'units' not in a.attrs
270+
271+
# copy group, shallow
272+
dest = new_dest()
273+
copy(source, dest, name='eggs', shallow=True)
274+
assert 'eggs' in dest
275+
eggs = dest['eggs']
276+
assert 'spam' in eggs
277+
a = eggs['spam']
278+
assert a.dtype == spam.dtype
279+
assert a.shape == spam.shape
280+
assert a.chunks == spam.chunks
281+
if hasattr(a, 'compressor') and hasattr(spam, 'compressor'):
282+
assert a.compressor == spam.compressor
283+
assert_array_equal(a[:], spam[:])
284+
assert 'foo' not in eggs
285+
assert 'bar' not in eggs
286+
287+
288+
def test_copy_zarr_zarr():
    """Run the shared copy checks with a zarr source and a zarr destination."""
    _test_copy(new_source=group, new_dest=group)
291+
292+
293+
try:
294+
import h5py
295+
have_h5py = True
296+
except ImportError:
297+
have_h5py = False
298+
299+
300+
def temp_h5f():
    """Create a temporary HDF5 file and return it opened for writing.

    The file path is registered with ``atexit`` so that it is removed when the
    interpreter exits.

    Returns
    -------
    h5f : h5py.File
        A new h5py file opened with ``mode='w'``.

    """
    # mkstemp() creates the file atomically; the deprecated mktemp() only
    # returns a name, leaving a window in which another process can claim it.
    fd, fn = tempfile.mkstemp()
    # h5py opens the path itself, so release the descriptor straight away
    os.close(fd)
    atexit.register(os.remove, fn)
    h5f = h5py.File(fn, mode='w')
    return h5f
305+
306+
307+
@pytest.mark.skipif(not have_h5py, reason='h5py not installed')
def test_copy_h5py_zarr():
    """Run the shared copy checks with an h5py source and a zarr destination."""
    _test_copy(new_source=temp_h5f, new_dest=group)
311+
312+
313+
@pytest.mark.skipif(not have_h5py, reason='h5py not installed')
def test_copy_zarr_h5py():
    """Run the shared copy checks with a zarr source and an h5py destination."""
    _test_copy(new_source=group, new_dest=temp_h5f)

0 commit comments

Comments
 (0)