@@ -768,6 +768,110 @@ Here is an example using S3Map to read an array created previously::
768
768
b'Hello from the cloud!'
769
769
770
770
771
+ .. _tutorial_copy:
772
+
773
+ Copying/migrating data
774
+ ----------------------
775
+
776
+ If you have some data in an HDF5 file and would like to copy some or all of it
777
+ into a Zarr group, or vice-versa, the :func:`zarr.convenience.copy` and
778
+ :func:`zarr.convenience.copy_all` functions can be used. Here's an example
779
+ copying a group named 'foo' from an HDF5 file to a Zarr group::
780
+
781
+ >>> import h5py
782
+ >>> import zarr
783
+ >>> import numpy as np
784
+ >>> source = h5py.File('data/example.h5', mode='w')
785
+ >>> foo = source.create_group('foo')
786
+ >>> baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50,))
787
+ >>> spam = source.create_dataset('spam', data=np.arange(100, 200), chunks=(30,))
788
+ >>> zarr.tree(source)
789
+ /
790
+ ├── foo
791
+ │ └── bar
792
+ │ └── baz (100,) int64
793
+ └── spam (100,) int64
794
+ >>> dest = zarr.open_group('data/example.zarr', mode='w')
795
+ >>> from sys import stdout
796
+ >>> zarr.copy(source['foo'], dest, log=stdout)
797
+ copy /foo
798
+ copy /foo/bar
799
+ copy /foo/bar/baz (100,) int64
800
+ all done: 3 copied, 0 skipped, 800 bytes copied
801
+ (3, 0, 800)
802
+ >>> dest.tree() # N.B., no spam
803
+ /
804
+ └── foo
805
+ └── bar
806
+ └── baz (100,) int64
807
+ >>> source.close()
808
+
809
+ If, rather than copying a single group or dataset, you would like to copy all
810
+ groups and datasets, use :func:`zarr.convenience.copy_all`, e.g.::
811
+
812
+ >>> source = h5py.File('data/example.h5', mode='r')
813
+ >>> dest = zarr.open_group('data/example2.zarr', mode='w')
814
+ >>> zarr.copy_all(source, dest, log=stdout)
815
+ copy /foo
816
+ copy /foo/bar
817
+ copy /foo/bar/baz (100,) int64
818
+ copy /spam (100,) int64
819
+ all done: 4 copied, 0 skipped, 1,600 bytes copied
820
+ (4, 0, 1600)
821
+ >>> dest.tree()
822
+ /
823
+ ├── foo
824
+ │ └── bar
825
+ │ └── baz (100,) int64
826
+ └── spam (100,) int64
827
+
828
+ If you need to copy data between two Zarr groups, the
829
+ :func:`zarr.convenience.copy` and :func:`zarr.convenience.copy_all` functions can
830
+ be used and provide the most flexibility. However, if you want to copy data
831
+ in the most efficient way possible, without changing any configuration options,
832
+ the :func:`zarr.convenience.copy_store` function can be used. This function
833
+ copies data directly between the underlying stores, without any decompression or
834
+ re-compression, and so should be faster. E.g.::
835
+
836
+ >>> import zarr
837
+ >>> import numpy as np
838
+ >>> store1 = zarr.DirectoryStore('data/example.zarr')
839
+ >>> root = zarr.group(store1, overwrite=True)
840
+ >>> baz = root.create_dataset('foo/bar/baz', data=np.arange(100), chunks=(50,))
841
+ >>> spam = root.create_dataset('spam', data=np.arange(100, 200), chunks=(30,))
842
+ >>> root.tree()
843
+ /
844
+ ├── foo
845
+ │ └── bar
846
+ │ └── baz (100,) int64
847
+ └── spam (100,) int64
848
+ >>> from sys import stdout
849
+ >>> store2 = zarr.ZipStore('data/example.zip', mode='w')
850
+ >>> zarr.copy_store(store1, store2, log=stdout)
851
+ copy .zgroup
852
+ copy foo/.zgroup
853
+ copy foo/bar/.zgroup
854
+ copy foo/bar/baz/.zarray
855
+ copy foo/bar/baz/0
856
+ copy foo/bar/baz/1
857
+ copy spam/.zarray
858
+ copy spam/0
859
+ copy spam/1
860
+ copy spam/2
861
+ copy spam/3
862
+ all done: 11 copied, 0 skipped, 1,138 bytes copied
863
+ (11, 0, 1138)
864
+ >>> new_root = zarr.group(store2)
865
+ >>> new_root.tree()
866
+ /
867
+ ├── foo
868
+ │ └── bar
869
+ │ └── baz (100,) int64
870
+ └── spam (100,) int64
871
+ >>> new_root['foo/bar/baz'][:]
872
+ array([ 0, 1, 2, ..., 97, 98, 99])
873
+ >>> store2.close() # zip stores need to be closed
874
+
771
875
.. _tutorial_strings:
772
876
773
877
String arrays
0 commit comments