15
15
from zarr .hierarchy import open_group , group as _create_group , Group
16
16
from zarr .storage import contains_array , contains_group
17
17
from zarr .errors import err_path_not_found
18
- from zarr .util import normalize_storage_path , TreeViewer
18
+ from zarr .util import normalize_storage_path , TreeViewer , buffer_size
19
19
20
20
21
21
# noinspection PyShadowingBuiltins
@@ -396,13 +396,14 @@ def __call__(self, *args, **kwargs):
396
396
397
397
398
398
def copy_store (source , dest , source_path = '' , dest_path = '' , excludes = None ,
399
- includes = None , flags = 0 , log = None ):
399
+ includes = None , flags = 0 , if_exists = 'raise' , dry_run = False ,
400
+ log = None ):
400
401
"""Copy data directly from the `source` store to the `dest` store. Use this
401
402
function when you want to copy a group or array in the most efficient way,
402
403
preserving all configuration and attributes. This function is more efficient
403
404
than the copy() or copy_all() functions because it avoids de-compressing and
404
- re-compressing data, rather the compressed chunk data for each array are copied
405
- directly between stores.
405
+ re-compressing data; rather, the compressed chunk data for each array are
406
+ copied directly between stores.
406
407
407
408
Parameters
408
409
----------
@@ -415,18 +416,27 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None,
415
416
dest_path : str, optional
416
417
Copy data into this path in the destination store.
417
418
excludes : sequence of str, optional
418
- One or more regular expressions which will be matched against keys in the
419
- source store. Any matching key will not be copied.
419
+ One or more regular expressions which will be matched against keys in
420
+ the source store. Any matching key will not be copied.
420
421
includes : sequence of str, optional
421
- One or more regular expressions which will be matched against keys in the
422
- source store and will override any excludes also matching.
422
+ One or more regular expressions which will be matched against keys in
423
+ the source store and will override any excludes also matching.
423
424
flags : int, optional
424
425
Regular expression flags used for matching excludes and includes.
426
+ if_exists : {'raise', 'replace', 'skip'}, optional
427
+ How to handle keys that already exist in the destination store. If
428
+ 'raise' then a ValueError is raised on the first key already present
429
+ in the destination store. If 'replace' then any data will be replaced in
430
+ the destination. If 'skip' then any existing keys will not be copied.
431
+ dry_run : bool, optional
432
+ If True, don't actually copy anything, just log what would have
433
+ happened.
425
434
log : callable, file path or file-like object, optional
426
435
If provided, will be used to log progress information.
427
436
428
437
Examples
429
438
--------
439
+
430
440
>>> import zarr
431
441
>>> store1 = zarr.DirectoryStore('data/example.zarr')
432
442
>>> root = zarr.group(store1, overwrite=True)
@@ -441,14 +451,15 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None,
441
451
└── bar
442
452
└── baz (100,) int64
443
453
>>> import sys
444
- >>> store2 = zarr.ZipStore('data/example.zip', mode='w') # or any type of store
454
+ >>> store2 = zarr.ZipStore('data/example.zip', mode='w')
445
455
>>> zarr.copy_store(store1, store2, log=sys.stdout)
446
- .zgroup -> .zgroup
447
- foo/.zgroup -> foo/.zgroup
448
- foo/bar/.zgroup -> foo/bar/.zgroup
449
- foo/bar/baz/.zarray -> foo/bar/baz/.zarray
450
- foo/bar/baz/0 -> foo/bar/baz/0
451
- foo/bar/baz/1 -> foo/bar/baz/1
456
+ copy .zgroup
457
+ copy foo/.zgroup
458
+ copy foo/bar/.zgroup
459
+ copy foo/bar/baz/.zarray
460
+ copy foo/bar/baz/0
461
+ copy foo/bar/baz/1
462
+ all done: 6 copy, 0 skip; 566 bytes copied
452
463
>>> new_root = zarr.group(store2)
453
464
>>> new_root.tree()
454
465
/
@@ -481,6 +492,17 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None,
481
492
excludes = [re .compile (e , flags ) for e in excludes ]
482
493
includes = [re .compile (i , flags ) for i in includes ]
483
494
495
+ # check if_exists parameter
496
+ valid_if_exists = ['raise' , 'replace' , 'skip' ]
497
+ if if_exists not in valid_if_exists :
498
+ raise ValueError ('if_exists must be one of {!r}; found {!r}'
499
+ .format (valid_if_exists , if_exists ))
500
+
501
+ # setup counting variables
502
+ n_copy = 0
503
+ n_skip = 0
504
+ n_bytes_copied = 0
505
+
484
506
# setup logging
485
507
with _LogWriter (log ) as log :
486
508
@@ -508,9 +530,42 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None,
508
530
key_suffix = source_key [len (source_path ):]
509
531
dest_key = dest_path + key_suffix
510
532
511
- # retrieve and copy data
512
- log ('{} -> {}' .format (source_key , dest_key ))
513
- dest [dest_key ] = source [source_key ]
533
+ # create a descriptive label for this operation
534
+ descr = source_key
535
+ if dest_key != source_key :
536
+ descr = descr + ' -> ' + dest_key
537
+
538
+ # decide what to do
539
+ do_copy = True
540
+ if if_exists != 'replace' :
541
+ if dest_key in dest :
542
+ if if_exists == 'raise' :
543
+ raise ValueError ('key {!r} exists in destination'
544
+ .format (dest_key ))
545
+ elif if_exists == 'skip' :
546
+ do_copy = False
547
+
548
+ # take action
549
+ if do_copy :
550
+ n_copy += 1
551
+ log ('copy {}' .format (descr ))
552
+ if not dry_run :
553
+ data = source [source_key ]
554
+ n_bytes_copied += buffer_size (data )
555
+ dest [dest_key ] = data
556
+ else :
557
+ n_skip += 1
558
+ log ('skip {}' .format (descr ))
559
+
560
+ # log a final message with a summary of what happened
561
+ if dry_run :
562
+ final_message = 'dry run: '
563
+ else :
564
+ final_message = 'all done: '
565
+ final_message += '{} copy, {} skip' .format (n_copy , n_skip )
566
+ if not dry_run :
567
+ final_message += '; {:,} bytes copied' .format (n_bytes_copied )
568
+ log (final_message )
514
569
515
570
516
571
def copy (source , dest , name = None , shallow = False , without_attrs = False , log = None ,
0 commit comments