@@ -37,57 +37,3 @@ def create_zarr_hierarchy(store: FSStore, overwrite: bool) -> Group:
3737 raise MDIOAlreadyExistsError (msg ) from e
3838
3939 return root_group
40-
41-
# TODO: This is not used right now, but it is a template for what we can do for
# automatic chunk size determination based on shape of the arrays etc.
def infer_header_chunksize(orig_chunks, orig_shape, target_size=2**26, length=240):
    """Infer larger chunks based on target chunk filesize.

    This tool takes an original chunking scheme, the full shape of the
    original array, a target size (in bytes) and length of the `array` or
    `struct` to calculate a multidimensional scalar for smaller arrays.

    Use case is: Seismic data has 1 extra time/depth dimension, which doesn't
    exist in headers or spatial live mask. So we can make chunk size bigger
    for these flatter arrays.

    This module infers a scalar based on the parameters and returns a new
    chunking scheme.

    Args:
        orig_chunks: Original array chunks.
        orig_shape: Original array shape.
        target_size: Uncompressed, expected size of each chunk. This is much
            larger than the ideal 1MB because on metadata, after compression,
            the size goes down by 10x. Default: 64 MB (2**26 bytes).
        length: Length (in bytes) of the multidimensional array's dtype.
            Default is 240-bytes.

    Returns:
        Tuple of adjusted chunk sizes.
    """
    # Uncompressed size of one original chunk in bytes.
    orig_bytes = prod(orig_chunks) * length

    # Size scalar in bytes
    scalar = target_size / orig_bytes

    # Divide that into chunks (root of the scalar based on length of dims)
    # then round it to the nearest integer. Clamp to at least 1 so chunks
    # never collapse to zero when the original chunk is already larger
    # than the target size.
    scalar = max(1, round(scalar ** (1 / len(orig_chunks))))

    # Scale chunks by inferred isotropic scalar.
    new_chunks = [dim_chunk * scalar for dim_chunk in orig_chunks]

    # Set it to max if after scaling, it is larger than the max values.
    new_chunks = [
        min(dim_new, dim_orig)
        for dim_new, dim_orig in zip(new_chunks, orig_shape)  # noqa: B905
    ]

    # Special case if the new_chunks are larger than 80% the original shape.
    # In this case we want one chunk.
    if prod(new_chunks) > 0.8 * prod(orig_shape):
        return tuple(orig_shape)

    # Normalize to a tuple so both return paths yield the same type,
    # matching the documented contract.
    return tuple(new_chunks)
0 commit comments