|
18 | 18 | from mdio.converters.exceptions import GridTraceSparsityError |
19 | 19 | from mdio.converters.type_converter import to_structured_type |
20 | 20 | from mdio.core.grid import Grid |
| 21 | +from mdio.core.utils_write import MAX_COORDINATES_BYTES |
| 22 | +from mdio.core.utils_write import MAX_SIZE_LIVE_MASK |
| 23 | +from mdio.core.utils_write import get_constrained_chunksize |
| 24 | +from mdio.schemas.chunk_grid import RegularChunkGrid |
| 25 | +from mdio.schemas.chunk_grid import RegularChunkShape |
| 26 | +from mdio.schemas.metadata import ChunkGridMetadata |
| 27 | + |
| 28 | +# from mdio.schemas.v1.dataset_builder import ChunkGridMetadata |
21 | 29 | from mdio.schemas.v1.dataset_serializer import to_xarray_dataset |
22 | 30 | from mdio.schemas.v1.units import AllUnits |
23 | 31 | from mdio.schemas.v1.units import LengthUnitEnum |
24 | 32 | from mdio.schemas.v1.units import LengthUnitModel |
| 33 | +from mdio.schemas.v1.variable import VariableMetadata |
25 | 34 | from mdio.segy import blocked_io |
26 | 35 | from mdio.segy.utilities import get_grid_plan |
27 | 36 |
|
@@ -313,30 +322,27 @@ def _add_text_binary_headers(dataset: Dataset, segy_file: SegyFile) -> None: |
313 | 322 |
|
314 | 323 |
|
315 | 324 | def _chunk_variable(ds: Dataset, variable_name: str) -> None: |
316 | | - from mdio.core.utils_write import MAX_SIZE_LIVE_MASK |
317 | | - from mdio.core.utils_write import get_constrained_chunksize |
318 | | - from mdio.schemas.chunk_grid import RegularChunkGrid |
319 | | - from mdio.schemas.chunk_grid import RegularChunkShape |
320 | | - from mdio.schemas.metadata import ChunkGridMetadata |
321 | | - from mdio.schemas.v1.dataset_builder import ChunkGridMetadata |
322 | | - from mdio.schemas.v1.variable import VariableMetadata |
323 | | - |
| 325 | + """Determines the chunking for a Variable in the dataset.""" |
324 | 326 | # Find the variable by name |
325 | 327 | idx = -1 |
326 | 328 | for i in range(len(ds.variables)): |
327 | 329 | if ds.variables[i].name == variable_name: |
328 | 330 | idx = i |
329 | 331 | break |
330 | 332 | if idx == -1: |
331 | | - raise ValueError(f"Variable '{variable_name}' not found in dataset.") |
| 333 | + # raise ValueError(f"Variable '{variable_name}' not found in dataset.") |
| 334 | + err = f"Variable '{variable_name}' not found in dataset." |
| 335 | + raise ValueError(err) |
| 336 | + |
| 337 | + def determine_target_size(t: str) -> int: |
| 338 | + if t == "bool": |
| 339 | + return MAX_SIZE_LIVE_MASK |
| 340 | + return MAX_COORDINATES_BYTES |
332 | 341 |
|
333 | 342 | # Create the chunk grid metadata |
334 | 343 | t = ds.variables[idx].data_type |
335 | 344 | full_shape = tuple(dim.size for dim in ds.variables[idx].dimensions) |
336 | | - if t == "bool": |
337 | | - target_size = MAX_SIZE_LIVE_MASK |
338 | | - else: |
339 | | - target_size = 128 * 1024**2 |
| 345 | + target_size = determine_target_size(t) |
340 | 346 |
|
341 | 347 | chunks = ChunkGridMetadata( |
342 | 348 | chunk_grid=RegularChunkGrid( |
|
0 commit comments