|
3 | 3 | from __future__ import annotations |
4 | 4 |
|
5 | 5 | import inspect |
| 6 | +import logging |
6 | 7 | from dataclasses import dataclass |
7 | 8 |
|
8 | 9 | import numpy as np |
9 | 10 | import zarr |
10 | 11 |
|
| 12 | +from mdio.constants import UINT32_MAX |
11 | 13 | from mdio.constants import UINT64_MAX |
12 | 14 | from mdio.core import Dimension |
13 | 15 | from mdio.core.serialization import Serializer |
@@ -88,8 +90,25 @@ def build_map(self, index_headers): |
88 | 90 | dim_hdr = index_headers[dim.name] |
89 | 91 | live_dim_indices += (np.searchsorted(dim, dim_hdr),) |
90 | 92 |
|
91 | | - # We set dead traces to uint64 max. Should be far away from actual trace counts. |
92 | | - self.map = zarr.full(self.shape[:-1], dtype="uint64", fill_value=UINT64_MAX) |
| 93 | + # There were cases where ingestion would overflow a signed int32. |
| 94 | + # It's unlikely that we overflow the uint32_max, but this helps |
| 95 | + # prevent any issues while keeping the memory footprint as low as possible. |
| 96 | + grid_size = np.prod(self.shape[:-1]) |
| 97 | + if grid_size > UINT32_MAX-1: |
| 98 | + # Compare against UINT32_MAX-1 so that the UINT32_MAX fill value used below
| 99 | + # for dead traces stays strictly above every live trace index ("far away" is relative).
| 100 | + logging.warning( |
| 101 | + f"Grid size {grid_size} exceeds UINT32_MAX ({UINT32_MAX-1}). " |
| 102 | + "Using uint64 for trace map which will use more memory." |
| 103 | + ) |
| 104 | + dtype = "uint64" |
| 105 | + fill_value = UINT64_MAX |
| 106 | + else: |
| 107 | + dtype = "uint32" |
| 108 | + fill_value = UINT32_MAX |
| 109 | + |
| 110 | + # Dead traces are set to the max value of the chosen dtype (uint32 or uint64), which should be far above any actual trace count.
| 111 | + self.map = zarr.full(self.shape[:-1], dtype=dtype, fill_value=fill_value) |
93 | 112 | self.map.vindex[live_dim_indices] = range(len(live_dim_indices[0])) |
94 | 113 |
|
95 | 114 | self.live_mask = zarr.zeros(self.shape[:-1], dtype="bool") |
|
0 commit comments