Skip to content

Commit aeef0cc

Browse files
ArneDefauw and melonora
authored
spatial query rechunk (#989)
* spatial query rechunk
* check regular chunks before rechunking
---------
Co-authored-by: Wouter-Michiel Vierdag <[email protected]>
1 parent b5239b4 commit aeef0cc

File tree

1 file changed

+15
-23
lines changed

1 file changed

+15
-23
lines changed

src/spatialdata/_core/query/_utils.py

Lines changed: 15 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -134,45 +134,37 @@ def _process_data_tree_query_result(query_result: DataTree) -> DataTree | None:
134134
d = {k: Dataset({"image": d[k]}) for k in scales_to_keep}
135135
result = DataTree.from_dict(d)
136136

137-
# rechunk the data to avoid irregular chunks
138-
coords = list(result["scale0"].coords.keys())
139-
result = result.chunk(dict.fromkeys(coords, "auto"))
140-
141137
from dask.array.core import _check_regular_chunks
142138

143-
# check that the rechunking into regular chunks worked
144-
chunks_still_irregular = False
139+
# rechunk to avoid irregular chunks
145140
for scale in result:
146141
data = result[scale]["image"].data
147-
chunks_still_irregular = chunks_still_irregular or not _check_regular_chunks(data.chunks)
148-
149-
if chunks_still_irregular:
150-
# reported here: https://github.com/scverse/spatialdata/issues/821#issuecomment-2632201695
151-
# seemingly due to this bug: https://github.com/dask/dask/issues/11713
152-
CHUNK_SIZE = 1024
153-
rechunk_strategy = dict.fromkeys(coords, CHUNK_SIZE)
154-
if "c" in coords:
155-
rechunk_strategy["c"] = result["scale0"]["image"].chunks[0][0]
156-
result = result.chunk(rechunk_strategy)
142+
chunks = data.chunks
143+
if not _check_regular_chunks(chunks):
144+
data = data.rechunk(data.chunksize)
145+
if not _check_regular_chunks(data.chunks):
146+
raise ValueError(
147+
f"Chunks are not regular for {scale} of the queried data: {chunks} "
148+
"and could also not be rechunked regularly. Please report this bug."
149+
)
150+
result[scale]["image"].data = data
157151

158-
for scale in result:
159-
data = result[scale]["image"].data
160-
assert _check_regular_chunks(data.chunks), (
161-
f"Chunks are not regular for the {scale} of the queried data: {data.chunks}. Please report this bug."
162-
)
163152
return result
164153

165154

166155
def _process_query_result(
167156
result: DataArray | DataTree, translation_vector: ArrayLike, axes: tuple[str, ...]
168157
) -> DataArray | DataTree | None:
158+
from dask.array.core import _check_regular_chunks
159+
169160
from spatialdata.transformations import get_transformation, set_transformation
170161

171162
if isinstance(result, DataArray):
172163
if 0 in result.shape:
173164
return None
174-
# rechunk the data to avoid irregular chunks
175-
result = result.chunk("auto")
165+
# rechunk to avoid irregular chunks
166+
if not _check_regular_chunks(result.data.chunks):
167+
result.data = result.data.rechunk(result.data.chunksize)
176168
elif isinstance(result, DataTree):
177169
result = _process_data_tree_query_result(result)
178170
if result is None:

0 commit comments

Comments
 (0)