Skip to content

Commit 887f1ce

Browse files
Allow writing a subset of a compressed wkw dataset (#241)
* Allow writing a subset of a compressed wkw dataset
* Add a parameter for compressed writes
* Make chunks in 'for_each_chunk' bounded to their bounding box
* Reformat code
* Update wkcuber/api/View.py
* Change the expected exception

Co-authored-by: Jonathan Striebel <[email protected]>
1 parent e794219 commit 887f1ce

File tree

3 files changed

+217
-11
lines changed

3 files changed

+217
-11
lines changed

tests/test_dataset.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,16 @@
77
import numpy as np
88
from shutil import rmtree, copytree
99

10+
from wkw.wkw import WKWException
11+
1012
from wkcuber.api.Dataset import WKDataset, TiffDataset, TiledTiffDataset
1113
from os import path, makedirs
1214

1315
from wkcuber.api.Layer import Layer
1416
from wkcuber.api.Properties.DatasetProperties import TiffProperties, WKProperties
1517
from wkcuber.api.TiffData.TiffMag import TiffReader
1618
from wkcuber.api.bounding_box import BoundingBox
19+
from wkcuber.compress import compress_mag_inplace
1720
from wkcuber.mag import Mag
1821
from wkcuber.utils import get_executor_for_args
1922

@@ -1279,6 +1282,158 @@ def test_view_offsets():
12791282
pass
12801283

12811284

1285+
def test_writing_subset_of_compressed_data_multi_channel():
    """Write an unaligned subset into a compressed multi-channel WK dataset
    and verify that both the new and the untouched old data read back correctly."""
    delete_dir("./testoutput/compressed_data/")

    # create uncompressed dataset
    write_data1 = (np.random.rand(3, 20, 40, 60) * 255).astype(np.uint8)
    WKDataset.create(
        os.path.abspath("./testoutput/compressed_data"), scale=(1, 1, 1)
    ).add_layer("color", Layer.COLOR_TYPE, num_channels=3).add_mag(
        "1", block_len=8, file_len=8
    ).write(
        write_data1
    )

    # compress data
    compress_mag_inplace(
        os.path.abspath("./testoutput/compressed_data/"),
        layer_name="color",
        mag=Mag("1"),
    )

    # open compressed dataset
    compressed_mag = (
        WKDataset("./testoutput/compressed_data").get_layer("color").get_mag("1")
    )

    # write a subset that is not aligned with the compressed file grid
    write_data2 = (np.random.rand(3, 10, 10, 10) * 255).astype(np.uint8)
    compressed_mag.write(
        offset=(10, 20, 30), data=write_data2, allow_compressed_write=True
    )

    # BUG FIX: np.array_equal was previously called without 'assert', so these
    # checks could never fail regardless of the result.
    assert np.array_equal(
        write_data2, compressed_mag.read(offset=(10, 20, 30), size=(10, 10, 10))
    )  # the new data was written
    assert np.array_equal(
        write_data1[:, :10, :20, :30],
        compressed_mag.read(offset=(0, 0, 0), size=(10, 20, 30)),
    )  # the old data is still there
1322+
1323+
1324+
def test_writing_subset_of_compressed_data_single_channel():
    """Write an unaligned subset into a compressed single-channel WK dataset
    and verify that both the new and the untouched old data read back correctly."""
    delete_dir("./testoutput/compressed_data/")

    # create uncompressed dataset
    write_data1 = (np.random.rand(20, 40, 60) * 255).astype(np.uint8)
    WKDataset.create(
        os.path.abspath("./testoutput/compressed_data"), scale=(1, 1, 1)
    ).add_layer("color", Layer.COLOR_TYPE).add_mag("1", block_len=8, file_len=8).write(
        write_data1
    )

    # compress data
    compress_mag_inplace(
        os.path.abspath("./testoutput/compressed_data/"),
        layer_name="color",
        mag=Mag("1"),
    )

    # open compressed dataset
    compressed_mag = (
        WKDataset("./testoutput/compressed_data").get_layer("color").get_mag("1")
    )

    # write a subset that is not aligned with the compressed file grid
    write_data2 = (np.random.rand(10, 10, 10) * 255).astype(np.uint8)
    compressed_mag.write(
        offset=(10, 20, 30), data=write_data2, allow_compressed_write=True
    )

    # BUG FIX: np.array_equal was previously called without 'assert', so these
    # checks could never fail. np.squeeze drops a possible leading channel
    # dimension of read() before comparing against the 3D write data.
    assert np.array_equal(
        write_data2,
        np.squeeze(compressed_mag.read(offset=(10, 20, 30), size=(10, 10, 10))),
    )  # the new data was written
    assert np.array_equal(
        write_data1[:10, :20, :30],
        np.squeeze(compressed_mag.read(offset=(0, 0, 0), size=(10, 20, 30))),
    )  # the old data is still there
1359+
1360+
1361+
def test_writing_subset_of_compressed_data():
    """An unaligned write to compressed data without 'allow_compressed_write=True'
    must be rejected with a WKWException."""
    delete_dir("./testoutput/compressed_data/")

    # build an uncompressed dataset first
    dataset = WKDataset.create(
        os.path.abspath("./testoutput/compressed_data"), scale=(1, 1, 1)
    )
    mag = dataset.add_layer("color", Layer.COLOR_TYPE).add_mag(
        "1", block_len=8, file_len=8
    )
    mag.write((np.random.rand(20, 40, 60) * 255).astype(np.uint8))

    # compress it in place
    compress_mag_inplace(
        os.path.abspath("./testoutput/compressed_data/"),
        layer_name="color",
        mag=Mag("1"),
    )

    # reopen the now-compressed magnification
    compressed_mag = (
        WKDataset("./testoutput/compressed_data").get_layer("color").get_mag("1")
    )

    unaligned_data = (np.random.rand(10, 10, 10) * 255).astype(np.uint8)
    with pytest.raises(WKWException):
        # calling 'write' with unaligned data on compressed data without setting 'allow_compressed_write=True'
        compressed_mag.write(offset=(10, 20, 30), data=unaligned_data)
1389+
1390+
1391+
def test_writing_subset_of_chunked_compressed_data():
    """Compressed writes through a bounded view must stay inside the view's
    bounding box (after alignment), and valid writes must round-trip."""
    delete_dir("./testoutput/compressed_data/")

    # create uncompressed dataset
    write_data1 = (np.random.rand(100, 200, 300) * 255).astype(np.uint8)
    WKDataset.create(
        os.path.abspath("./testoutput/compressed_data"), scale=(1, 1, 1)
    ).add_layer("color", Layer.COLOR_TYPE).add_mag("1", block_len=8, file_len=8).write(
        write_data1
    )

    # compress data
    compress_mag_inplace(
        os.path.abspath("./testoutput/compressed_data/"),
        layer_name="color",
        mag=Mag("1"),
    )

    # open compressed dataset
    compressed_view = WKDataset("./testoutput/compressed_data").get_view(
        "color", "1", size=(100, 200, 300), is_bounded=True
    )

    with pytest.raises(AssertionError):
        # the aligned data (offset=(0,0,0), size=(128, 128, 128)) is NOT fully within the bounding box of the view
        compressed_view.write(
            relative_offset=(10, 20, 30),
            data=(np.random.rand(90, 80, 70) * 255).astype(np.uint8),
            allow_compressed_write=True,
        )

    # the aligned data (offset=(0,0,0), size=(64, 64, 64)) IS fully within the bounding box of the view
    write_data2 = (np.random.rand(50, 40, 30) * 255).astype(np.uint8)
    compressed_view.write(
        relative_offset=(10, 20, 30), data=write_data2, allow_compressed_write=True
    )

    # BUG FIX: np.array_equal was previously called without 'assert', so these
    # checks could never fail. np.squeeze drops a possible leading channel
    # dimension of read() before comparing against the 3D write data.
    assert np.array_equal(
        write_data2,
        np.squeeze(compressed_view.read(offset=(10, 20, 30), size=(50, 40, 30))),
    )  # the new data was written
    assert np.array_equal(
        write_data1[:10, :20, :30],
        np.squeeze(compressed_view.read(offset=(0, 0, 0), size=(10, 20, 30))),
    )  # the old data is still there
1435+
1436+
12821437
def test_add_symlink_layer():
12831438
delete_dir("./testoutput/wk_dataset_with_symlink")
12841439
delete_dir("./testoutput/simple_wk_dataset_copy")

wkcuber/api/MagDataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ def close(self):
2626
def read(self, size, offset=(0, 0, 0)) -> np.array:
2727
return self.view.read(size, offset)
2828

29-
def write(self, data, offset=(0, 0, 0)):
29+
def write(self, data, offset=(0, 0, 0), allow_compressed_write=False):
3030
self._assert_valid_num_channels(data.shape)
31-
self.view.write(data, offset)
31+
self.view.write(data, offset, allow_compressed_write)
3232
layer_properties = self.layer.dataset.properties.data_layers[self.layer.name]
3333
current_offset_in_mag1 = layer_properties.get_bounding_box_offset()
3434
current_size_in_mag1 = layer_properties.get_bounding_box_size()

wkcuber/api/View.py

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import math
22

33
import numpy as np
4-
from wkw import Dataset
4+
from wkw import Dataset, wkw
55

66
from wkcuber.api.TiffData.TiffMag import TiffMag
77
from wkcuber.api.bounding_box import BoundingBox
@@ -36,7 +36,7 @@ def close(self):
3636
self.dataset = None
3737
self._is_opened = False
3838

39-
def write(self, data, relative_offset=(0, 0, 0)):
39+
def write(self, data, relative_offset=(0, 0, 0), allow_compressed_write=False):
4040
was_opened = self._is_opened
4141
# assert the size of the parameter data is not in conflict with the attribute self.size
4242
assert_non_negative_offset(relative_offset)
@@ -47,6 +47,9 @@ def write(self, data, relative_offset=(0, 0, 0)):
4747
sum(x) for x in zip(self.global_offset, relative_offset)
4848
)
4949

50+
if self._is_compressed() and allow_compressed_write:
51+
absolute_offset, data = self._handle_compressed_write(absolute_offset, data)
52+
5053
if not was_opened:
5154
self.open()
5255

@@ -95,7 +98,7 @@ def check_bounds(self, offset, size) -> bool:
9598
def assert_bounds(self, offset, size):
    """Raise an AssertionError when (offset, size) does not fit into this view."""
    if self.check_bounds(offset, size):
        return
    raise AssertionError(
        f"Accessing data out of bounds: The passed parameter 'size' {size} exceeds the size of the current view ({self.size})"
    )
100103

101104
def for_each_chunk(self, work_on_chunk, job_args_per_chunk, chunk_size, executor):
@@ -107,19 +110,22 @@ def for_each_chunk(self, work_on_chunk, job_args_per_chunk, chunk_size, executor
107110
chunk_size, chunk_size
108111
):
109112
relative_offset = np.array(chunk.topleft) - np.array(self.global_offset)
110-
job_args.append(
111-
(
112-
self.get_view(size=chunk.size, relative_offset=relative_offset),
113-
job_args_per_chunk,
114-
)
115-
)
113+
view = self.get_view(size=chunk.size, relative_offset=relative_offset)
114+
view.is_bounded = True
115+
job_args.append((view, job_args_per_chunk))
116116

117117
# execute the work for each chunk
118118
wait_and_ensure_success(executor.map_to_futures(work_on_chunk, job_args))
119119

120120
def _check_chunk_size(self, chunk_size):
121121
raise NotImplementedError
122122

123+
def _is_compressed(self):
    # Base views are treated as uncompressed; subclasses backed by a
    # compressed store override this to enable the aligned-write path.
    return False
125+
126+
def _handle_compressed_write(self, absolute_offset, data):
    # Default no-op hook: uncompressed views can write the data as-is.
    # Subclasses with compressed storage override this to align the write
    # to their storage granularity before it happens.
    return absolute_offset, data
128+
123129
def __enter__(self):
124130
return self
125131

@@ -156,6 +162,51 @@ def _check_chunk_size(self, chunk_size):
156162
f"The passed parameter 'chunk_size' {chunk_size} must be a multiple of (32, 32, 32)."
157163
)
158164

165+
def _is_compressed(self):
    # The wkw data is compressed iff the header declares one of the LZ4 block types.
    return self.header.block_type in (
        wkw.Header.BLOCK_TYPE_LZ4,
        wkw.Header.BLOCK_TYPE_LZ4HC,
    )
170+
171+
def _handle_compressed_write(self, absolute_offset, data):
172+
# calculate aligned bounding box
173+
file_bb = np.full(3, self.header.file_len * self.header.block_len)
174+
absolute_offset_np = np.array(absolute_offset)
175+
margin_to_top_left = absolute_offset_np % file_bb
176+
aligned_offset = absolute_offset_np - margin_to_top_left
177+
bottom_right = absolute_offset_np + np.array(data.shape[-3:])
178+
margin_to_bottom_right = file_bb - (bottom_right % file_bb)
179+
aligned_bottom_right = bottom_right + margin_to_bottom_right
180+
aligned_shape = aligned_bottom_right - aligned_offset
181+
182+
if (
183+
tuple(aligned_offset) != absolute_offset
184+
or tuple(aligned_shape) != data.shape[-3:]
185+
):
186+
# the data is not aligned
187+
# read the aligned bounding box
188+
try:
189+
aligned_data = self.read(offset=aligned_offset, size=aligned_shape)
190+
except AssertionError as e:
191+
raise AssertionError(
192+
f"Writing compressed data failed. The compressed file is not fully inside the bounding box of the view (offset={self.global_offset}, size={self.size}). "
193+
+ str(e)
194+
)
195+
index_slice = (
196+
slice(None, None),
197+
*(
198+
slice(start, end)
199+
for start, end in zip(
200+
margin_to_top_left, bottom_right - aligned_offset
201+
)
202+
),
203+
)
204+
# overwrite the specified data
205+
aligned_data[tuple(index_slice)] = data
206+
return tuple(aligned_offset), aligned_data
207+
else:
208+
return absolute_offset, data
209+
159210

160211
class TiffView(View):
161212
def open(self):

0 commit comments

Comments
 (0)