Skip to content

Commit cd69fb7

Browse files
High level dataset api chunking (#184)
* only log downsampling for each MB ob precessed data * log every gigabyte instead of every megabyte * implement PR feedback * WIP: implement high level dataset api * WIP: implement TiffMag * implement test for dataset api * refactor high level dataset api * add test data for WKDataset * improve high level dataset api and add more tests * use seed in tests to achieve deterministic behaviour * implement a test for writing out of bounds with a wk_slice * add possibility to define pattern for TiffDatasets * implement tiled tiff * improve quality of dataset tests * make naming schema of test files consistent * implement PR feedback * rename tiffs in testdata * Improve error message and reformat code * add type annotation * rename Slice to View * remove comments and reformat code * add scikit-image (which is needed for skimage) as dependency * update version of scikit-image * fix relative paths in tests * add possibility to get data of specific tile * reformat code * add test for png images * reformat code * make TiledTiffDataset a seperate class * add support for opening datasets which do not have num_channels in properties.json * add test for advanced pattern * use more efficient method to detect tile_ranges * rename file path of test case * support largestSegmentationId in properties * restructure properties * add dataType attribute to properties * WIP: implement chunking * implement PR feedback * remove comment * fix relative paths * fix writing data to an unopened dataset and support different orders of the dimensions with patterns * Adjust docstring of method Co-Authored-By: Philipp Otto <[email protected]> * use type to call subclass constructor * reformat code * Support tiled tiffs in high level dataset API (#178) * add possibility to define pattern for TiffDatasets * implement tiled tiff * add possibility to get data of specific tile * add test for png images * reformat code * make TiledTiffDataset a seperate class * add support for opening datasets which do 
not have num_channels in properties.json * add test for advanced pattern * use more efficient method to detect tile_ranges * rename file path of test case * support largestSegmentationId in properties * restructure properties * add dataType attribute to properties * implement PR feedback * fix writing data to an unopened dataset and support different orders of the dimensions with patterns * Adjust docstring of method Co-Authored-By: Philipp Otto <[email protected]> Co-authored-by: Philipp Otto <[email protected]> * implement get_view for a View; fix check_chunk_size for WKView * reformat code Co-authored-by: Philipp Otto <[email protected]>
1 parent 42f8227 commit cd69fb7

File tree

5 files changed

+579
-25
lines changed

5 files changed

+579
-25
lines changed

tests/test_dataset.py

Lines changed: 243 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,109 @@
1111
from wkcuber.api.Layer import Layer
1212
from wkcuber.api.Properties.DatasetProperties import TiffProperties, WKProperties
1313
from wkcuber.api.TiffData.TiffMag import TiffReader
14+
from wkcuber.api.bounding_box import BoundingBox
1415
from wkcuber.mag import Mag
16+
from wkcuber.utils import get_executor_for_args
1517

1618

1719
def delete_dir(relative_path):
    """Remove the directory at *relative_path* if it exists; otherwise do nothing."""
    # isdir is False for non-existent paths, so a separate exists() check is redundant
    if path.isdir(relative_path):
        rmtree(relative_path)
2022

2123

24+
def chunk_job(args):
    """Read the chunk's data, add 50 to every voxel, and write it back."""
    view, _ = args

    # increment the color value of each voxel; drop a singleton channel axis
    # so the written array matches what single-channel writes expect
    chunk_data = view.read(view.size)
    if chunk_data.shape[0] == 1:
        chunk_data = chunk_data[0]
    view.write(chunk_data + 50)
33+
34+
35+
def advanced_chunk_job(args):
    """Overwrite the chunk with a constant derived from its global offset.

    Every voxel of this chunk gets the same marker value so that
    for_each_chunking_advanced can verify which chunk wrote which region.
    """
    view, _ = args

    template = view.read(view.size)
    marker = np.uint8(sum(view.global_offset))
    view.write(np.full(template.shape, marker, dtype=np.uint8))
42+
43+
44+
def for_each_chunking_with_wrong_chunk_size(view):
    """Assert that view.for_each_chunk rejects each of several invalid chunk sizes.

    Each chunk size below is expected to make for_each_chunk fail with an
    AssertionError: (0, 64, 64) contains a zero, and (16, 64, 64) /
    (100, 64, 64) are not valid chunk dimensions for this view.
    If no AssertionError is raised, the test fails with a plain Exception.
    """
    invalid_chunk_sizes = [(0, 64, 64), (16, 64, 64), (100, 64, 64)]
    with get_executor_for_args(None) as executor:
        for invalid_chunk_size in invalid_chunk_sizes:
            try:
                view.for_each_chunk(
                    chunk_job,
                    job_args_per_chunk="test",
                    chunk_size=invalid_chunk_size,
                    executor=executor,
                )
                raise Exception(
                    "The test did not throw an exception even though it should. "
                    "The chunk_size {} is not valid for this view.".format(
                        invalid_chunk_size
                    )
                )
            except AssertionError:
                pass
85+
86+
87+
def for_each_chunking_advanced(ds, view):
    """Chunk *view* with advanced_chunk_job, then verify every sub-region.

    advanced_chunk_job fills each (64, 64, 64) chunk with
    sum(chunk.global_offset), so a sub-view lying entirely inside one chunk
    must contain exactly that constant everywhere.
    """
    with get_executor_for_args(None) as executor:
        view.for_each_chunk(
            advanced_chunk_job,
            job_args_per_chunk="test",
            chunk_size=(64, 64, 64),
            executor=executor,
        )

    # each (offset, size) pair lies within a single chunk of the view
    sub_regions = [
        ((10, 10, 10), (54, 54, 54)),
        ((10, 64, 10), (54, 64, 54)),
        ((10, 128, 10), (54, 32, 54)),
        ((64, 10, 10), (64, 54, 54)),
        ((64, 64, 10), (64, 64, 54)),
        ((64, 128, 10), (64, 32, 54)),
        ((128, 10, 10), (32, 54, 54)),
        ((128, 64, 10), (32, 64, 54)),
        ((128, 128, 10), (32, 32, 54)),
    ]
    for offset, size in sub_regions:
        sub_view = ds.get_view(
            "color", "1", size=size, offset=offset, is_bounded=False
        )
        actual = sub_view.read(sub_view.size)
        expected = np.full(
            actual.shape, np.uint8(sum(sub_view.global_offset)), dtype=np.uint8
        )
        assert np.array_equal(expected, actual)
115+
116+
22117
def get_multichanneled_data(dtype):
23118
data = np.zeros((3, 250, 200, 10), dtype=dtype)
24119
for h in range(10):
@@ -108,7 +203,7 @@ def test_view_read_with_open():
108203
# This test would be the same for TiffDataset
109204

110205
wk_view = WKDataset("./testdata/simple_wk_dataset/").get_view(
111-
"color", "1", size=(32, 32, 32)
206+
"color", "1", size=(16, 16, 16)
112207
)
113208

114209
assert not wk_view._is_opened
@@ -126,7 +221,7 @@ def test_view_read_without_open():
126221
# This test would be the same for TiffDataset
127222

128223
wk_view = WKDataset("./testdata/simple_wk_dataset/").get_view(
129-
"color", "1", size=(32, 32, 32)
224+
"color", "1", size=(16, 16, 16)
130225
)
131226

132227
assert not wk_view._is_opened
@@ -143,7 +238,7 @@ def test_view_wk_write():
143238
copytree("./testdata/simple_wk_dataset/", "./testoutput/simple_wk_dataset/")
144239

145240
wk_view = WKDataset("./testoutput/simple_wk_dataset/").get_view(
146-
"color", "1", size=(100, 100, 100)
241+
"color", "1", size=(16, 16, 16)
147242
)
148243

149244
with wk_view.open():
@@ -161,7 +256,7 @@ def test_view_tiff_write():
161256
copytree("./testdata/simple_tiff_dataset/", "./testoutput/simple_tiff_dataset/")
162257

163258
tiff_view = TiffDataset("./testoutput/simple_tiff_dataset/").get_view(
164-
"color", "1", size=(100, 100, 100)
259+
"color", "1", size=(16, 16, 10)
165260
)
166261

167262
with tiff_view.open():
@@ -182,7 +277,7 @@ def test_view_tiff_write_out_of_bounds():
182277
copytree("./testdata/simple_tiff_dataset/", new_dataset_path)
183278

184279
tiff_view = TiffDataset(new_dataset_path).get_view(
185-
"color", "1", size=(100, 100, 100)
280+
"color", "1", size=(100, 100, 10)
186281
)
187282

188283
with tiff_view.open():
@@ -203,7 +298,7 @@ def test_view_wk_write_out_of_bounds():
203298
delete_dir(new_dataset_path)
204299
copytree("./testdata/simple_wk_dataset/", new_dataset_path)
205300

206-
tiff_view = WKDataset(new_dataset_path).get_view("color", "1", size=(100, 100, 100))
301+
tiff_view = WKDataset(new_dataset_path).get_view("color", "1", size=(16, 16, 16))
207302

208303
with tiff_view.open():
209304
try:
@@ -217,6 +312,32 @@ def test_view_wk_write_out_of_bounds():
217312
pass
218313

219314

315+
def test_wk_view_out_of_bounds():
    """A bounded WK view must not exceed the magnification's bounding box."""
    try:
        # The size of the mag is (24, 24, 24). Trying to get a bigger view
        # (without is_bounded=False) should throw an error.
        WKDataset("./testdata/simple_wk_dataset/").get_view(
            "color", "1", size=(100, 100, 100)
        )
        # fixed: the message previously referenced the wrong test name
        raise Exception(
            "The test 'test_wk_view_out_of_bounds' did not throw an exception even though it should"
        )
    except AssertionError:
        pass
326+
327+
328+
def test_tiff_view_out_of_bounds():
    """A bounded tiff view must not exceed the magnification's bounding box."""
    try:
        # The size of the mag is (24, 24, 24). Trying to get a bigger view
        # (without is_bounded=False) should throw an error.
        TiffDataset("./testdata/simple_tiff_dataset/").get_view(
            "color", "1", size=(100, 100, 100)
        )
        # fixed: the message previously referenced the wrong test name
        raise Exception(
            "The test 'test_tiff_view_out_of_bounds' did not throw an exception even though it should"
        )
    except AssertionError:
        pass
339+
340+
220341
def test_tiff_write_out_of_bounds():
221342
new_dataset_path = "./testoutput/simple_tiff_dataset_out_of_bounds/"
222343

@@ -690,6 +811,120 @@ def test_properties_with_segmentation():
690811
assert input_data == output_data
691812

692813

814+
def test_chunking_wk():
    """Chunked in-place increment over a WK view must touch every voxel exactly once."""
    delete_dir("./testoutput/chunking_dataset_wk/")
    copytree("./testdata/simple_wk_dataset/", "./testoutput/chunking_dataset_wk/")

    view = WKDataset("./testoutput/chunking_dataset_wk/").get_view(
        "color", "1", size=(256, 256, 256), is_bounded=False
    )

    before = view.read(view.size)

    # chunk_job adds 50 to each voxel of its chunk
    with get_executor_for_args(None) as executor:
        view.for_each_chunk(
            chunk_job,
            job_args_per_chunk="test",
            chunk_size=(64, 64, 64),
            executor=executor,
        )

    after = view.read(view.size)
    assert np.array_equal(before + 50, after)
833+
834+
835+
def test_chunking_wk_advanced():
    """Run the advanced chunking scenario on a fresh copy of the WK dataset."""
    output_path = "./testoutput/chunking_dataset_wk_advanced/"
    delete_dir(output_path)
    copytree("./testdata/simple_wk_dataset/", output_path)

    dataset = WKDataset(output_path)
    chunk_view = dataset.get_view(
        "color", "1", size=(150, 150, 54), offset=(10, 10, 10), is_bounded=False
    )
    for_each_chunking_advanced(dataset, chunk_view)
846+
847+
848+
def test_chunking_wk_wrong_chunk_size():
    """Invalid chunk sizes must be rejected when chunking a WK view."""
    output_path = "./testoutput/chunking_dataset_wk_with_wrong_chunk_size/"
    delete_dir(output_path)
    copytree("./testdata/simple_wk_dataset/", output_path)

    chunk_view = WKDataset(output_path).get_view(
        "color", "1", size=(256, 256, 256), is_bounded=False
    )

    for_each_chunking_with_wrong_chunk_size(chunk_view)
860+
861+
862+
def test_chunking_tiff():
    """Chunked in-place increment over a tiff view, one z-slice per chunk."""
    delete_dir("./testoutput/chunking_dataset_tiff/")
    copytree("./testdata/simple_tiff_dataset/", "./testoutput/chunking_dataset_tiff/")

    view = TiffDataset("./testoutput/chunking_dataset_tiff/").get_view(
        "color", "1", size=(265, 265, 10)
    )

    before = view.read(view.size)

    # chunk per z-slice; chunk_job adds 50 to every voxel
    with get_executor_for_args(None) as executor:
        view.for_each_chunk(
            chunk_job,
            job_args_per_chunk="test",
            chunk_size=(265, 265, 1),
            executor=executor,
        )

    assert np.array_equal(before + 50, view.read(view.size))
882+
883+
884+
def test_chunking_tiff_wrong_chunk_size():
    """Invalid chunk sizes must be rejected when chunking a tiff view."""
    output_path = "./testoutput/chunking_dataset_tiff_with_wrong_chunk_size/"
    delete_dir(output_path)
    copytree("./testdata/simple_tiff_dataset/", output_path)

    chunk_view = TiffDataset(output_path).get_view(
        "color", "1", size=(256, 256, 256), is_bounded=False
    )

    for_each_chunking_with_wrong_chunk_size(chunk_view)
896+
897+
898+
def test_chunking_tiled_tiff_wrong_chunk_size():
    """Invalid chunk sizes must be rejected when chunking a tiled tiff view."""
    output_path = "./testoutput/chunking_dataset_tiled_tiff_with_wrong_chunk_size/"
    delete_dir(output_path)

    dataset = TiledTiffDataset.create(
        output_path,
        scale=(1, 1, 1),
        tile_size=(32, 32),
        pattern="{xxxx}/{yyyy}/{zzzz}.tif",
    )
    dataset.add_layer("color", Layer.COLOR_TYPE).add_mag("1")
    chunk_view = dataset.get_view("color", "1", size=(256, 256, 256), is_bounded=False)

    for_each_chunking_with_wrong_chunk_size(chunk_view)
911+
912+
913+
def test_chunking_tiled_tiff_advanced():
    # Runs the advanced chunking scenario (advanced_chunk_job + per-region
    # verification) on a fresh dataset copy.
    # NOTE(review): despite the name, this test copies the WK test dataset and
    # opens it with WKDataset, not TiledTiffDataset — confirm whether this is
    # intended or a copy-paste leftover from test_chunking_wk_advanced.
    delete_dir("./testoutput/chunking_dataset_tiled_tiff_advanced/")
    copytree(
        "./testdata/simple_wk_dataset/",
        "./testoutput/chunking_dataset_tiled_tiff_advanced/",
    )

    ds = WKDataset("./testoutput/chunking_dataset_tiled_tiff_advanced/")
    view = ds.get_view(
        "color", "1", size=(150, 150, 54), offset=(10, 10, 10), is_bounded=False
    )

    for_each_chunking_advanced(ds, view)
926+
927+
693928
def test_tiled_tiff_inverse_pattern():
694929
delete_dir("./testoutput/tiled_tiff_dataset_inverse")
695930
tiled_tiff_ds = TiledTiffDataset.create(
@@ -699,7 +934,7 @@ def test_tiled_tiff_inverse_pattern():
699934
pattern="{zzz}/{xxx}/{yyy}.tif",
700935
)
701936

702-
mag = tiled_tiff_ds.add_layer("color", "color").add_mag("1")
937+
mag = tiled_tiff_ds.add_layer("color", Layer.COLOR_TYPE).add_mag("1")
703938

704939
data = np.zeros((250, 200, 10), dtype=np.uint8)
705940
for h in range(10):
@@ -738,7 +973,7 @@ def test_view_write_without_open():
738973

739974
ds.get_layer("color").add_mag("1")
740975

741-
wk_view = ds.get_view("color", "1", size=(32, 64, 16))
976+
wk_view = ds.get_view("color", "1", size=(32, 64, 16), is_bounded=False)
742977

743978
assert not wk_view._is_opened
744979

wkcuber/api/Dataset.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,12 +112,13 @@ def delete_layer(self, layer_name):
112112
# delete files on disk
113113
rmtree(join(self.path, layer_name))
114114

115-
def get_view(
    self, layer_name, mag_name, size, offset=(0, 0, 0), is_bounded=True
) -> View:
    """Return a View on the given layer and magnification of this dataset.

    :param layer_name: name of the layer (e.g. "color")
    :param mag_name: name of the magnification (e.g. "1")
    :param size: extent of the view in voxels
    :param offset: global offset of the view; defaults to the origin
    :param is_bounded: when True the requested region must fit inside the
        magnification's existing bounding box (violations raise an
        AssertionError); pass False to obtain a view beyond those bounds
    """
    layer = self.get_layer(layer_name)
    mag = layer.get_mag(mag_name)

    return mag.get_view(size=size, offset=offset, is_bounded=is_bounded)
121122

122123
def _create_layer(self, layer_name, dtype, num_channels) -> Layer:
123124
raise NotImplementedError

0 commit comments

Comments
 (0)