Skip to content

Commit 54f7bf0

Browse files
fm3 and Youri K authored
read multi-page tiffs (#252)
* add tifffile dependency, deactivate python 3.6
* add imagecodecs dependency
* implement first version of multipage tif support
* fix formatting
* fix linting
* fix reading for single page tiff files
* remove unnecessary if
* fix image readers
* fix linting
* correctly read dimensions from tiff series
* update formatting
* fix linting
* fix formatting
* fix linting
* correctly read image formats
* add missing types
* correctly read all channels of data
* implement feedback

Co-authored-by: Youri K <[email protected]>
1 parent c15bddb commit 54f7bf0

File tree

7 files changed

+235
-102
lines changed

7 files changed

+235
-102
lines changed

.github/workflows/main.yml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ jobs:
88
strategy:
99
max-parallel: 4
1010
matrix:
11-
python-version: [3.6, 3.7, 3.8]
11+
python-version: [3.7, 3.8]
1212

1313
steps:
1414
- uses: actions/checkout@v1
@@ -25,10 +25,10 @@ jobs:
2525
run: |
2626
pip install poetry
2727
poetry install
28-
28+
2929
- name: Decompress test data
3030
run: tar -xzvf testdata/WT1_wkw.tar.gz
31-
31+
3232
- name: Check formatting
3333
run: poetry run black --check .
3434

@@ -38,10 +38,10 @@ jobs:
3838
- name: Check typing
3939
run: |
4040
./typecheck.sh
41-
41+
4242
- name: Python tests
4343
run: poetry run pytest tests
44-
44+
4545
- name: Smoke test docker
4646
run: |
4747
docker run --rm \
@@ -59,19 +59,19 @@ jobs:
5959

6060
- name: Test tile cubing
6161
run: tests/scripts/tile_cubing.sh
62-
62+
6363
- name: Test simple tiff cubing
6464
run: tests/scripts/simple_tiff_cubing.sh
65-
65+
6666
- name: Test simple tiff cubing (no compression)
6767
run: tests/scripts/simple_tiff_cubing_no_compression.sh
68-
68+
6969
- name: Test metadata generation
7070
run: tests/scripts/meta_generation.sh
71-
71+
7272
- name: Test KNOSSOS conversion
7373
run: tests/scripts/knossos_conversion.sh
74-
74+
7575
- name: Decompress reference magnification data
7676
run: |
7777
mkdir -p testdata/tiff_mag_2_reference
@@ -98,7 +98,7 @@ jobs:
9898
DOCKER_PASS: ${{ secrets.DOCKER_PASS }}
9999
run: |
100100
echo $DOCKER_PASS | docker login -u $DOCKER_USER --password-stdin
101-
101+
102102
- name: Push docker images
103103
run: |
104104
docker push scalableminds/webknossos-cuber:$GITHUB_SHA

poetry.lock

Lines changed: 103 additions & 69 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ readme = "README.md"
77
license = "AGPL-3.0"
88

99
[tool.poetry.dependencies]
10-
python = "^3.6"
10+
python = "^3.7"
1111
scipy = "^1.4.0"
1212
numpy = "^1.17.4"
1313
pillow = "^6.2.1"
@@ -19,6 +19,8 @@ psutil = "^5.6.7"
1919
nibabel = "^2.5.1"
2020
scikit-image = "^0.16.2"
2121
scikit-learn = "^0.24.0"
22+
tifffile = "^2020.11.26"
23+
imagecodecs = "^2020.5.30"
2224

2325
[tool.poetry.dev-dependencies]
2426
pylint = "^2.6.0"

tests/test_utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,16 @@
33
import wkw
44
from wkcuber.mag import Mag
55
import os
6+
from shutil import rmtree
67

78
BLOCK_LEN = 32
89

910

11+
def delete_dir(relative_path):
12+
if os.path.exists(relative_path) and os.path.isdir(relative_path):
13+
rmtree(relative_path)
14+
15+
1016
def test_get_chunks():
1117
source = list(range(0, 48))
1218
target = list(get_chunks(source, 8))
@@ -42,6 +48,8 @@ def test_buffered_slice_writer():
4248
mag = Mag(1)
4349
dataset_path = os.path.join(dataset_dir, layer_name, mag.to_layer_name())
4450

51+
delete_dir(dataset_dir)
52+
4553
with BufferedSliceWriter(dataset_dir, layer_name, dtype, origin, mag=mag) as writer:
4654
for i in range(13):
4755
writer.write_slice(i, test_img)

wkcuber/cubing.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,9 @@ def find_source_filenames(source_path: str) -> List[str]:
109109
return natsorted(source_files)
110110

111111

112-
def read_image_file(file_name: str, dtype: type) -> np.ndarray:
112+
def read_image_file(file_name: str, dtype: type, z_slice: int) -> np.ndarray:
113113
try:
114-
return image_reader.read_array(file_name, dtype)
114+
return image_reader.read_array(file_name, dtype, z_slice)
115115
except Exception as exc:
116116
logging.error("Reading of file={} failed with {}".format(file_name, exc))
117117
raise exc
@@ -176,7 +176,7 @@ def cubing_job(
176176
for z, file_name in zip(z_batch, source_file_batch):
177177
# Image shape will be (x, y, channel_count, z=1)
178178
image = read_image_file(
179-
file_name, target_wkw_info.header.voxel_type
179+
file_name, target_wkw_info.header.voxel_type, z
180180
)
181181

182182
if not pad:
@@ -237,13 +237,19 @@ def cubing(
237237
batch_size: int,
238238
args: Namespace,
239239
) -> dict:
240-
241240
source_files = find_source_filenames(source_path)
242241

243242
# All images are assumed to have equal dimensions
244243
num_x, num_y = image_reader.read_dimensions(source_files[0])
245244
num_channels = image_reader.read_channel_count(source_files[0])
246-
num_z = len(source_files)
245+
num_z_slices_per_file = image_reader.read_z_slices_per_file(source_files[0])
246+
assert (
247+
num_z_slices_per_file == 1 or len(source_files) == 1
248+
), "Multi page TIFF support only for single files"
249+
if num_z_slices_per_file > 1:
250+
num_z = num_z_slices_per_file
251+
else:
252+
num_z = len(source_files)
247253

248254
target_mag = Mag(args.target_mag)
249255
target_wkw_info = WkwDatasetInfo(
@@ -277,14 +283,19 @@ def cubing(
277283
# Prepare z batches
278284
max_z = min(num_z + start_z, z + BLOCK_LEN)
279285
z_batch = list(range(z, max_z))
286+
# Prepare source files array
287+
if len(source_files) > 1:
288+
source_files_array = source_files[z - start_z : max_z - start_z]
289+
else:
290+
source_files_array = source_files * (max_z - z)
280291
# Prepare job
281292
job_args.append(
282293
(
283294
target_wkw_info,
284295
z_batch,
285296
target_mag,
286297
interpolation_mode,
287-
source_files[z - start_z : max_z - start_z],
298+
source_files_array,
288299
batch_size,
289300
(num_x, num_y),
290301
args.pad,

wkcuber/image_readers.py

Lines changed: 92 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,30 @@
77

88
from .vendor.dm3 import DM3
99
from .vendor.dm4 import DM4File, DM4TagHeader
10+
from tifffile import TiffFile
1011

1112
# Disable PIL's maximum image limit.
1213
Image.MAX_IMAGE_PIXELS = None
1314

1415

15-
class PillowImageReader:
16-
def read_array(self, file_name: str, dtype: np.dtype) -> np.ndarray:
16+
class ImageReader:
17+
def read_array(self, file_name: str, dtype: np.dtype, z_slice: int) -> np.ndarray:
18+
pass
19+
20+
def read_dimensions(self, file_name: str) -> Tuple[int, int]:
21+
pass
22+
23+
def read_channel_count(self, file_name: str) -> int:
24+
pass
25+
26+
def read_z_slices_per_file(
27+
self, file_name: str # pylint: disable=unused-argument
28+
) -> int:
29+
return 1
30+
31+
32+
class PillowImageReader(ImageReader):
33+
def read_array(self, file_name: str, dtype: np.dtype, z_slice: int) -> np.ndarray:
1734
this_layer = np.array(Image.open(file_name), dtype)
1835
this_layer = this_layer.swapaxes(0, 1)
1936
this_layer = this_layer.reshape(this_layer.shape + (1,))
@@ -38,8 +55,8 @@ def to_target_datatype(data: np.ndarray, target_dtype: np.dtype) -> np.ndarray:
3855
return (data / factor).astype(target_dtype)
3956

4057

41-
class Dm3ImageReader:
42-
def read_array(self, file_name: str, dtype: np.dtype) -> np.ndarray:
58+
class Dm3ImageReader(ImageReader):
59+
def read_array(self, file_name: str, dtype: np.dtype, z_slice: int) -> np.ndarray:
4360
dm3_file = DM3(file_name)
4461
this_layer = to_target_datatype(dm3_file.imagedata, dtype)
4562
this_layer = this_layer.swapaxes(0, 1)
@@ -55,7 +72,7 @@ def read_channel_count(self, _file_name: str) -> int:
5572
return 1
5673

5774

58-
class Dm4ImageReader:
75+
class Dm4ImageReader(ImageReader):
5976
def _read_tags(self, dm4file: DM4File) -> Tuple[DM4File.DM4TagDir, DM4TagHeader]:
6077
tags = dm4file.read_directory()
6178
image_data_tag = (
@@ -78,7 +95,7 @@ def _read_dimensions(
7895
)
7996
return width, height
8097

81-
def read_array(self, file_name: str, dtype: np.dtype) -> np.ndarray:
98+
def read_array(self, file_name: str, dtype: np.dtype, z_slice: int) -> np.ndarray:
8299
dm4file = DM4File.open(file_name)
83100
image_data_tag, image_tag = self._read_tags(dm4file)
84101
width, height = self._read_dimensions(dm4file, image_data_tag)
@@ -94,7 +111,6 @@ def read_array(self, file_name: str, dtype: np.dtype) -> np.ndarray:
94111
return data
95112

96113
def read_dimensions(self, file_name: str) -> Tuple[int, int]:
97-
98114
dm4file = DM4File.open(file_name)
99115
image_data_tag, _ = self._read_tags(dm4file)
100116
dimensions = self._read_dimensions(dm4file, image_data_tag)
@@ -107,25 +123,83 @@ def read_channel_count(self, _file_name: str) -> int:
107123
return 1
108124

109125

110-
class ImageReader:
126+
def find_count_of_axis(tif_file: TiffFile, axis: str) -> int:
127+
assert len(tif_file.series) == 1, "only single tif series are supported"
128+
tif_series = tif_file.series[0]
129+
index = tif_series.axes.find(axis)
130+
if index == -1:
131+
return 1
132+
else:
133+
return tif_series.shape[index] # pylint: disable=unsubscriptable-object
134+
135+
136+
class TiffImageReader(ImageReader):
137+
def read_array(self, file_name: str, dtype: np.dtype, z_slice: int) -> np.ndarray:
138+
with TiffFile(file_name) as tif_file:
139+
num_channels = self.read_channel_count(file_name)
140+
if len(tif_file.pages) > num_channels:
141+
data = np.array(
142+
list(
143+
map(
144+
lambda x: x.asarray(),
145+
tif_file.pages[
146+
z_slice * num_channels : z_slice * num_channels
147+
+ num_channels
148+
],
149+
)
150+
),
151+
dtype,
152+
)
153+
else:
154+
data = np.array(
155+
list(map(lambda x: x.asarray(), tif_file.pages[0:num_channels])),
156+
dtype,
157+
)
158+
# transpose data to shape(x, y, channel_count)
159+
data = np.transpose(
160+
data,
161+
(
162+
tif_file.pages[0].axes.find("X") + 1,
163+
tif_file.pages[0].axes.find("Y") + 1,
164+
0,
165+
),
166+
)
167+
data = data.reshape(data.shape + (1,))
168+
return data
169+
170+
def read_dimensions(self, file_name: str) -> Tuple[int, int]:
171+
with TiffFile(file_name) as tif_file:
172+
return find_count_of_axis(tif_file, "X"), find_count_of_axis(tif_file, "Y")
173+
174+
def read_channel_count(self, file_name: str) -> int:
175+
with TiffFile(file_name) as tif_file:
176+
return find_count_of_axis(tif_file, "C")
177+
178+
def read_z_slices_per_file(self, file_name: str) -> int:
179+
with TiffFile(file_name) as tif_file:
180+
return find_count_of_axis(tif_file, "Z")
181+
182+
183+
class ImageReaderManager:
111184
def __init__(self) -> None:
112185
self.readers: Dict[
113-
str, Union[PillowImageReader, Dm3ImageReader, Dm4ImageReader]
186+
str,
187+
Union[TiffImageReader, PillowImageReader, Dm3ImageReader, Dm4ImageReader],
114188
] = {
115-
".tif": PillowImageReader(),
116-
".tiff": PillowImageReader(),
189+
".tif": TiffImageReader(),
190+
".tiff": TiffImageReader(),
117191
".jpg": PillowImageReader(),
118192
".jpeg": PillowImageReader(),
119193
".png": PillowImageReader(),
120194
".dm3": Dm3ImageReader(),
121195
".dm4": Dm4ImageReader(),
122196
}
123197

124-
def read_array(self, file_name: str, dtype: np.dtype) -> np.ndarray:
198+
def read_array(self, file_name: str, dtype: np.dtype, z_slice: int) -> np.ndarray:
125199
_, ext = path.splitext(file_name)
126200

127201
# Image shape will be (x, y, channel_count, z=1) or (x, y, z=1)
128-
image = self.readers[ext].read_array(file_name, dtype)
202+
image = self.readers[ext].read_array(file_name, dtype, z_slice)
129203
# Standardize the image shape to (x, y, channel_count, z=1)
130204
if image.ndim == 3:
131205
image = image.reshape(image.shape + (1,))
@@ -140,5 +214,9 @@ def read_channel_count(self, file_name: str) -> int:
140214
_, ext = path.splitext(file_name)
141215
return self.readers[ext].read_channel_count(file_name)
142216

217+
def read_z_slices_per_file(self, file_name: str) -> int:
218+
_, ext = path.splitext(file_name)
219+
return self.readers[ext].read_z_slices_per_file(file_name)
220+
143221

144-
image_reader = ImageReader()
222+
image_reader = ImageReaderManager()

wkcuber/tile_cubing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ def tile_cubing_job(
223223
if file_name:
224224
# read the image
225225
image = read_image_file(
226-
file_name, target_wkw_info.header.voxel_type
226+
file_name, target_wkw_info.header.voxel_type, z
227227
)
228228
slices.append(image)
229229
else:

0 commit comments

Comments
 (0)