Skip to content

Commit 65f6d1c

Browse files
leowebulldozer-boy[bot]
authored and committed
add recubing script (#140)
* add recubing script * Merge branch 'master' into recubing * add wkw headers in script to accommodate changed open_wkw function, reformatting * move methods cube_addresses, parse_cube_file_name to utils, use open_wkw in with-context * remove convert_nifti file * remove infer_bounding_box, instead usage of detect_bbox * add more reformatting * add multiprocessing to recubing script * Reformatted * Merge branch 'master' into recubing
1 parent 00185b3 commit 65f6d1c

File tree

3 files changed

+196
-14
lines changed

3 files changed

+196
-14
lines changed

wkcuber/downsampling.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,11 @@
2323
wait_and_ensure_success,
2424
add_isotropic_flag,
2525
setup_logging,
26+
cube_addresses,
27+
parse_cube_file_name,
2628
)
2729

2830
DEFAULT_EDGE_LEN = 256
29-
CUBE_REGEX = re.compile(r"z(\d+)/y(\d+)/x(\d+)(\.wkw)$")
30-
31-
32-
def parse_cube_file_name(filename):
33-
m = CUBE_REGEX.search(filename)
34-
return (int(m.group(3)), int(m.group(2)), int(m.group(1)))
3531

3632

3733
def determine_buffer_edge_len(dataset):
@@ -127,14 +123,6 @@ def create_parser():
127123
return parser
128124

129125

130-
def cube_addresses(source_wkw_info):
131-
# Gathers all WKW cubes in the dataset
132-
with open_wkw(source_wkw_info) as source_wkw:
133-
wkw_addresses = list(parse_cube_file_name(f) for f in source_wkw.list_files())
134-
wkw_addresses.sort()
135-
return wkw_addresses
136-
137-
138126
def downsample(
139127
source_wkw_info,
140128
target_wkw_info,

wkcuber/recubing.py

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
import logging
2+
import wkw
3+
import numpy as np
4+
from argparse import ArgumentParser
5+
from itertools import product
6+
7+
from .metadata import detect_bbox
8+
9+
from .utils import (
10+
add_verbose_flag,
11+
open_wkw,
12+
WkwDatasetInfo,
13+
ensure_wkw,
14+
add_distribution_flags,
15+
setup_logging,
16+
get_executor_for_args,
17+
wait_and_ensure_success,
18+
)
19+
20+
21+
def create_parser():
    """Build the command-line parser for the recubing script.

    Positional arguments are the source and target dataset directories.
    Optional arguments select the layer name, the dtype, the target WKW file
    length and whether compression is switched off. Verbosity and
    distribution flags are added by ``add_verbose_flag`` and
    ``add_distribution_flags``.

    Returns:
        The configured ``ArgumentParser``.
    """

    def boolean(value):
        # ``type=bool`` turns every non-empty string into True, so
        # "--no_compression False" used to *disable* compression. Parse the
        # text explicitly; argparse reports a ValueError as a usage error.
        normalized = str(value).strip().lower()
        if normalized in ("true", "t", "yes", "y", "1"):
            return True
        if normalized in ("false", "f", "no", "n", "0", ""):
            return False
        raise ValueError(f"not a boolean: {value!r}")

    parser = ArgumentParser()

    parser.add_argument(
        "source_path", help="Directory containing the datasource properties."
    )

    parser.add_argument(
        "target_path", help="Output directory for the generated dataset."
    )

    parser.add_argument(
        "--layer_name",
        "-l",
        help="Name of the cubed layer (color or segmentation)",
        default="color",
    )

    parser.add_argument(
        "--dtype",
        "-d",
        help="Target datatype (e.g. uint8, uint16, uint32)",
        default="uint8",
    )

    parser.add_argument(
        "--wkw_file_len", help="Target file length (default 32)", type=int, default=32
    )

    parser.add_argument(
        "--no_compression",
        help="Disable compression of the target dataset (true/false, default false)",
        type=boolean,
        default=False,
    )

    add_verbose_flag(parser)
    add_distribution_flags(parser)

    return parser
61+
62+
63+
def next_lower_divisible_by(number, divisor) -> int:
    """Round ``number`` down to a multiple of ``divisor``.

    A value that is already a multiple of ``divisor`` is returned unchanged.
    """
    return number - number % divisor
66+
67+
68+
def next_higher_divisible_by(number, divisor) -> int:
    """Return the smallest multiple of ``divisor`` strictly above ``number``.

    A value that is already a multiple of ``divisor`` is still moved up by one
    full ``divisor``.
    """
    return (number - number % divisor) + divisor
71+
72+
73+
def recube(
    source_path,
    target_path,
    layer_name,
    dtype,
    wkw_file_len=32,
    compression=True,
    args=None,
):
    """Copy one layer of a WKW dataset into a new dataset with a new file length.

    The bounding box of the source layer is detected, enlarged to whole target
    cubes, and one ``recubing_cube_job`` is scheduled per target cube on the
    executor returned by ``get_executor_for_args``.

    Args:
        source_path: Directory of the source dataset.
        target_path: Directory of the dataset to create.
        layer_name: Name of the layer to recube.
        dtype: Anything ``np.dtype`` accepts; used for both WKW headers.
        wkw_file_len: ``file_len`` of the target WKW header.
        compression: If true the target uses LZ4 blocks, otherwise raw blocks.
        args: Parsed command-line namespace handed to
            ``get_executor_for_args``. When omitted, the module-level ``args``
            set by the ``__main__`` section is used, which keeps the script
            entry point working unchanged.
    """
    if args is None:
        # Previously ``args`` was only ever resolved as a module global, which
        # raised NameError whenever recube() was imported and called directly.
        # NOTE(review): if no global exists either, None is passed on as-is;
        # confirm whether get_executor_for_args accepts that.
        args = globals().get("args")

    block_type = (
        wkw.Header.BLOCK_TYPE_LZ4 if compression else wkw.Header.BLOCK_TYPE_RAW
    )

    target_wkw_header = wkw.Header(
        np.dtype(dtype), file_len=wkw_file_len, block_type=block_type
    )
    target_wkw_info = WkwDatasetInfo(target_path, layer_name, 1, target_wkw_header)
    source_wkw_header = wkw.Header(np.dtype(dtype))
    source_wkw_info = WkwDatasetInfo(source_path, layer_name, 1, source_wkw_header)

    ensure_wkw(target_wkw_info)

    bounding_box_dict = detect_bbox(source_wkw_info.dataset_path, layer_name)
    top_left = bounding_box_dict["topLeft"]
    size = [
        bounding_box_dict["width"],
        bounding_box_dict["height"],
        bounding_box_dict["depth"],
    ]
    bottom_right = [coord + extent for coord, extent in zip(top_left, size)]

    # Edge length of one target cube.
    wkw_cube_size = wkw_file_len * target_wkw_header.block_len

    # Grow the bounding box so that both corners sit on target cube borders.
    # NOTE(review): next_higher_divisible_by also advances a coordinate that is
    # already aligned, so an aligned bottom-right edge gains one extra cube of
    # padding - confirm this is intended.
    outer_bounding_box_tl = [
        next_lower_divisible_by(coord, wkw_cube_size) for coord in top_left
    ]
    outer_bounding_box_br = [
        next_higher_divisible_by(coord, wkw_cube_size) for coord in bottom_right
    ]
    outer_bounding_box_size = [
        outer_bounding_box_br[0] - outer_bounding_box_tl[0],
        outer_bounding_box_br[1] - outer_bounding_box_tl[1],
        outer_bounding_box_br[2] - outer_bounding_box_tl[2],
    ]

    # Offsets of every target cube relative to the outer top-left corner.
    target_cube_addresses = product(
        range(0, outer_bounding_box_size[0], wkw_cube_size),
        range(0, outer_bounding_box_size[1], wkw_cube_size),
        range(0, outer_bounding_box_size[2], wkw_cube_size),
    )

    with get_executor_for_args(args) as executor:
        # The tuple layout must match what recubing_cube_job unpacks.
        job_args = [
            (
                source_wkw_info,
                target_wkw_info,
                outer_bounding_box_size,
                outer_bounding_box_tl,
                wkw_cube_size,
                target_cube_xyz,
            )
            for target_cube_xyz in target_cube_addresses
        ]
        wait_and_ensure_success(executor.map_to_futures(recubing_cube_job, job_args))

    logging.info(f"{layer_name} successfully recubed!")
139+
140+
141+
def recubing_cube_job(args):
    """Copy a single cube from the source dataset to the target dataset.

    ``args`` is the six-element tuple assembled by ``recube``: source info,
    target info, outer bounding box size (unused here), outer bounding box
    top-left corner, cube edge length and the cube's offset from that corner.
    """
    (
        source_wkw_info,
        target_wkw_info,
        _outer_bounding_box_size,
        outer_bounding_box_tl,
        wkw_cube_size,
        target_cube_xyz,
    ) = args

    with open_wkw(source_wkw_info) as source_wkw_dataset, open_wkw(
        target_wkw_info
    ) as target_wkw_dataset:
        # Position of this cube: outer corner plus per-axis offset.
        top_left = [
            outer_bounding_box_tl[axis] + target_cube_xyz[axis] for axis in range(3)
        ]

        logging.info(f"Writing at {top_left}")

        cube_shape = (wkw_cube_size, wkw_cube_size, wkw_cube_size)
        data_cube = source_wkw_dataset.read(top_left, cube_shape)

        target_wkw_dataset.write(top_left, data_cube)
166+
167+
168+
if __name__ == "__main__":
    # Script entry point: parse the command line, configure logging, recube.
    # Keep the name ``args``: recube looks it up as a module-level name when
    # it selects its executor.
    args = create_parser().parse_args()
    setup_logging(args)

    recube(
        source_path=args.source_path,
        target_path=args.target_path,
        layer_name=args.layer_name,
        dtype=args.dtype,
        wkw_file_len=args.wkw_file_len,
        compression=not args.no_compression,
    )

wkcuber/utils.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import re
12
import time
23
import wkw
34
import numpy as np
@@ -29,6 +30,7 @@
2930

3031

3132
BLOCK_LEN = 32
33+
CUBE_REGEX = re.compile(r"z(\d+)/y(\d+)/x(\d+)(\.wkw)$")
3234

3335
logger = getLogger(__name__)
3436

@@ -47,6 +49,19 @@ def ensure_wkw(target_wkw_info):
4749
target_wkw.close()
4850

4951

52+
def cube_addresses(source_wkw_info):
    """Return the sorted addresses of all WKW cube files in the dataset.

    Each file listed by the opened dataset is converted with
    ``parse_cube_file_name``; the resulting list is sorted ascending.
    """
    with open_wkw(source_wkw_info) as source_wkw:
        return sorted(map(parse_cube_file_name, source_wkw.list_files()))
58+
59+
60+
def parse_cube_file_name(filename):
    """Extract the cube address from the path of a WKW cube file.

    The path is searched with ``CUBE_REGEX``, i.e. it has to end in
    ``z<digits>/y<digits>/x<digits>.wkw``.

    Args:
        filename: Path of the cube file.

    Returns:
        The address as an ``(x, y, z)`` tuple of ints.

    Raises:
        ValueError: If ``filename`` does not end in the expected pattern.
    """
    match = CUBE_REGEX.search(filename)
    if match is None:
        # Without this check a non-matching path failed with an opaque
        # AttributeError on ``None.group``.
        raise ValueError(f"Not a WKW cube file name: {filename!r}")
    z, y, x = (int(match.group(index)) for index in (1, 2, 3))
    return x, y, z
63+
64+
5065
def open_knossos(info):
5166
return KnossosDataset.open(info.dataset_path, np.dtype(info.dtype))
5267

0 commit comments

Comments
 (0)