Skip to content

Commit 90615ea

Browse files
authored
Merge pull request #467 from zivy/updateScript
Update data characterization script.
2 parents a3a36d1 + 86c56c0 commit 90615ea

File tree

3 files changed

+108
-54
lines changed

3 files changed

+108
-54
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
{
2+
"max_processes": 6,
3+
"series_tags": [
4+
"0020|000e",
5+
"0020|000d",
6+
"0020|0011",
7+
"0018|0024",
8+
"0018|0050",
9+
"0028|0010",
10+
"0028|0011"
11+
],
12+
"imageIO": "All",
13+
"metadata_keys": [
14+
"0008|0060",
15+
"0018|5101"
16+
],
17+
"metadata_keys_headings": [
18+
"modality",
19+
"radiographic view"
20+
],
21+
"ignore_problems": true,
22+
"create_summary_image": true,
23+
"thumbnail_sizes": [
24+
64,
25+
64
26+
],
27+
"tile_sizes": [
28+
30,
29+
20
30+
],
31+
"projection_axis": 2,
32+
"interpolator": "sitkLinear"
33+
}

Python/scripts/characterize_data.py

Lines changed: 61 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -45,50 +45,47 @@ def positive_int(i):
4545
raise argparse.ArgumentTypeError(f"Invalid argument ({i}), expected value > 0 .")
4646

4747

48-
# Class for loading optional configuration settings stored in a JSON file
49-
# via the argparse action. The configuration file settings override
50-
# the hard-coded default settings and are overridden by settings
51-
# provided on the commandline.
52-
class LoadOptionalFromJSONFile(argparse.Action):
53-
def __init__(self, option_strings, internal_parser, *args, **kwargs):
54-
super().__init__(option_strings=option_strings, *args, **kwargs)
55-
self.internal_parser = internal_parser
56-
57-
def __call__(self, parser, namespace, values, option_string=None):
58-
config_argv = []
59-
with values as fp:
60-
configuration_dict = json.load(fp)
61-
# Convert dictionary to equivalent commandline entries
62-
# prepend -- to keys as these are optional argparse parameters
63-
for key, val in configuration_dict.items():
64-
# boolean flags added only if they are true
65-
if type(val) == bool:
66-
if val:
67-
config_argv.append(f"--{key}")
68-
elif type(val) != list:
69-
config_argv.append(f"--{key}")
70-
config_argv.append(str(val))
71-
# checking the list length so that errors in the file,
72-
# empty list, are ignored
73-
elif len(val) > 0:
48+
def load_optional_parameters(file_name, parser):
49+
"""
50+
Loading optional argparse parameters from a JSON configuration file.
51+
The contents of the JSON dictionary are run through the parser so that
52+
they adhere to the same constraints as those imposed on the parameters
53+
provided on the commandline.
54+
55+
Parameters
56+
----------
57+
file_name (Union[str, Path]): Name of JSON configuration file.
58+
parser (argparse.ArgumentParser): Parser for the optional commandline parameters.
59+
"""
60+
config_argv = []
61+
with open(file_name, "r") as fp:
62+
configuration_dict = json.load(fp)
63+
# Convert dictionary to equivalent commandline entries
64+
# prepend -- to keys as these are optional argparse parameters
65+
for key, val in configuration_dict.items():
66+
# boolean flags added only if they are true
67+
if type(val) == bool:
68+
if val:
7469
config_argv.append(f"--{key}")
75-
config_argv.extend([str(v) for v in val])
76-
# parse the arguments and store in local config_data namespace
77-
# calling parse_known_args and not parse_args so that unexpected arguments
78-
# are ignored. Using parse_args results in an error if there
79-
# are unexpected arguments.
80-
config_data, additional_args = self.internal_parser.parse_known_args(
81-
config_argv, namespace=None
70+
elif type(val) != list:
71+
config_argv.append(f"--{key}")
72+
config_argv.append(str(val))
73+
# checking the list length so that errors in the file,
74+
# empty list, are ignored
75+
elif len(val) > 0:
76+
config_argv.append(f"--{key}")
77+
config_argv.extend([str(v) for v in val])
78+
# parse the arguments and store in local config_data namespace
79+
# calling parse_known_args and not parse_args so that unexpected arguments
80+
# are ignored. Using parse_args results in an error if there
81+
# are unexpected arguments.
82+
config_data, additional_args = parser.parse_known_args(config_argv, namespace=None)
83+
84+
if additional_args:
85+
print(
86+
f"Warning: unexpected arguments found in configuration file ({additional_args})."
8287
)
83-
if additional_args:
84-
print(
85-
f"Warning: unexpected arguments found in configuration file ({additional_args})."
86-
)
87-
# add the contents to the external namespace only if it doesn't already
88-
# contain the attribute (this means it wasn't given on the commandline)
89-
for k, v in vars(config_data).items():
90-
if getattr(namespace, k, None) is None:
91-
setattr(namespace, k, v)
88+
return vars(config_data)
9289

9390

9491
#
@@ -106,6 +103,7 @@ class RawDescriptionAndDefaultHelpFormatter(
106103

107104
def inspect_grayscale_image(sitk_image, image_info):
108105
np_arr_view = sitk.GetArrayViewFromImage(sitk_image)
106+
image_info["MD5 intensity hash"] = hashlib.md5(np_arr_view).hexdigest()
109107
mmfilter = sitk.MinimumMaximumImageFilter()
110108
mmfilter.Execute(sitk_image)
111109
image_info["min intensity"] = mmfilter.GetMinimum()
@@ -148,23 +146,26 @@ def inspect_image(sitk_image, image_info, meta_data_info, thumbnail_settings):
148146
stored in image_info["thumbnail"].
149147
"""
150148
np_arr_view = sitk.GetArrayViewFromImage(sitk_image)
151-
image_info["MD5 intensity hash"] = hashlib.md5(np_arr_view).hexdigest()
152149
image_info["image size"] = sitk_image.GetSize()
153150
image_info["image spacing"] = sitk_image.GetSpacing()
154151
image_info["image origin"] = sitk_image.GetOrigin()
155152
image_info["axis direction"] = sitk_image.GetDirection()
156153

157154
if (
158155
sitk_image.GetNumberOfComponentsPerPixel() == 1
159-
): # greyscale image, get measures of intensity location and spread the min/max pixel values
156+
): # grayscale image, get measures of intensity location and spread the min/max pixel values
160157
image_info["pixel type"] = sitk_image.GetPixelIDTypeAsString() + " gray"
161158
inspect_grayscale_image(sitk_image, image_info)
162-
else: # either a color image or a greyscale image masquerading as a color one
159+
else: # either a color image or a grayscale image masquerading as a color one
163160
pixel_type = sitk_image.GetPixelIDTypeAsString()
164161
channels = [
165162
sitk.VectorIndexSelectionCast(sitk_image, i)
166163
for i in range(sitk_image.GetNumberOfComponentsPerPixel())
167164
]
165+
# if this multi-channel image is actually a grayscale image, treat
166+
# it as such, call inspect_grayscale_image on the first channel
167+
# this will compute the intensity statistics and the md5 hash on
168+
# the actual grayscale information
168169
if np.array_equal(
169170
sitk.GetArrayViewFromImage(channels[0]),
170171
sitk.GetArrayViewFromImage(channels[1]),
@@ -178,6 +179,7 @@ def inspect_image(sitk_image, image_info, meta_data_info, thumbnail_settings):
178179
)
179180
inspect_grayscale_image(channels[0], image_info)
180181
else:
182+
image_info["MD5 intensity hash"] = hashlib.md5(np_arr_view).hexdigest()
181183
pixel_type = (
182184
pixel_type
183185
+ f" {sitk_image.GetNumberOfComponentsPerPixel()} channels color"
@@ -651,7 +653,9 @@ def characterize_data(argv=None):
651653
with the summary image. The summary image is a faux volume where each slice is composed
652654
of multiple 2D grayscale thumbnails representing the original images. When the original
653655
image is a color image it is converted to grayscale. When the original image is 3D
654-
it is converted to 2D via maximum intensity projection along a user specified axis.
656+
it is converted to 2D via maximum intensity projection along a user specified axis. To
657+
retain the original image's aspect ratio it is resized and padded to fit in the user
658+
specified thumbnail size image.
655659
656660
Examples:
657661
--------
@@ -732,9 +736,7 @@ def xyz_to_index(x, y, z):
732736
opt_arg_parser = argparse.ArgumentParser(add_help=False)
733737
opt_arg_parser.add_argument(
734738
"--configuration_file",
735-
type=open,
736-
action=LoadOptionalFromJSONFile,
737-
internal_parser=opt_arg_parser, # additional parameter provided to the action
739+
type=file_path,
738740
help="JSON configuration file containing settings for the optional parameters",
739741
)
740742
opt_arg_parser.add_argument(
@@ -859,6 +861,16 @@ def xyz_to_index(x, y, z):
859861
)
860862

861863
args = parser.parse_args(argv)
864+
if args.configuration_file:
865+
new_default_settings = load_optional_parameters(
866+
args.configuration_file, opt_arg_parser
867+
)
868+
# Configuration file overrides the hard coded default settings
869+
# which are then overridden by the commandline settings by
870+
# calling the parse_args again.
871+
parser.set_defaults(**new_default_settings)
872+
args = parser.parse_args(argv)
873+
862874
# keep a copy of the original settings before some are converted to
863875
# internal representations.
864876
save_dict = copy.deepcopy(vars(args))

tests/test_scripts.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,22 +41,24 @@ def files_md5(self, ascii_file_list, binary_file_list):
4141
return md5.hexdigest()
4242

4343
@pytest.mark.parametrize(
44-
"output_file, analysis_type, result_md5hash",
44+
"output_file, analysis_type, user_configuration, result_md5hash",
4545
[
4646
(
4747
"per_file_data_characteristics.csv",
4848
"per_file",
49-
"0ac42f84a5421dc39c85079cd3d5ae91",
49+
"characterize_data_user_defaults.json",
50+
"bdb2f2489287cf43b681afce1b5d00e8",
5051
),
5152
(
5253
"per_series_data_characteristics.csv",
5354
"per_series",
54-
"8ee7820b100e6e9eebf444205c1a93af",
55+
"characterize_data_user_defaults.json",
56+
"1c78bf68faf7f1a8fdb29e14ae276565",
5557
),
5658
],
5759
)
5860
def test_characterize_data(
59-
self, output_file, analysis_type, result_md5hash, tmp_path
61+
self, output_file, analysis_type, user_configuration, result_md5hash, tmp_path
6062
):
6163
# NOTE: For now not testing pdf files. Setting the SOURCE_DATE_EPOCH
6264
# didn't resolve the variability across platforms, getting different
@@ -75,6 +77,8 @@ def test_characterize_data(
7577
str(self.data_path / "CIRS057A_MR_CT_DICOM"),
7678
str(output_dir / output_file),
7779
analysis_type,
80+
"--configuration_file",
81+
str(self.data_path / user_configuration),
7882
]
7983
)
8084
# csv files need to be modified as follows before comparing to expected values:
@@ -91,9 +95,14 @@ def test_characterize_data(
9195
lambda x: sorted([pathlib.Path(fname).name for fname in eval(x)])
9296
)
9397
df.to_csv(file, index=False)
98+
# Below we convert the generators to lists and concatenate them. A nicer way of
99+
# concatenating iterables with a large number of entries is to use itertools.chain().
100+
# In our case, the number of entries is small (<5), so importing the standard-library
101+
# itertools module is not warranted; we just convert to list.
94102
assert (
95103
self.files_md5(
96-
ascii_file_list=output_dir.glob("*.csv"),
104+
ascii_file_list=list(output_dir.glob("*.csv"))
105+
+ list(output_dir.glob("*.json")),
97106
binary_file_list=[], # output_dir.glob("*.pdf"),
98107
)
99108
== result_md5hash

0 commit comments

Comments
 (0)