Skip to content

Commit 8406750

Browse files
authored
Merge pull request #469 from zivy/changeScriptVisualization
Change the way image sizes and intensity ranges are visualized.
2 parents f82fec1 + 41daad6 commit 8406750

File tree

1 file changed

+31
-68
lines changed

1 file changed

+31
-68
lines changed

Python/scripts/characterize_data.py

Lines changed: 31 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -988,82 +988,45 @@ def xyz_to_index(x, y, z):
988988
f"{os.path.splitext(args.output_file)[0]}_duplicates.csv", index=False
989989
)
990990

991-
size_counts = (
992-
df["image size"]
993-
.astype("string")
994-
.dropna()
995-
.value_counts()
996-
.reset_index(name="count")
991+
fig, ax = plt.subplots()
992+
# There are true 3D images in the dataset, convert 2D sizes
993+
# to faux 3D ones by adding third dimension as 1 and treat all
994+
# images as 3D. Plot as a scatterplot with the x and y sizes on the axes
995+
# and z size encoded via color.
996+
if df["image size"].apply(lambda x: len(x) == 3 and x[2] > 1).any():
997+
sizes = df["image size"].apply(lambda x: x if len(x) == 3 else x + (1,))
998+
x_size, y_size, z_size = zip(*sizes)
999+
sc = ax.scatter(x_size, y_size, c=z_size, cmap="viridis")
1000+
cb = fig.colorbar(sc)
1001+
cb.set_label("z size", rotation=270, verticalalignment="baseline")
1002+
cb.set_ticks(np.linspace(min(z_size), max(z_size), 5, endpoint=True, dtype=int))
1003+
ax.set_xlabel("x size")
1004+
ax.set_ylabel("y size")
1005+
# All images are 2D, but some may be faux 3D (last dimension is 1);
1006+
# convert faux 3D sizes to 2D by removing the last dimension.
1007+
else:
1008+
sizes = df["image size"].apply(lambda x: x if len(x) == 2 else x[0:2])
1009+
x_size, y_size = zip(*sizes)
1010+
ax.scatter(x_size, y_size)
1011+
ax.set_xlabel("x size")
1012+
ax.set_ylabel("y size")
1013+
plt.tight_layout()
1014+
plt.savefig(
1015+
f"{os.path.splitext(args.output_file)[0]}_image_size_scatterplot.pdf",
1016+
bbox_inches="tight",
9971017
)
998-
if not size_counts.empty:
999-
# Compute appropriate size for figure using a specific font size
1000-
# based on stack-overflow: https://stackoverflow.com/questions/35127920/overlapping-yticklabels-is-it-possible-to-control-cell-size-of-heatmap-in-seabo
1001-
fontsize_pt = 8
1002-
dpi = 72.27
1003-
1004-
# compute the matrix height in points and inches
1005-
matrix_height_pt = fontsize_pt * len(size_counts)
1006-
matrix_height_in = matrix_height_pt / dpi
1007-
1008-
# compute the required figure height
1009-
top_margin = 0.04 # in percentage of the figure height
1010-
bottom_margin = 0.04 # in percentage of the figure height
1011-
figure_height = matrix_height_in / (1 - top_margin - bottom_margin)
1012-
1013-
# build the figure instance with the desired height
1014-
fig, ax = plt.subplots(
1015-
figsize=(6, figure_height),
1016-
gridspec_kw=dict(top=1 - top_margin, bottom=bottom_margin),
1017-
)
1018-
1019-
ax.tick_params(axis="y", labelsize=fontsize_pt)
1020-
ax.tick_params(axis="x", labelsize=fontsize_pt)
1021-
ax.xaxis.get_major_locator().set_params(integer=True)
1022-
ax = size_counts.plot.barh(
1023-
x="image size",
1024-
y="count",
1025-
xlabel="image size",
1026-
ylabel="# of images",
1027-
legend=None,
1028-
ax=ax,
1029-
)
1030-
ax.bar_label(
1031-
ax.containers[0], fontsize=fontsize_pt
1032-
) # add the number at the top of each bar
1033-
plt.savefig(
1034-
f"{os.path.splitext(args.output_file)[0]}_image_size_distribution.pdf",
1035-
bbox_inches="tight",
1036-
)
10371018
# there is at least one series/file that is grayscale
10381019
if "min intensity" in df.columns:
10391020
min_intensities = df["min intensity"].dropna()
10401021
max_intensities = df["max intensity"].dropna()
10411022
if not min_intensities.empty:
10421023
fig, ax = plt.subplots()
1043-
fig.set_layout_engine("constrained")
1044-
ax.yaxis.get_major_locator().set_params(integer=True)
1045-
ax.hist(
1046-
min_intensities,
1047-
bins=256,
1048-
alpha=0.5,
1049-
label="min intensity",
1050-
color="blue",
1051-
)
1052-
ax.hist(
1053-
max_intensities,
1054-
bins=256,
1055-
alpha=0.5,
1056-
label="max intensity",
1057-
color="green",
1058-
)
1059-
plt.legend()
1060-
plt.xlabel("intensity")
1061-
plt.ylabel("# of images")
1062-
1063-
# plt.tight_layout()
1024+
ax.scatter(min_intensities, max_intensities)
1025+
ax.set_xlabel("min intensity")
1026+
ax.set_ylabel("max intensity")
10641027
plt.savefig(
1065-
f"{os.path.splitext(args.output_file)[0]}_min_max_intensity_distribution.pdf",
1066-
# bbox_inches="tight",
1028+
f"{os.path.splitext(args.output_file)[0]}_min_max_intensity_scatterplot.pdf",
1029+
bbox_inches="tight",
10671030
)
10681031

10691032
return 0

0 commit comments

Comments
 (0)