@@ -988,82 +988,45 @@ def xyz_to_index(x, y, z):
988988 f"{ os .path .splitext (args .output_file )[0 ]} _duplicates.csv" , index = False
989989 )
990990
991- size_counts = (
992- df ["image size" ]
993- .astype ("string" )
994- .dropna ()
995- .value_counts ()
996- .reset_index (name = "count" )
991+ fig , ax = plt .subplots ()
992+ # If the dataset contains true 3D images, convert 2D sizes to
993+ # faux 3D ones by adding a third dimension of 1 and treat all
994+ # images as 3D. Plot as a scatterplot with the x and y sizes on
995+ # the axes and the z size encoded via color.
996+ if df ["image size" ].apply (lambda x : len (x ) == 3 and x [2 ] > 1 ).any ():
997+ sizes = df ["image size" ].apply (lambda x : x if len (x ) == 3 else x + (1 ,))
998+ x_size , y_size , z_size = zip (* sizes )
999+ sc = ax .scatter (x_size , y_size , c = z_size , cmap = "viridis" )
1000+ cb = fig .colorbar (sc )
1001+ cb .set_label ("z size" , rotation = 270 , verticalalignment = "baseline" )
1002+ cb .set_ticks (np .linspace (min (z_size ), max (z_size ), 5 , endpoint = True , dtype = int ))
1003+ ax .set_xlabel ("x size" )
1004+ ax .set_ylabel ("y size" )
1005+ # All images are 2D, but some may be faux 3D (last dimension is 1);
1006+ # convert faux 3D sizes to 2D by removing the last dimension.
1007+ else :
1008+ sizes = df ["image size" ].apply (lambda x : x if len (x ) == 2 else x [0 :2 ])
1009+ x_size , y_size = zip (* sizes )
1010+ ax .scatter (x_size , y_size )
1011+ ax .set_xlabel ("x size" )
1012+ ax .set_ylabel ("y size" )
1013+ plt .tight_layout ()
1014+ plt .savefig (
1015+ f"{ os .path .splitext (args .output_file )[0 ]} _image_size_scatterplot.pdf" ,
1016+ bbox_inches = "tight" ,
9971017 )
998- if not size_counts .empty :
999- # Compute appropriate size for figure using a specific font size
1000- # based on stack-overflow: https://stackoverflow.com/questions/35127920/overlapping-yticklabels-is-it-possible-to-control-cell-size-of-heatmap-in-seabo
1001- fontsize_pt = 8
1002- dpi = 72.27
1003-
1004- # compute the matrix height in points and inches
1005- matrix_height_pt = fontsize_pt * len (size_counts )
1006- matrix_height_in = matrix_height_pt / dpi
1007-
1008- # compute the required figure height
1009- top_margin = 0.04 # in percentage of the figure height
1010- bottom_margin = 0.04 # in percentage of the figure height
1011- figure_height = matrix_height_in / (1 - top_margin - bottom_margin )
1012-
1013- # build the figure instance with the desired height
1014- fig , ax = plt .subplots (
1015- figsize = (6 , figure_height ),
1016- gridspec_kw = dict (top = 1 - top_margin , bottom = bottom_margin ),
1017- )
1018-
1019- ax .tick_params (axis = "y" , labelsize = fontsize_pt )
1020- ax .tick_params (axis = "x" , labelsize = fontsize_pt )
1021- ax .xaxis .get_major_locator ().set_params (integer = True )
1022- ax = size_counts .plot .barh (
1023- x = "image size" ,
1024- y = "count" ,
1025- xlabel = "image size" ,
1026- ylabel = "# of images" ,
1027- legend = None ,
1028- ax = ax ,
1029- )
1030- ax .bar_label (
1031- ax .containers [0 ], fontsize = fontsize_pt
1032- ) # add the number at the top of each bar
1033- plt .savefig (
1034- f"{ os .path .splitext (args .output_file )[0 ]} _image_size_distribution.pdf" ,
1035- bbox_inches = "tight" ,
1036- )
10371018 # there is at least one series/file that is grayscale
10381019 if "min intensity" in df .columns :
10391020 min_intensities = df ["min intensity" ].dropna ()
10401021 max_intensities = df ["max intensity" ].dropna ()
10411022 if not min_intensities .empty :
10421023 fig , ax = plt .subplots ()
1043- fig .set_layout_engine ("constrained" )
1044- ax .yaxis .get_major_locator ().set_params (integer = True )
1045- ax .hist (
1046- min_intensities ,
1047- bins = 256 ,
1048- alpha = 0.5 ,
1049- label = "min intensity" ,
1050- color = "blue" ,
1051- )
1052- ax .hist (
1053- max_intensities ,
1054- bins = 256 ,
1055- alpha = 0.5 ,
1056- label = "max intensity" ,
1057- color = "green" ,
1058- )
1059- plt .legend ()
1060- plt .xlabel ("intensity" )
1061- plt .ylabel ("# of images" )
1062-
1063- # plt.tight_layout()
1024+ ax .scatter (min_intensities , max_intensities )
1025+ ax .set_xlabel ("min intensity" )
1026+ ax .set_ylabel ("max intensity" )
10641027 plt .savefig (
1065- f"{ os .path .splitext (args .output_file )[0 ]} _min_max_intensity_distribution .pdf" ,
1066- # bbox_inches="tight",
1028+ f"{ os .path .splitext (args .output_file )[0 ]} _min_max_intensity_scatterplot .pdf" ,
1029+ bbox_inches = "tight" ,
10671030 )
10681031
10691032 return 0
0 commit comments