@@ -441,14 +441,18 @@ def inspect_series(root_dir, series_tags, meta_data_info={}, thumbnail_settings=
441441 all_series_files = {}
442442 additional_series_tags = series_tags - {"0020|000e" , "0020|000d" }
443443 reader = sitk .ImageFileReader ()
444+ # Explicitly set the ImageIO to GDCMImageIO so that non-DICOM files that
445+ # contain DICOM tags (files converted from an original DICOM file) will be
446+ # ignored.
447+ reader .SetImageIO ("GDCMImageIO" )
444448 # collect the file names of all series into a dictionary with the key being
445449 # study:series. This traversal is faster, O(n), than calling GetGDCMSeriesIDs on each
446450 # directory followed by iterating over the series and calling
447451 # GetGDCMSeriesFileNames with the seriesID on that directory, O(n^2).
448452 for dir_name , subdir_names , file_names in os .walk (root_dir ):
449453 for file in file_names :
450454 try :
451- fname = os .path .join (dir_name , file )
455+ fname = os .path .join (os . path . abspath ( dir_name ) , file )
452456 reader .SetFileName (fname )
453457 reader .ReadImageInformation ()
454458 sid = reader .GetMetaData ("0020|000e" )
@@ -723,27 +727,32 @@ def characterize_data(argv=None):
723727 of using os.walk to traverse the file system (internally os.walk uses os.scandir and that method's
724728 documentation says "The entries are yielded in arbitrary order.").
725729
726- Convert from x, y, z (zero based) indexes from the "summary image" to information from "summary csv" file.
730+ Convert x, y, z (zero-based) indexes in the "summary image" to information from the
731+ "summary csv" file. To view the summary image and obtain the x-y-z coordinates for a
732+ specific thumbnail we recommend using one of the following free programs:
733+ Fiji (uses zero-based indexing): https://imagej.net/software/fiji/
734+ 3D Slicer (uses zero-based indexing): https://www.slicer.org/
735+ ITK-SNAP (uses one-based indexing, subtract one): http://www.itksnap.org/
736+
727737
728738 import pandas as pd
729739 import SimpleITK as sitk
730740
731- def xyz_to_index(x, y, z):
732- tile_size=[20, 20]
733- thumbnail_size=[64, 64]
734- # add 2 to the index because the csv starts counting lines at 1 and the first
735- # line is the table header
741+ def xyz_to_index(x, y, z, thumbnail_size, tile_size):
736742 return (z * tile_size[0] * tile_size[1]
737743 + int(y / thumbnail_size[1]) * tile_size[0]
738744 + int(x / thumbnail_size[0])
739745 )
740746
741- csv_file_name = "Output/generic_image_data_report.csv"
742- df = pd.read_csv(csv_file_name)
747+ summary_csv_file_name =
748+ df = pd.read_csv(summary_csv_file_name)
749+ # Ensure dataframe matches the read images. If the report included files that
750+ # were not read (non-image or read failures) remove them.
751+ df.dropna(inplace=True, thresh=2)
743752
744- file_names = eval(df["files"].iloc[xyz_to_index(x=xval, y=yval, z=zval)])
745- print(file_names)
746- sitk.Show(sitk.ReadImage(file_names) )
753+ thumbnail_size =
754+ tile_size =
755+ print(df["files"].iloc[xyz_to_index(x, y, z, thumbnail_size, tile_size)] )
747756 """
748757 # Configure argument parser for commandline arguments and set default
749758 # values.
@@ -990,11 +999,12 @@ def xyz_to_index(x, y, z):
990999 df .to_csv (args .output_file , index = False )
9911000
9921001 # minimal analysis on the image information, detect image duplicates and plot the image size
993- # distribution and distribution of min/max intensity values of scalar
994- # images
995- image_counts = (
996- df ["MD5 intensity hash" ].dropna ().value_counts ().reset_index (name = "count" )
997- )
1002+ # distribution and distribution of min/max intensity values of scalar images.
1003+ # First, drop the rows that correspond to problematic files if they weren't already dropped
1004+ # based on program settings.
1005+ if not args .ignore_problems :
1006+ df .dropna (inplace = True , thresh = 2 )
1007+ image_counts = df ["MD5 intensity hash" ].value_counts ().reset_index (name = "count" )
9981008 duplicates = df [
9991009 df ["MD5 intensity hash" ].isin (
10001010 image_counts [image_counts ["count" ] > 1 ]["MD5 intensity hash" ]
0 commit comments