Merge pull request #479 from zivy/optionForConsistentOutputOnAllPlatforms

zivy · web-flow · commit ce23a28f2e95 · 2025-04-24T11:28:13.000-04:00
Add option to specify floating point precision.
diff --git a/Python/scripts/characterize_data.py b/Python/scripts/characterize_data.py
@@ -993,6 +993,12 @@ def xyz_to_index(x, y, z, thumbnail_size, tile_size):
         default="sitkNearestNeighbor",
         help="SimpleITK interpolator used to resize images when creating summary image",
     )
+    opt_arg_parser.add_argument(
+        "--float_precision",
+        type=positive_int,
+        default=None,
+        help="Precision for floating point numbers. Use only if exact numeric equality across platforms is required (i.e. for testing)",
+    )
     # Use the function docstring as the text in the parser description and a custom
     # RawDescriptionAndDefaultHelpFormatter so that the docstring layout
     # is maintained, otherwise it is line-wrapped and the formatting is lost, and the
@@ -1141,7 +1147,21 @@ def xyz_to_index(x, y, z, thumbnail_size, tile_size):
     if args.ignore_problems:
         df.dropna(inplace=True, thresh=2)
     # save the raw information, create directory structure if it doesn't exist
-    df.to_csv(args.output_file, index=False)
+    # If a floating point precision was specified, round the tuple-valued columns to that
+    # precision here; the dataframe's to_csv float_format only formats columns of floating point type.
+    float_format_str = None
+    if args.float_precision:
+        float_format_str = f"%.{args.float_precision}f"
+        df["image spacing"] = df["image spacing"].apply(
+            lambda x: np.round(x, decimals=args.float_precision)
+        )
+        df["image origin"] = df["image origin"].apply(
+            lambda x: np.round(x, decimals=args.float_precision)
+        )
+        df["axis direction "] = df["axis direction"].apply(
+            lambda x: np.round(x, decimals=args.float_precision)
+        )
+    df.to_csv(args.output_file, index=False, float_format=float_format_str)
 
     # minimal analysis on the image information, detect image duplicates and plot the image size,
     # spacing and min/max intensity values of scalar image distributions as scatterplots.
diff --git a/tests/test_scripts.py b/tests/test_scripts.py
@@ -47,13 +47,13 @@ def files_md5(self, ascii_file_list, binary_file_list):
                 "per_file_data_characteristics.csv",
                 "per_file",
                 "characterize_data_user_defaults.json",
-                "fb0338866794ef68c5d5854399ccd22c",
+                "561519272943948754e5a78a043b66dd",
             ),
             (
                 "per_series_data_characteristics.csv",
                 "per_series",
                 "characterize_data_user_defaults.json",
-                "766184c8503a2f08cac6e3b6be57e346",
+                "9c6aa0ff16e78f7e3d808531caf8cd91",
             ),
         ],
     )
@@ -79,6 +79,8 @@ def test_characterize_data(
                 analysis_type,
                 "--configuration_file",
                 str(self.data_path / user_configuration),
+                "--float_precision",
+                "3",
             ]
         )
         # CSV files need to be modified as follows before comparing to expected values:

Original file line number	Diff line number	Diff line change
`@@ -47,13 +47,13 @@ def files_md5(self, ascii_file_list, binary_file_list):`
`47`	`47`	`"per_file_data_characteristics.csv",`
`48`	`48`	`"per_file",`
`49`	`49`	`"characterize_data_user_defaults.json",`
`50`		`- "fb0338866794ef68c5d5854399ccd22c",`
	`50`	`+ "561519272943948754e5a78a043b66dd",`
`51`	`51`	`),`
`52`	`52`	`(`
`53`	`53`	`"per_series_data_characteristics.csv",`
`54`	`54`	`"per_series",`
`55`	`55`	`"characterize_data_user_defaults.json",`
`56`		`- "766184c8503a2f08cac6e3b6be57e346",`
	`56`	`+ "9c6aa0ff16e78f7e3d808531caf8cd91",`
`57`	`57`	`),`
`58`	`58`	`],`
`59`	`59`	`)`
`@@ -79,6 +79,8 @@ def test_characterize_data(`
`79`	`79`	`analysis_type,`
`80`	`80`	`"--configuration_file",`
`81`	`81`	`str(self.data_path / user_configuration),`
	`82`	`+ "--float_precision",`
	`83`	`+ "3",`
`82`	`84`	`]`
`83`	`85`	`)`
`84`	`86`	`# CSV files need to be modified as follows before comparing to expected values:`