Fix Binary Output

den-sq · den-sq · commit dcd42fe214ee · 2025-09-11T21:05:46.000-04:00
- The binary output flag is now read and causes a two-valued (0/1) uint8 write, because compressed direct boolean TIFF writes are incompatible with some software. This should also match the old behavior.
- The file-size calculation behind the BigTIFF check now uses uint8 instead of uint16 when binary output is enabled.
- Added normalization to np_convert for conversions to a floating-point dtype.
- Added a safe_bool option to np_convert, which returns a bool conversion as two-valued uint8.
- Added error handling for loading the straightened volume: the error and traceback are printed, then the exception is re-raised.
- Expanded np_convert tests
diff --git a/python/ouroboros/helpers/files.py b/python/ouroboros/helpers/files.py
@@ -1,13 +1,11 @@
 from functools import partial
 from multiprocessing.pool import ThreadPool
 import os
-import shutil
-from threading import Thread
 
 import numpy as np
 from numpy.typing import ArrayLike
 from pathlib import Path
-from tifffile import imread, TiffWriter, TiffFile
+from tifffile import TiffWriter, TiffFile
 import time
 
 from .shapes import DataShape
@@ -123,24 +121,33 @@ def num_digits_for_n_files(n: int) -> int:
     return len(str(n - 1))
 
 
-def np_convert(dtype: np.dtype, source: ArrayLike, normalize=True):
-    if not normalize:
-        return source.astype(dtype)
-    if np.issubdtype(dtype, np.integer):
-        dtype_range = np.iinfo(dtype).max - np.iinfo(dtype).min
+def np_convert(target_dtype: np.dtype, source: ArrayLike, normalize=True, safe_bool=False):
+    """ TODO: Fix for Negative Values """
+    if safe_bool and target_dtype == bool:
+        return source.astype(target_dtype).astype(np.uint8)
+    elif np.issubdtype(target_dtype, np.integer) and normalize:
+        dtype_range = np.iinfo(target_dtype).max - np.iinfo(target_dtype).min
         source_range = np.max(source) - np.min(source)
 
         # Avoid divide by 0, esp. as numpy segfaults when you do.
         if source_range == 0.0:
             source_range = 1.0
 
-        return (source * max(int(dtype_range / source_range), 1)).astype(dtype)
-    elif np.issubdtype(dtype, np.floating):
-        return source.astype(dtype)
+        return (source * max(int(dtype_range / source_range), 1)).astype(target_dtype)
+    elif np.issubdtype(target_dtype, np.floating) and normalize:
+        source_range = np.max(source) - np.min(source)
+
+        # Avoid divide by 0, esp. as numpy segfaults when you do.
+        if source_range == 0.0:
+            source_range = 1.0
+
+        return (source / source_range).astype(target_dtype)
+    else:
+        return source.astype(target_dtype)
 
 
 def generate_tiff_write(write_func: callable, compression: str | None, micron_resolution: np.ndarray[float],
-               backprojection_offset: np.ndarray, **kwargs):
+                        backprojection_offset: np.ndarray, **kwargs):
     # Volume cache resolution is in voxel size, but .tiff XY resolution is in voxels per unit, so we invert.
     resolution = [1.0 / voxel_size for voxel_size in micron_resolution[:2] * 0.0001]
     resolutionunit = "CENTIMETER"
@@ -217,6 +224,6 @@ def write_conv_vol(writer: callable, source_path, shape, dtype, *args, **kwargs)
     vol = volume_from_intermediates(source_path, shape)
     perf["Merge Volume"] = time.perf_counter() - start
     start = time.perf_counter()
-    writer(*args, data=np_convert(dtype, vol.reshape(shape.Y, shape.X), False), **kwargs)
+    writer(*args, data=np_convert(dtype, vol.reshape(shape.Y, shape.X), normalize=False, safe_bool=True), **kwargs)
     perf["Write Merged"] = time.perf_counter() - start
     return perf
diff --git a/python/ouroboros/pipeline/backproject_pipeline.py b/python/ouroboros/pipeline/backproject_pipeline.py
@@ -127,18 +127,25 @@ def _process(self, input_data: any) -> tuple[any, None] | tuple[None, any]:
         print(f"\nFront Projection Shape: {FPShape}")
         print(f"\nBack Projection Shape (Z/Y/X):{write_shape}")
 
-        pipeline_input.output_file_path = f"{config.output_file_name}_{'_'.join(map(str, full_bounding_box.get_min(np.uint32)))}"
+        pipeline_input.output_file_path = (f"{config.output_file_name}_"
+                                           f"{'_'.join(map(str, full_bounding_box.get_min(np.uint32)))}")
         folder_path = Path(config.output_file_folder, pipeline_input.output_file_path)
         folder_path.mkdir(exist_ok=True, parents=True)
 
         i_path = Path(config.output_file_folder,
                       f"{config.output_file_name}_t_{'_'.join(map(str, full_bounding_box.get_min(np.uint32)))}")
 
         if config.make_single_file:
-            is_big_tiff = calculate_gigabytes_from_dimensions(np.prod(write_shape), np.uint16) > 4     # Check Dtype
+            is_big_tiff = calculate_gigabytes_from_dimensions(
+                            np.prod(write_shape),
+                            np.uint8 if config.make_backprojection_binary else np.uint16) > 4
         else:
-            is_big_tiff = calculate_gigabytes_from_dimensions(np.prod(write_shape[1:]), np.uint16) > 4     # Check Dtype
+            is_big_tiff = calculate_gigabytes_from_dimensions(
+                            np.prod(write_shape[1:]),
+                            np.uint8 if config.make_backprojection_binary else np.uint16) > 4
 
+        # Generate image writing function
+        # Combining compression with binary images can cause issues.
         bp_offset = pipeline_input.backprojection_offset if config.backproject_min_bounding_box else None
         tif_write = partial(generate_tiff_write,
                             compression=config.backprojection_compression,
@@ -205,7 +212,9 @@ def note_written(write_future):
                             write_futures.append(write_executor.submit(
                                 write_conv_vol,
                                 tif_write(tifffile.imwrite), i_path.joinpath(f"i_{index:05}"),
-                                ImgSlice(*write_shape[1:]), np.uint16, folder_path.joinpath(f"{index:05}.tif")
+                                ImgSlice(*write_shape[1:]),
+                                bool if config.make_backprojection_binary else np.uint16,
+                                folder_path.joinpath(f"{index:05}.tif")
                             ))
                             write_futures[-1].add_done_callback(note_written)
 
@@ -285,8 +294,13 @@ def process_chunk(
     start_total = time.perf_counter()
 
     # Load the straightened volume
-    straightened_volume = tifffile.memmap(straightened_volume_path, mode="r")
-    durations["memmap"] = [time.perf_counter() - start_total]
+    try:
+        straightened_volume = tifffile.memmap(straightened_volume_path, mode="r")
+        durations["memmap"] = [time.perf_counter() - start_total]
+    except BaseException as be:
+        print(f"Error loading Volume: {be} : {straightened_volume_path}")
+        traceback.print_tb(be.__traceback__, file=sys.stderr)
+        raise be
 
     # Get the slices from the straightened volume  Dumb but maybe bugfix?
     start = time.perf_counter()
diff --git a/python/test/helpers/test_files.py b/python/test/helpers/test_files.py
@@ -2,7 +2,6 @@
 from pathlib import Path
 
 import numpy as np
-from tifffile import imwrite, TiffFile
 
 from ouroboros.helpers.files import (
     format_backproject_output_file,
@@ -169,14 +168,6 @@ def test_num_digits_for_n_files():
     assert result == 2
 
 
-def test_np_convert():
-    float_data = np.linspace(0, 1, 16)
-    int_data = np_convert(np.uint16, float_data)
-
-    assert np.all(int_data == np.arange(0, np.iinfo(np.uint16).max + 1, np.iinfo(np.uint16).max // 15))
-    assert np.all(np_convert(np.float32, int_data) == int_data.astype(np.float32))
-
-
 def test_generate_tiff_write(tmp_path):
     micron_resolution = np.array([0.7, 0.7, 0.7])
     backprojection_offset = (55, 44, 77)
@@ -284,10 +275,52 @@ def test_increment_volume(tmp_path):
     assert np.allclose(volume[1, mapped_source[0]], np.sum(source_weights[[0, 2]]))
     assert np.all(np.nonzero(volume)[0] == np.array([0, 0, 1, 1]))
     assert np.all(np.nonzero(volume)[1] == np.array([3947, 3952, 3947, 3952]))
-    
+
     assert not sample_path.exists()
 
 
+def test_np_convert_from_int():
+    base = np.random.randint(0, 10, 6400).reshape(80, 80)
+
+    # Direct Conversion
+    assert np.all(np_convert(np.float32, base, normalize=False) == base.astype(np.float32))
+
+    # Normalized Conversion
+    assert np.all(np_convert(np.float32, base) == base.astype(np.float32) / (np.max(base) - np.min(base)))
+
+    # Safe Bool
+    safe_bool = np_convert(bool, base, safe_bool=True)
+    assert safe_bool.dtype == np.uint8
+    assert np.all(safe_bool == (base > 0))
+
+    # Unsafe Bool - Raw bool datatype
+    safe_bool = np_convert(bool, base, safe_bool=False)
+    assert safe_bool.dtype == bool
+    assert np.all(safe_bool == (base > 0))
+
+
+def test_np_convert_from_float():
+    base = np.random.randint(0, 16, 6400).reshape(80, 80) * np.random.rand(80, 80)
+    float_data = np.linspace(0, 1, 16)
+
+    # Direct Conversion
+    assert np.all(np_convert(np.uint16, float_data, normalize=False) == [0] * 15 + [1])
+
+    # Normalized Conversion
+    assert np.all(np_convert(np.uint16, float_data) ==
+                  np.arange(0, np.iinfo(np.uint16).max + 1, np.iinfo(np.uint16).max // 15))
+
+    # Safe Bool
+    safe_bool = np_convert(bool, base, safe_bool=True)
+    assert safe_bool.dtype == np.uint8
+    assert np.all(safe_bool == (base > 0))
+
+    # Unsafe Bool - Raw bool datatype
+    safe_bool = np_convert(bool, base, safe_bool=False)
+    assert safe_bool.dtype == bool
+    assert np.all(safe_bool == (base > 0))
+
+
 def test_volume_from_intermediates():
     pass