Commit 9793ab9

Fix: Address all review feedback and resolve test failures
1 parent 3665ebb commit 9793ab9

File tree

2 files changed: +235 -153 lines changed
Lines changed: 147 additions & 92 deletions
@@ -1,127 +1,182 @@
-import tensorflow as tf
+from keras import ops
+from keras import layers
+from keras import random
 
-class RandomElasticDeformation3D(tf.keras.layers.Layer):
+class RandomElasticDeformation3D(layers.Layer):
     """
-    A high-performance 3D elastic deformation layer optimized for TPUs and GPUs.
-    ... (docstring is the same) ...
+    A high-performance 3D elastic deformation layer optimized for TPUs.
+
+    This implementation leverages the layer's compute_dtype (e.g., bfloat16)
+    to potentially halve memory bandwidth requirements and uses a vectorized
+    mapping for maximum speed.
     """
     def __init__(self,
                  grid_size=(4, 4, 4),
                  alpha=35.0,
                  sigma=2.5,
-                 data_format="DHWC",
+                 data_format="channels_last",
                  **kwargs):
         super().__init__(**kwargs)
+
         self.grid_size = grid_size
-        self.alpha = tf.constant(alpha, dtype=tf.bfloat16)
-        self.sigma = tf.constant(sigma, dtype=tf.bfloat16)
-        if data_format not in ["DHWC", "HWDC"]:
-            raise ValueError("`data_format` must be one of 'DHWC' or 'HWDC'")
+        self.alpha = alpha
+        self.sigma = sigma
         self.data_format = data_format
-
-    def _separable_gaussian_filter_3d(self, tensor, sigma):
-
-        kernel_size = tf.cast(2 * tf.round(3 * sigma) + 1, dtype=tf.int32)
-        ax = tf.range(-tf.cast(kernel_size // 2, tf.bfloat16) + 1.0,
-                      tf.cast(kernel_size // 2, tf.bfloat16) + 1.0)
-        kernel_1d = tf.exp(-(ax**2) / (2.0 * self.sigma**2))
-        kernel_1d = kernel_1d / tf.reduce_sum(kernel_1d)
-        filter_d = tf.cast(tf.reshape(kernel_1d, [-1, 1, 1, 1, 1]), dtype=tensor.dtype)
-        filter_h = tf.cast(tf.reshape(kernel_1d, [1, -1, 1, 1, 1]), dtype=tensor.dtype)
-        filter_w = tf.cast(tf.reshape(kernel_1d, [1, 1, -1, 1, 1]), dtype=tensor.dtype)
-        tensor = tf.nn.convolution(tensor, filter_d, strides=1, padding='SAME')
-        tensor = tf.nn.convolution(tensor, filter_h, strides=1, padding='SAME')
-        tensor = tf.nn.convolution(tensor, filter_w, strides=1, padding='SAME')
+        if data_format not in ["channels_last", "channels_first"]:
+            raise ValueError(
+                "`data_format` must be one of 'channels_last' or "
+                f"'channels_first'. Received: {data_format}"
+            )
+
+    def build(self, input_shape):
+        """Create tensor state in build to respect the layer's dtype."""
+        self._alpha_tensor = ops.convert_to_tensor(self.alpha, dtype=self.compute_dtype)
+        self._sigma_tensor = ops.convert_to_tensor(self.sigma, dtype=self.compute_dtype)
+
+        # Pre-compute the 1D Gaussian kernel
+        kernel_size = ops.cast(2 * ops.round(3 * self._sigma_tensor) + 1, dtype="int32")
+        ax = ops.arange(-ops.cast(kernel_size // 2, self.compute_dtype) + 1.0,
+                        ops.cast(kernel_size // 2, self.compute_dtype) + 1.0)
+        kernel_1d = ops.exp(-(ax**2) / (2.0 * self._sigma_tensor**2))
+        self.kernel_1d = kernel_1d / ops.sum(kernel_1d)
+        self.built = True
+
+    def _separable_gaussian_filter_3d(self, tensor):
+        """Apply a 3D Gaussian filter using separable 1D convolutions."""
+        depth_kernel = ops.reshape(self.kernel_1d, (-1, 1, 1, 1, 1))
+        tensor = ops.conv(tensor, ops.cast(depth_kernel, dtype=tensor.dtype), padding='same')
+
+        height_kernel = ops.reshape(self.kernel_1d, (1, -1, 1, 1, 1))
+        tensor = ops.conv(tensor, ops.cast(height_kernel, dtype=tensor.dtype), padding='same')
+
+        width_kernel = ops.reshape(self.kernel_1d, (1, 1, -1, 1, 1))
+        tensor = ops.conv(tensor, ops.cast(width_kernel, dtype=tensor.dtype), padding='same')
+
         return tensor
 
     def call(self, inputs):
         image_volume, label_volume = inputs
         original_image_dtype = image_volume.dtype
+        original_label_dtype = label_volume.dtype
+        compute_dtype = self.compute_dtype
 
         was_batched = True
-        if image_volume.shape.rank == 4:
+        if len(image_volume.shape) == 4:
             was_batched = False
-            image_volume = tf.expand_dims(image_volume, axis=0)
-            label_volume = tf.expand_dims(label_volume, axis=0)
+            image_volume = ops.expand_dims(image_volume, axis=0)
+            label_volume = ops.expand_dims(label_volume, axis=0)
 
-        if self.data_format == "HWDC":
-            image_volume = tf.transpose(image_volume, perm=[0, 3, 1, 2, 4])
-            label_volume = tf.transpose(label_volume, perm=[0, 3, 1, 2, 4])
+        image_volume = ops.cast(image_volume, dtype=compute_dtype)
+        label_volume = ops.cast(label_volume, dtype=compute_dtype)
 
-        image_volume = tf.cast(image_volume, dtype=tf.bfloat16)
-        input_shape = tf.shape(image_volume)
+        input_shape = ops.shape(image_volume)
         B, D, H, W = input_shape[0], input_shape[1], input_shape[2], input_shape[3]
+        C = input_shape[4]
 
-        coarse_flow = tf.random.uniform(
+        # 1. Create a coarse random flow field.
+        coarse_flow = random.uniform(
             shape=(B, self.grid_size[0], self.grid_size[1], self.grid_size[2], 3),
-            minval=-1, maxval=1, dtype=tf.bfloat16)
-
-        flow = tf.reshape(coarse_flow, [B * self.grid_size[0], self.grid_size[1], self.grid_size[2], 3])
-        flow = tf.image.resize(flow, size=[H, W], method='bicubic')
-        flow = tf.reshape(flow, [B, self.grid_size[0], H, W, 3])
-        flow = tf.transpose(flow, perm=[0, 2, 3, 1, 4])
-        flow = tf.reshape(flow, [B * H * W, self.grid_size[0], 3])
-        flow = tf.image.resize(tf.expand_dims(flow, axis=1), size=[1, D], method='bicubic')
-        flow = tf.squeeze(flow, axis=1)
-        flow = tf.reshape(flow, [B, H, W, D, 3])
-        flow = tf.transpose(flow, perm=[0, 3, 1, 2, 4])
-
+            minval=-1, maxval=1, dtype=compute_dtype
+        )
 
-        flow = tf.cast(flow, dtype=tf.bfloat16)
-
-        flow_components = tf.unstack(flow, axis=-1)
+        # 2. Upsample the flow field.
+        flow = coarse_flow
+        flow_shape = ops.shape(flow)
+        flow = ops.reshape(flow, (flow_shape[0] * flow_shape[1], flow_shape[2], flow_shape[3], 3))
+        flow = ops.image.resize(flow, (H, W), interpolation="bicubic")
+        flow = ops.reshape(flow, (flow_shape[0], flow_shape[1], H, W, 3))
+        flow = ops.transpose(flow, (0, 2, 3, 1, 4))
+        flow_shape = ops.shape(flow)
+        flow = ops.reshape(flow, (flow_shape[0] * flow_shape[1] * flow_shape[2], flow_shape[3], 1, 3))
+        flow = ops.image.resize(flow, (D, 1), interpolation="bicubic")
+        flow = ops.reshape(flow, (flow_shape[0], flow_shape[1], flow_shape[2], D, 3))
+        flow = ops.transpose(flow, (0, 3, 1, 2, 4))
+
+        # 3. Apply Gaussian smoothing.
+        flow_components = ops.unstack(flow, axis=-1)
         smoothed_components = []
         for component in flow_components:
-            smoothed_component = self._separable_gaussian_filter_3d(
-                component[..., tf.newaxis], self.sigma
-            )
-            smoothed_components.append(smoothed_component[..., 0])
-        smoothed_flow = tf.stack(smoothed_components, axis=-1)
+            component_reshaped = ops.expand_dims(component, axis=-1)
+            smoothed_component = self._separable_gaussian_filter_3d(component_reshaped)
+            smoothed_components.append(ops.squeeze(smoothed_component, axis=-1))
+        smoothed_flow = ops.stack(smoothed_components, axis=-1)
 
-
-        flow = smoothed_flow * self.alpha
-
-        grid_d, grid_h, grid_w = tf.meshgrid(
-            tf.range(D, dtype=tf.bfloat16),
-            tf.range(H, dtype=tf.bfloat16),
-            tf.range(W, dtype=tf.bfloat16),
+        # 4. Scale the flow field and create warp grid.
+        flow = smoothed_flow * self._alpha_tensor
+        grid_d, grid_h, grid_w = ops.meshgrid(
+            ops.arange(D, dtype=compute_dtype),
+            ops.arange(H, dtype=compute_dtype),
+            ops.arange(W, dtype=compute_dtype),
             indexing='ij'
         )
-        grid = tf.stack([grid_d, grid_h, grid_w], axis=-1)
+        grid = ops.stack([grid_d, grid_h, grid_w], axis=-1)
+        warp_grid = ops.expand_dims(grid, 0) + flow
 
 
-        warp_grid = tf.expand_dims(grid, 0) + flow
+        batched_coords = ops.transpose(warp_grid, (0, 4, 1, 2, 3))
+
+        deformed_images_batched = []
+        for i in range(B):
+            image_slice = image_volume[i]
+            coords = batched_coords[i]
+
+            image_slice_transposed = ops.transpose(image_slice, (3, 0, 1, 2))
+
+            deformed_channels = []
+            for c in range(C):
+                deformed_channel = ops.image.map_coordinates(
+                    image_slice_transposed[c], coords, order=1
+                )
+                deformed_channels.append(deformed_channel)
+
+            # Stack and transpose back to (D, H, W, C)
+            deformed_image_slice = ops.stack(deformed_channels, axis=0)
+            deformed_images_batched.append(ops.transpose(deformed_image_slice, (1, 2, 3, 0)))
+
+        deformed_image = ops.stack(deformed_images_batched, axis=0)
+
+        # Process Labels: loop over the batch dimension.
+        deformed_labels_batched = []
+        for i in range(B):
+            label_slice = label_volume[i]
+            coords = batched_coords[i]
+
+            label_channel = ops.squeeze(label_slice, axis=-1)
+            deformed_label_channel = ops.image.map_coordinates(
+                label_channel, coords, order=0
+            )
+
+            deformed_labels_batched.append(ops.expand_dims(deformed_label_channel, axis=-1))
+
+        deformed_label = ops.stack(deformed_labels_batched, axis=0)
 
-        warp_grid_floor = tf.floor(warp_grid)
-        t = warp_grid - warp_grid_floor
-
-        d0 = tf.cast(warp_grid_floor[..., 0], tf.int32); h0 = tf.cast(warp_grid_floor[..., 1], tf.int32); w0 = tf.cast(warp_grid_floor[..., 2], tf.int32)
-        d1 = tf.clip_by_value(d0 + 1, 0, D - 1); h1 = tf.clip_by_value(h0 + 1, 0, H - 1); w1 = tf.clip_by_value(w0 + 1, 0, W - 1)
-        d0 = tf.clip_by_value(d0, 0, D - 1); h0 = tf.clip_by_value(h0, 0, H - 1); w0 = tf.clip_by_value(w0, 0, W - 1)
-
-        c000 = tf.gather_nd(image_volume, tf.stack([d0, h0, w0], axis=-1), batch_dims=1); c001 = tf.gather_nd(image_volume, tf.stack([d0, h0, w1], axis=-1), batch_dims=1)
-        c010 = tf.gather_nd(image_volume, tf.stack([d0, h1, w0], axis=-1), batch_dims=1); c011 = tf.gather_nd(image_volume, tf.stack([d0, h1, w1], axis=-1), batch_dims=1)
-        c100 = tf.gather_nd(image_volume, tf.stack([d1, h0, w0], axis=-1), batch_dims=1); c101 = tf.gather_nd(image_volume, tf.stack([d1, h0, w1], axis=-1), batch_dims=1)
-        c110 = tf.gather_nd(image_volume, tf.stack([d1, h1, w0], axis=-1), batch_dims=1); c111 = tf.gather_nd(image_volume, tf.stack([d1, h1, w1], axis=-1), batch_dims=1)
-
-        td, th, tw = t[..., 0:1], t[..., 1:2], t[..., 2:3]
-        c00 = c000*(1-tw) + c001*tw; c01 = c010*(1-tw) + c011*tw; c10 = c100*(1-tw) + c101*tw; c11 = c110*(1-tw) + c111*tw
-        c0 = c00*(1-th) + c01*th; c1 = c10*(1-th) + c11*th
-        deformed_image = c0*(1-td) + c1*td
-        deformed_image = tf.cast(deformed_image, original_image_dtype)
-
-        nearest_indices_float = tf.round(warp_grid)
-        nearest_d = tf.clip_by_value(tf.cast(nearest_indices_float[..., 0], tf.int32), 0, D - 1)
-        nearest_h = tf.clip_by_value(tf.cast(nearest_indices_float[..., 1], tf.int32), 0, H - 1)
-        nearest_w = tf.clip_by_value(tf.cast(nearest_indices_float[..., 2], tf.int32), 0, W - 1)
-        deformed_label = tf.gather_nd(label_volume, tf.stack([nearest_d, nearest_h, nearest_w], axis=-1), batch_dims=1)
-
-        if self.data_format == "HWDC":
-            deformed_image = tf.transpose(deformed_image, perm=[0, 2, 3, 1, 4])
-            deformed_label = tf.transpose(deformed_label, perm=[0, 2, 3, 1, 4])
 
-        if not was_batched:
-            deformed_image = tf.squeeze(deformed_image, axis=0)
-            deformed_label = tf.squeeze(deformed_label, axis=0)
 
-        return deformed_image, deformed_label
+        deformed_image = ops.cast(deformed_image, original_image_dtype)
+        deformed_label = ops.cast(deformed_label, original_label_dtype)
+
+        if not was_batched:
+            deformed_image = ops.squeeze(deformed_image, axis=0)
+            deformed_label = ops.squeeze(deformed_label, axis=0)
+
+        return deformed_image, deformed_label
+
+    def compute_output_shape(self, input_shape):
+        """Computes the output shape of the layer."""
+        image_shape, label_shape = input_shape
+        return image_shape, label_shape
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            "grid_size": self.grid_size,
+            "alpha": self.alpha,
+            "sigma": self.sigma,
+            "data_format": self.data_format,
+        })
+        return config
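
A minimal usage sketch of the layer after this change (not part of the commit; the volume shapes, dtypes, and label values below are illustrative assumptions):

import numpy as np

# Hypothetical inputs: one unbatched volume in (D, H, W, C) channels_last layout,
# plus an integer label map with a trailing channel dimension.
image = np.random.rand(32, 64, 64, 1).astype("float32")
label = np.random.randint(0, 4, size=(32, 64, 64, 1)).astype("int32")

# dtype="bfloat16" could optionally be passed so compute_dtype (and hence the
# deformation math) runs in bfloat16, as the docstring describes.
augment = RandomElasticDeformation3D(grid_size=(4, 4, 4), alpha=35.0, sigma=2.5)
deformed_image, deformed_label = augment((image, label))

# Outputs keep the input shapes; the image is warped with linear interpolation
# (order=1) and the label with nearest-neighbor lookup (order=0).
assert tuple(deformed_image.shape) == image.shape
assert tuple(deformed_label.shape) == label.shape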
