open-edge-platform
diff --git a/‎src/otx/data/transform_libs/torchvision.py‎
Lines changed: 53 additions & 0 deletions b/‎src/otx/data/transform_libs/torchvision.py‎
Lines changed: 53 additions & 0 deletions
diff --git a/‎src/otx/recipe/_base_/data/classification.yaml‎
Lines changed: 4 additions & 3 deletions b/‎src/otx/recipe/_base_/data/classification.yaml‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎src/otx/recipe/_base_/data/detection.yaml‎
Lines changed: 4 additions & 3 deletions b/‎src/otx/recipe/_base_/data/detection.yaml‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎src/otx/recipe/_base_/data/detection_tile.yaml‎
Lines changed: 4 additions & 3 deletions b/‎src/otx/recipe/_base_/data/detection_tile.yaml‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎src/otx/recipe/_base_/data/instance_segmentation.yaml‎
Lines changed: 4 additions & 3 deletions b/‎src/otx/recipe/_base_/data/instance_segmentation.yaml‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎src/otx/recipe/_base_/data/keypoint_detection.yaml‎
Lines changed: 2 additions & 0 deletions b/‎src/otx/recipe/_base_/data/keypoint_detection.yaml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/otx/recipe/_base_/data/semantic_segmentation.yaml‎
Lines changed: 2 additions & 0 deletions b/‎src/otx/recipe/_base_/data/semantic_segmentation.yaml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/otx/recipe/_base_/data/semantic_segmentation_tile.yaml‎
Lines changed: 2 additions & 0 deletions b/‎src/otx/recipe/_base_/data/semantic_segmentation_tile.yaml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml‎
Lines changed: 3 additions & 3 deletions b/‎src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml‎
Lines changed: 3 additions & 3 deletions b/‎src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml‎
Lines changed: 3 additions & 3 deletions
@@ -26,6 +26,7 @@
 from scipy.stats import truncnorm
 from torchvision import tv_tensors
 from torchvision._utils import sequence_to_str
+from torchvision.transforms.v2 import GaussianBlur, GaussianNoise
 from torchvision.transforms.v2 import functional as F  # noqa: N812
 
 from otx.data.entity.base import (
@@ -903,6 +904,58 @@ def __repr__(self) -> str:
         return repr_str
 
 
+class RandomGaussianBlur(GaussianBlur):
+    """Gaussian blur that is applied only with probability ``prob``.
+
+    Thin wrapper around the torchvision ``GaussianBlur``: ``kernel_size`` and ``sigma`` are
+    forwarded to the parent unchanged, and the parent's blur is skipped whenever the random
+    draw in ``transform`` exceeds ``prob``.
+
+    Args:
+        kernel_size (int | Sequence[int]): Size of the Gaussian kernel, forwarded to the parent.
+        sigma (float | Sequence[float]): Standard deviation (or range of it), forwarded to the parent.
+            Defaults to (0.1, 2.0).
+        prob (float): Probability of applying the blur. Defaults to 0.5.
+    """
+
+    def __init__(
+        self,
+        kernel_size: int | Sequence[int],
+        sigma: float | Sequence[float] = (0.1, 2.0),
+        prob: float = 0.5,
+    ) -> None:
+        super().__init__(kernel_size=kernel_size, sigma=sigma)
+        self.prob = prob
+
+    def transform(self, inpt: torch.Tensor, params: dict[str, Any]) -> torch.Tensor:
+        """Blur ``inpt`` with probability ``self.prob``, otherwise return it untouched.
+
+        Args:
+            inpt (torch.Tensor): Input handed over by the parent transform machinery.
+            params (dict[str, Any]): Parameters passed through to the parent ``transform``.
+
+        Returns:
+            torch.Tensor: The parent's result when the blur is applied, else ``inpt`` itself.
+        """
+        # The draw is made on every call of this method, so each call decides independently.
+        if self.prob >= np.random.rand():
+            return super().transform(inpt, params)
+        return inpt
+
+
+class RandomGaussianNoise(GaussianNoise):
+    """Gaussian noise that is applied with probability ``prob`` and also handles unscaled images.
+
+    ``mean`` and ``sigma`` are expressed for the [0, 1] value range. When the image is detected
+    as unscaled (see ``_is_scaled``), both are multiplied by 255 and clipping is done against
+    [0, 255] instead of being delegated to the noise kernel.
+    Only float32 images are supported for this augmentation.
+
+    Args:
+        mean (float): Mean of the noise for a [0, 1] image. Defaults to 0.0.
+        sigma (float): Standard deviation of the noise for a [0, 1] image. Defaults to 0.1.
+        clip (bool): Whether to clip the noisy image back to its value range. Defaults to True.
+        prob (float): Probability of applying the noise. Defaults to 0.5.
+    """
+
+    def __init__(self, mean: float = 0.0, sigma: float = 0.1, clip: bool = True, prob: float = 0.5) -> None:
+        super().__init__(mean=mean, sigma=sigma, clip=clip)
+        self.prob = prob
+
+    def _is_scaled(self, tensor: torch.Tensor) -> bool:
+        """Return True when the maximum of ``tensor`` does not exceed 1 (with a small tolerance)."""
+        # NOTE: this is a heuristic on the maximum value only, so an unscaled image whose
+        # brightest pixel is <= 1 (e.g. an all-black image) is reported as scaled as well.
+        return bool(torch.max(tensor) <= 1 + 1e-5)
+
+    def forward(self, *_inputs: OTXDataItem) -> OTXDataItem:
+        """Add Gaussian noise to ``inputs.image`` with probability ``self.prob``.
+
+        Args:
+            *_inputs (OTXDataItem): Exactly one entity; its ``image`` attribute is updated in place.
+
+        Returns:
+            OTXDataItem: The same entity, with ``image`` replaced when the noise was applied.
+        """
+        assert len(_inputs) == 1, "[tmp] Multiple entity is not supported yet."  # noqa: S101
+        inputs = _inputs[0]
+        if (img := getattr(inputs, "image", None)) is not None and self.prob >= np.random.rand():
+            scaled = self._is_scaled(img)
+            sigma = self.sigma * 255 if not scaled else self.sigma
+            mean = self.mean * 255 if not scaled else self.mean
+            # The kernel's own clipping is skipped for unscaled images; they are clamped to
+            # [0, 255] below instead.
+            clip = False if not scaled else self.clip
+
+            noisy = self._call_kernel(F.gaussian_noise, img, mean=mean, sigma=sigma, clip=clip)
+            if not scaled and self.clip:
+                # Honour ``clip=False`` for unscaled images too, and re-wrap the clamped result:
+                # plain torch ops on a TVTensor hand back a pure ``torch.Tensor``.
+                noisy = tv_tensors.wrap(torch.clamp(noisy, 0, 255), like=img)
+
+            inputs.image = noisy
+
+        return inputs
+
+
 class PhotoMetricDistortion(tvt_v2.Transform, NumpytoTVTensorMixin):
     """Implementation of mmdet.datasets.transforms.PhotoMetricDistortion with torchvision format.
 
 
@@ -26,20 +26,21 @@ train_subset:
         is_numpy_to_tvtensor: true
     - class_path: torchvision.transforms.v2.RandomVerticalFlip
       enable: false
-    - class_path: torchvision.transforms.v2.GaussianBlur
+    - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
       enable: false
       init_args:
         kernel_size: 5
     - class_path: torchvision.transforms.v2.ToDtype
       init_args:
         dtype: ${as_torch_dtype:torch.float32}
         scale: false
+    - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
+      enable: false
     - class_path: torchvision.transforms.v2.Normalize
       init_args:
         mean: [123.675, 116.28, 103.53]
         std: [58.395, 57.12, 57.375]
-    - class_path: torchvision.transforms.v2.GaussianNoise
-      enable: false
+
   sampler:
     class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
 
@@ -27,19 +27,20 @@ train_subset:
         is_numpy_to_tvtensor: true
     - class_path: torchvision.transforms.v2.RandomVerticalFlip
       enable: false
-    - class_path: torchvision.transforms.v2.GaussianBlur
+    - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
       enable: false
       init_args:
         kernel_size: 5
     - class_path: torchvision.transforms.v2.ToDtype
       init_args:
         dtype: ${as_torch_dtype:torch.float32}
+    - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
+      enable: false
     - class_path: torchvision.transforms.v2.Normalize
       init_args:
         mean: [0.0, 0.0, 0.0]
         std: [255.0, 255.0, 255.0]
-    - class_path: torchvision.transforms.v2.GaussianNoise
-      enable: false
+
   sampler:
     class_path: torch.utils.data.RandomSampler
 
 
@@ -30,19 +30,20 @@ train_subset:
         is_numpy_to_tvtensor: true
     - class_path: torchvision.transforms.v2.RandomVerticalFlip
       enable: false
-    - class_path: torchvision.transforms.v2.GaussianBlur
+    - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
       enable: false
       init_args:
         kernel_size: 5
     - class_path: torchvision.transforms.v2.ToDtype
       init_args:
         dtype: ${as_torch_dtype:torch.float32}
+    - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
+      enable: false
     - class_path: torchvision.transforms.v2.Normalize
       init_args:
         mean: [0.0, 0.0, 0.0]
         std: [255.0, 255.0, 255.0]
-    - class_path: torchvision.transforms.v2.GaussianNoise
-      enable: false
+
   sampler:
     class_path: torch.utils.data.RandomSampler
 
 
@@ -33,19 +33,20 @@ train_subset:
         is_numpy_to_tvtensor: true
     - class_path: torchvision.transforms.v2.RandomVerticalFlip
       enable: false
-    - class_path: torchvision.transforms.v2.GaussianBlur
+    - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
       enable: false
       init_args:
         kernel_size: 5
     - class_path: torchvision.transforms.v2.ToDtype
       init_args:
         dtype: ${as_torch_dtype:torch.float32}
+    - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
+      enable: false
     - class_path: torchvision.transforms.v2.Normalize
       init_args:
         mean: [123.675, 116.28, 103.53]
         std: [58.395, 57.12, 57.375]
-    - class_path: torchvision.transforms.v2.GaussianNoise
-      enable: false
+
   sampler:
     class_path: torch.utils.data.RandomSampler
 
 
@@ -18,6 +18,8 @@ train_subset:
     - class_path: torchvision.transforms.v2.ToDtype
       init_args:
         dtype: ${as_torch_dtype:torch.float32}
+    - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
+      enable: false
     - class_path: torchvision.transforms.v2.Normalize
       init_args:
         mean: [123.675, 116.28, 103.53]
 
@@ -32,6 +32,8 @@ train_subset:
     - class_path: torchvision.transforms.v2.ToDtype
       init_args:
         dtype: ${as_torch_dtype:torch.float32}
+    - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
+      enable: false
     - class_path: torchvision.transforms.v2.Normalize
       init_args:
         mean: [123.675, 116.28, 103.53]
 
@@ -34,6 +34,8 @@ train_subset:
     - class_path: torchvision.transforms.v2.ToDtype
       init_args:
         dtype: ${as_torch_dtype:torch.float32}
+    - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
+      enable: false
     - class_path: torchvision.transforms.v2.Normalize
       init_args:
         mean: [123.675, 116.28, 103.53]
 
@@ -68,16 +68,16 @@ overrides:
             is_numpy_to_tvtensor: true
         - class_path: torchvision.transforms.v2.RandomVerticalFlip
           enable: false
-        - class_path: torchvision.transforms.v2.GaussianBlur
+        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
           enable: false
           init_args:
             kernel_size: 5
-        - class_path: torchvision.transforms.v2.GaussianNoise
-          enable: false
         - class_path: torchvision.transforms.v2.ToDtype
           init_args:
             dtype: ${as_torch_dtype:torch.float32}
             scale: false
+        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
+          enable: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
             mean: [123.675, 116.28, 103.53]
 
@@ -72,16 +72,16 @@ overrides:
             is_numpy_to_tvtensor: true
         - class_path: torchvision.transforms.v2.RandomVerticalFlip
           enable: false
-        - class_path: torchvision.transforms.v2.GaussianBlur
+        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
           enable: false
           init_args:
             kernel_size: 5
-        - class_path: torchvision.transforms.v2.GaussianNoise
-          enable: false
         - class_path: torchvision.transforms.v2.ToDtype
           init_args:
             dtype: ${as_torch_dtype:torch.float32}
             scale: false
+        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
+          enable: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
             mean: [123.675, 116.28, 103.53]