open-edge-platform
diff --git a/‎otx/algorithms/common/configs/training_base.py‎
Lines changed: 31 additions & 2 deletions b/‎otx/algorithms/common/configs/training_base.py‎
Lines changed: 31 additions & 2 deletions
diff --git a/‎otx/algorithms/detection/adapters/mmdet/datasets/dataset.py‎
Lines changed: 16 additions & 20 deletions b/‎otx/algorithms/detection/adapters/mmdet/datasets/dataset.py‎
Lines changed: 16 additions & 20 deletions
diff --git a/‎otx/algorithms/detection/adapters/mmdet/datasets/tiling.py‎
Lines changed: 54 additions & 31 deletions b/‎otx/algorithms/detection/adapters/mmdet/datasets/tiling.py‎
Lines changed: 54 additions & 31 deletions
diff --git a/‎otx/algorithms/detection/adapters/mmdet/hooks/__init__.py‎
Lines changed: 2 additions & 1 deletion b/‎otx/algorithms/detection/adapters/mmdet/hooks/__init__.py‎
Lines changed: 2 additions & 1 deletion
@@ -220,6 +220,13 @@ class BasePostprocessing(ParameterGroup):
             affects_outcome_of=ModelLifecycle.INFERENCE,
         )
 
+        use_ellipse_shapes = configurable_boolean(
+            default_value=False,
+            header="Use ellipse shapes",
+            description="Use direct ellipse shape in inference instead of polygon from mask",
+            affects_outcome_of=ModelLifecycle.INFERENCE,
+        )
+
     @attrs
     class BaseNNCFOptimization(ParameterGroup):
         """BaseNNCFOptimization for OTX Algorithms."""
@@ -350,7 +357,7 @@ class BaseTilingParameters(ParameterGroup):
             description="Tile Image Size",
             default_value=400,
             min_value=100,
-            max_value=1024,
+            max_value=4096,
             affects_outcome_of=ModelLifecycle.NONE,
         )
 
@@ -368,7 +375,7 @@ class BaseTilingParameters(ParameterGroup):
             description="Max object per image",
             default_value=1500,
             min_value=1,
-            max_value=10000,
+            max_value=5000,
             affects_outcome_of=ModelLifecycle.NONE,
         )
 
@@ -388,4 +395,26 @@ class BaseTilingParameters(ParameterGroup):
             affects_outcome_of=ModelLifecycle.NONE,
         )
 
+        tile_sampling_ratio = configurable_float(
+            header="Sampling Ratio for entire tiling",
+            description="Since tiling train and validation to all tile from large image, "
+            "usually it takes lots of time than normal training."
+            "The tile_sampling_ratio is ratio for sampling entire tile dataset."
+            "Sampling tile dataset would save lots of time for training and validation time."
+            "Note that sampling will be applied to training and validation dataset, not test dataset.",
+            default_value=1.0,
+            min_value=0.000001,
+            max_value=1.0,
+            affects_outcome_of=ModelLifecycle.NONE,
+        )
+
+        object_tile_ratio = configurable_float(
+            header="Object tile ratio",
+            description="The desired ratio of min object size and tile size.",
+            default_value=0.03,
+            min_value=0.00,
+            max_value=1.00,
+            affects_outcome_of=ModelLifecycle.NONE,
+        )
+
     tiling_parameters = add_parameter_group(BaseTilingParameters)
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions
 # and limitations under the License.
 
-import tempfile
 from collections import OrderedDict
 from copy import copy
 from typing import Any, Dict, List, Sequence, Tuple, Union
@@ -32,6 +31,7 @@
 from otx.api.entities.dataset_item import DatasetItemEntity
 from otx.api.entities.datasets import DatasetEntity
 from otx.api.entities.label import Domain, LabelEntity
+from otx.api.entities.subset import Subset
 from otx.api.utils.shape_factory import ShapeFactory
 
 from .tiling import Tile
@@ -270,6 +270,7 @@ def evaluate(  # pylint: disable=too-many-branches
             if metric not in allowed_metrics:
                 raise KeyError(f"metric {metric} is not supported")
             annotations = [self.get_ann_info(i) for i in range(len(self))]
+            assert len(annotations) == len(results), "annotation length does not match prediction results"
             iou_thrs = [iou_thr] if isinstance(iou_thr, float) else iou_thr
             if metric == "mAP":
                 assert isinstance(iou_thrs, list)
@@ -302,7 +303,7 @@ def evaluate(  # pylint: disable=too-many-branches
 
 # pylint: disable=too-many-arguments
 @DATASETS.register_module()
-class ImageTilingDataset:
+class ImageTilingDataset(OTXDetDataset):
     """A wrapper of tiling dataset.
 
     Suitable for training small object dataset. This wrapper composed of `Tile`
@@ -326,6 +327,8 @@ class ImageTilingDataset:
             after NMS, only top max_per_img will be kept. Defaults to 200.
         max_annotation (int, optional): Limit the number of ground truth by
             randomly select 5000 due to RAM OOM. Defaults to 5000.
+        sampling_ratio (float): Ratio for sampling entire tile dataset.
+        include_full_img (bool): Whether to include full image in the dataset.
     """
 
     def __init__(
@@ -340,28 +343,29 @@ def __init__(
         max_annotation=5000,
         filter_empty_gt=True,
         test_mode=False,
+        sampling_ratio=1.0,
+        include_full_img=False,
     ):
         self.dataset = build_dataset(dataset)
         self.CLASSES = self.dataset.CLASSES
-        self.tmp_dir = tempfile.TemporaryDirectory()  # pylint: disable=consider-using-with
 
         self.tile_dataset = Tile(
             self.dataset,
             pipeline,
-            tmp_dir=self.tmp_dir,
             tile_size=tile_size,
             overlap=overlap_ratio,
             min_area_ratio=min_area_ratio,
             iou_threshold=iou_threshold,
             max_per_img=max_per_img,
             max_annotation=max_annotation,
-            filter_empty_gt=False if test_mode else filter_empty_gt,
+            filter_empty_gt=filter_empty_gt if self.dataset.otx_dataset[0].subset != Subset.TESTING else False,
+            sampling_ratio=sampling_ratio if self.dataset.otx_dataset[0].subset != Subset.TESTING else 1.0,
+            include_full_img=include_full_img if self.dataset.otx_dataset[0].subset != Subset.TESTING else True,
         )
         self.flag = np.zeros(len(self), dtype=np.uint8)
         self.pipeline = Compose(pipeline)
         self.test_mode = test_mode
         self.num_samples = len(self.dataset)  # number of original samples
-        self.merged_results: Union[List[Tuple[np.ndarray, list]], List[np.ndarray]] = []
 
     def __len__(self) -> int:
         """Get the length of the dataset."""
@@ -379,18 +383,16 @@ def __getitem__(self, idx: int) -> Dict:
         """
         return self.pipeline(self.tile_dataset[idx])
 
-    def evaluate(self, results, **kwargs) -> Dict[str, float]:
-        """Evaluation on Tile dataset.
+    def get_ann_info(self, idx):
+        """Get annotation information of a tile.
 
         Args:
-            results (list[list | tuple]): Testing results of the dataset.
-            **kwargs: Addition keyword arguments.
+            idx (int): Index of data.
 
         Returns:
-            dict[str, float]: evaluation metric.
+            dict: Annotation information of a tile.
         """
-        self.merged_results = self.tile_dataset.merge(results)
-        return self.dataset.evaluate(self.merged_results, **kwargs)
+        return self.tile_dataset.get_ann_info(idx)
 
     def merge(self, results) -> Union[List[Tuple[np.ndarray, list]], List[np.ndarray]]:
         """Merge tile-level results to image-level results.
@@ -401,10 +403,4 @@ def merge(self, results) -> Union[List[Tuple[np.ndarray, list]], List[np.ndarray
         Returns:
             merged_results (list[list | tuple]): Merged results of the dataset.
         """
-        self.merged_results = self.tile_dataset.merge(results)
-        return self.merged_results
-
-    def __del__(self):
-        """Delete the temporary directory when the object is deleted."""
-        if getattr(self, "tmp_dir", False):
-            self.tmp_dir.cleanup()
+        return self.tile_dataset.merge(results)
@@ -4,10 +4,10 @@
 #
 
 import copy
-import tempfile
 import uuid
 from itertools import product
 from multiprocessing import Pool
+from random import sample
 from time import time
 from typing import Callable, Dict, List, Tuple, Union
 
@@ -61,21 +61,24 @@ class Tile:
             only works when `test_mode=False`, i.e., we never filter images
             during tests. Defaults to True.
         nproc (int, optional): Processes used for processing masks. Default: 4.
+        sampling_ratio (float): Ratio for sampling entire tile dataset. Default: 1.0 (no sampling).
+        include_full_img (bool): Whether to include full-size image for inference or training. Default: False.
     """
 
     def __init__(
         self,
         dataset,
         pipeline,
-        tmp_dir: tempfile.TemporaryDirectory,
         tile_size: int = 400,
         overlap: float = 0.2,
         min_area_ratio: float = 0.9,
         iou_threshold: float = 0.45,
         max_per_img: int = 1500,
-        max_annotation: int = 5000,
+        max_annotation: int = 2000,
         filter_empty_gt: bool = True,
         nproc: int = 2,
+        sampling_ratio: float = 1.0,
+        include_full_img: bool = False,
     ):
         self.min_area_ratio = min_area_ratio
         self.filter_empty_gt = filter_empty_gt
@@ -88,7 +91,6 @@ def __init__(
         self.num_images = len(dataset)
         self.num_classes = len(dataset.CLASSES)
         self.CLASSES = dataset.CLASSES  # pylint: disable=invalid-name
-        self.tmp_folder = tmp_dir.name
         self.nproc = nproc
         self.img2fp32 = False
         for p in pipeline:
@@ -97,15 +99,21 @@ def __init__(
                 break
 
         self.dataset = dataset
-        self.tiles, self.cached_results = self.gen_tile_ann()
+        self.tiles_all, self.cached_results = self.gen_tile_ann(include_full_img)
+        self.sample_num = max(int(len(self.tiles_all) * sampling_ratio), 1)
+        if sampling_ratio < 1.0:
+            self.tiles = sample(self.tiles_all, self.sample_num)
+        else:
+            self.tiles = self.tiles_all
 
     @timeit
-    def gen_tile_ann(self) -> Tuple[List[Dict], List[Dict]]:
+    def gen_tile_ann(self, include_full_img) -> Tuple[List[Dict], List[Dict]]:
         """Generate tile annotations and cache the original image-level annotations.
 
         Returns:
             tiles: a list of tile annotations with some other useful information for data pipeline.
             cache_result: a list of original image-level annotations.
+            include_full_img: whether to include full-size image for inference or training.
         """
         tiles = []
         cache_result = []
@@ -114,7 +122,8 @@ def gen_tile_ann(self) -> Tuple[List[Dict], List[Dict]]:
 
         pbar = tqdm(total=len(self.dataset) * 2, desc="Generating tile annotations...")
         for idx, result in enumerate(cache_result):
-            tiles.append(self.gen_single_img(result, dataset_idx=idx))
+            if include_full_img:
+                tiles.append(self.gen_single_img(result, dataset_idx=idx))
             pbar.update(1)
 
         for idx, result in enumerate(cache_result):
@@ -165,19 +174,19 @@ def gen_tiles_single_img(self, result: Dict, dataset_idx: int) -> List[Dict]:
         height, width = img_shape[:2]
         _tile = self.prepare_result(result)
 
-        num_patches_h = int((height - self.tile_size) / self.stride) + 1
-        num_patches_w = int((width - self.tile_size) / self.stride) + 1
+        num_patches_h = (height + self.stride - 1) // self.stride
+        num_patches_w = (width + self.stride - 1) // self.stride
         for (_, _), (loc_i, loc_j) in zip(
             product(range(num_patches_h), range(num_patches_w)),
             product(
-                range(0, height - self.tile_size + 1, self.stride),
-                range(0, width - self.tile_size + 1, self.stride),
+                range(0, height, self.stride),
+                range(0, width, self.stride),
             ),
         ):
             x_1 = loc_j
-            x_2 = loc_j + self.tile_size
+            x_2 = min(loc_j + self.tile_size, width)
             y_1 = loc_i
-            y_2 = loc_i + self.tile_size
+            y_2 = min(loc_i + self.tile_size, height)
             tile = copy.deepcopy(_tile)
             tile["original_shape_"] = img_shape
             tile["ori_shape"] = (y_2 - y_1, x_2 - x_1, 3)
@@ -191,6 +200,9 @@ def gen_tiles_single_img(self, result: Dict, dataset_idx: int) -> List[Dict]:
             if self.filter_empty_gt and len(tile["gt_labels"]) == 0:
                 continue
             tile_list.append(tile)
+        if dataset_idx == 0:
+            print(f"image: {height}x{width} ~ tile_size: {self.tile_size}")
+            print(f"{num_patches_h}x{num_patches_w} tiles -> {len(tile_list)} tiles after filtering")
         return tile_list
 
     def prepare_result(self, result: Dict) -> Dict:
@@ -233,12 +245,11 @@ def tile_ann_assignment(
             gt_labels (np.ndarray): the original image-level labels
         """
         x_1, y_1 = tile_box[0][:2]
-        overlap_ratio = self.tile_boxes_overlap(tile_box, gt_bboxes)
-        match_idx = np.where((overlap_ratio[0] >= self.min_area_ratio))[0]
+        matched_indices = self.tile_boxes_overlap(tile_box, gt_bboxes)
 
-        if len(match_idx):
-            tile_lables = gt_labels[match_idx][:]
-            tile_bboxes = gt_bboxes[match_idx][:]
+        if len(matched_indices):
+            tile_lables = gt_labels[matched_indices][:]
+            tile_bboxes = gt_bboxes[matched_indices][:]
             tile_bboxes[:, 0] -= x_1
             tile_bboxes[:, 1] -= y_1
             tile_bboxes[:, 2] -= x_1
@@ -249,7 +260,7 @@ def tile_ann_assignment(
             tile_bboxes[:, 3] = np.minimum(self.tile_size, tile_bboxes[:, 3])
             tile_result["gt_bboxes"] = tile_bboxes
             tile_result["gt_labels"] = tile_lables
-            tile_result["gt_masks"] = gt_masks[match_idx].crop(tile_box[0]) if gt_masks is not None else []
+            tile_result["gt_masks"] = gt_masks[matched_indices].crop(tile_box[0]) if gt_masks is not None else []
         else:
             tile_result.pop("bbox_fields")
             tile_result.pop("mask_fields")
@@ -270,18 +281,12 @@ def tile_boxes_overlap(self, tile_box: np.ndarray, boxes: np.ndarray) -> np.ndar
             boxes (np.ndarray): boxes in shape (N, 4).
 
         Returns:
-            np.ndarray: overlapping ratio over boxes
+            np.ndarray: matched indices.
         """
-        box_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
-
-        width_height = np.minimum(tile_box[:, None, 2:], boxes[:, 2:]) - np.maximum(tile_box[:, None, :2], boxes[:, :2])
-
-        width_height = width_height.clip(min=0)  # [N,M,2]
-        inter = width_height.prod(2)
-
-        # handle empty boxes
-        tile_box_ratio = np.where(inter > 0, inter / box_area, np.zeros(1, dtype=inter.dtype))
-        return tile_box_ratio
+        x1, y1, x2, y2 = tile_box[0]
+        match_indices = (boxes[:, 0] > x1) & (boxes[:, 1] > y1) & (boxes[:, 2] < x2) & (boxes[:, 3] < y2)
+        match_indices = np.argwhere(match_indices == 1).flatten()
+        return match_indices
 
     def multiclass_nms(
         self, boxes: np.ndarray, scores: np.ndarray, idxs: np.ndarray, iou_threshold: float, max_num: int
@@ -431,7 +436,7 @@ def merge(self, results: List[List]) -> Union[List[Tuple[np.ndarray, list]], Lis
 
         merged_bbox_results: List[np.ndarray] = [np.empty((0, 5), dtype=dtype) for _ in range(self.num_images)]
         merged_mask_results: List[List] = [[] for _ in range(self.num_images)]
-        merged_label_results: List[Union[List, np.ndarray]] = [[] for _ in range(self.num_images)]
+        merged_label_results: List[Union[List, np.ndarray]] = [np.array([]) for _ in range(self.num_images)]
 
         for result, tile in zip(results, self.tiles):
             tile_x1, tile_y1, _, _ = tile["tile_box"]
@@ -477,3 +482,21 @@ def merge(self, results: List[List]) -> Union[List[Tuple[np.ndarray, list]], Lis
         if detection:
             return list(merged_bbox_results)
         return list(zip(merged_bbox_results, merged_mask_results))
+
+    def get_ann_info(self, idx):
+        """Get annotation by index.
+
+        Args:
+            idx (int): Index of data.
+
+        Returns:
+            dict: Annotation info of specified index.
+        """
+        ann = {}
+        if "gt_bboxes" in self.tiles[idx]:
+            ann["bboxes"] = self.tiles[idx]["gt_bboxes"]
+        if "gt_masks" in self.tiles[idx]:
+            ann["masks"] = self.tiles[idx]["gt_masks"]
+        if "gt_labels" in self.tiles[idx]:
+            ann["labels"] = self.tiles[idx]["gt_labels"]
+        return ann
@@ -4,5 +4,6 @@
 #
 
 from .det_class_probability_map_hook import DetClassProbabilityMapHook
+from .tile_sampling_hook import TileSamplingHook
 
-__all__ = ["DetClassProbabilityMapHook"]
+__all__ = ["DetClassProbabilityMapHook", "TileSamplingHook"]
Original file line number	Diff line number	Diff line change
`@@ -4,5 +4,6 @@`
`4`	`4`	`#`
`5`	`5`
`6`	`6`	`from .det_class_probability_map_hook import DetClassProbabilityMapHook`
	`7`	`+from .tile_sampling_hook import TileSamplingHook`
`7`	`8`
`8`		`-__all__ = ["DetClassProbabilityMapHook"]`
	`9`	`+__all__ = ["DetClassProbabilityMapHook", "TileSamplingHook"]`