Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
9bd3fa7
Classification Dataset refactor (#4606)
AlbertvanHouten Sep 4, 2025
373a046
Merge branch 'develop' of https://github.com/open-edge-platform/train…
AlbertvanHouten Sep 10, 2025
9f26bb8
Detection dataset updated to use new datumaro version (#4675)
AlbertvanHouten Sep 11, 2025
8b2fe8b
Update the segmentation dataset to use new Datumaro dataset (#4676)
gdlg Sep 12, 2025
3d84b50
Instance segmentation support with the new Datumaro dataset (#4686)
gdlg Sep 25, 2025
d1ff0dc
Migrate OTX Keypoint detection to use the new experimental dataset (#…
AlbertvanHouten Sep 26, 2025
d27ff02
Merge branch 'develop' of https://github.com/open-edge-platform/train…
AlbertvanHouten Sep 26, 2025
a3d82dd
Move new dataset files to library folder
AlbertvanHouten Sep 26, 2025
c1b8084
Update OTX classification task to use legacy converter (#4769)
AlbertvanHouten Sep 29, 2025
9fd7abc
Merge branch 'develop' of https://github.com/open-edge-platform/train…
AlbertvanHouten Oct 2, 2025
4751cb2
Migrate anomaly task to use the new Datumaro dataset (#4770)
gdlg Oct 3, 2025
5ff3b59
Merge branch 'develop' of https://github.com/open-edge-platform/train…
AlbertvanHouten Oct 15, 2025
6088088
Point datumaro deps to the dev branch
AlbertvanHouten Oct 15, 2025
7827ca5
Tilling support with the experimental Datumaro dataset (#4858)
gdlg Oct 17, 2025
64e58ca
Use new datumaro for multi label and hierarchical classification (#4815)
AlbertvanHouten Oct 20, 2025
fdb6afe
Merge branch 'feature/datumaro' of https://github.com/open-edge-platf…
AlbertvanHouten Oct 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions library/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ classifiers = [
"Programming Language :: Python :: 3.12",
]
dependencies = [
"datumaro==1.10.0",
"datumaro[experimental] @ git+https://github.com/open-edge-platform/datumaro.git@develop",
"omegaconf==2.3.0",
"rich==14.0.0",
"jsonargparse==4.35.0",
Expand All @@ -37,7 +37,6 @@ dependencies = [
"docstring_parser==0.16", # CLI help-formatter
"rich_argparse==1.7.0", # CLI help-formatter
"einops==0.8.1",
"decord==0.6.0",
"typeguard>=4.3,<4.5",
# TODO(ashwinvaidya17): https://github.com/openvinotoolkit/anomalib/issues/2126
"setuptools<70",
Expand All @@ -51,6 +50,8 @@ dependencies = [
"onnxconverter-common==1.14.0",
"nncf==2.17.0",
"anomalib[core]==1.1.3",
"numpy<2.0.0",
"tensorboardX>=1.8",
]

[project.optional-dependencies]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def _get_and_log_device_stats(
batch_size (int): batch size.
"""
device = trainer.strategy.root_device
if device.type in ["cpu", "xpu"]:
if device.type in ["cpu", "xpu", "mps"]:
return

device_stats = trainer.accelerator.get_device_stats(device)
Expand Down
5 changes: 5 additions & 0 deletions library/src/otx/backend/native/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@

"""Module for OTX custom models."""

import multiprocessing

# Prefer the "forkserver" start method when the application has not chosen one
# yet. The method only exists on some POSIX platforms; requesting it where it
# is unavailable (e.g. Windows) raises ValueError, which would make this
# package impossible to import. In that case keep the platform default.
if (
    multiprocessing.get_start_method(allow_none=True) is None
    and "forkserver" in multiprocessing.get_all_start_methods()
):
    multiprocessing.set_start_method("forkserver")

from .anomaly import Padim, Stfpm, Uflow
from .classification import (
EfficientNet,
Expand Down
9 changes: 5 additions & 4 deletions library/src/otx/backend/native/models/detection/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from otx.types.task import OTXTaskType

if TYPE_CHECKING:
from datumaro.experimental.fields import TileInfo
from lightning.pytorch.cli import LRSchedulerCallable, OptimizerCallable

from otx.backend.native.models.detection.detectors import SingleStageDetector
Expand Down Expand Up @@ -262,21 +263,21 @@ def forward_tiles(self, inputs: OTXTileBatchDataEntity) -> OTXPredBatch:
DetBatchPredEntity: Merged detection prediction.
"""
tile_preds: list[OTXPredBatch] = []
tile_attrs: list[list[dict[str, int | str]]] = []
tile_infos: list[list[TileInfo]] = []
merger = DetectionTileMerge(
inputs.imgs_info,
self.num_classes,
self.tile_config,
self.explain_mode,
)
for batch_tile_attrs, batch_tile_input in inputs.unbind():
for batch_tile_infos, batch_tile_input in inputs.unbind():
output = self.forward_explain(batch_tile_input) if self.explain_mode else self.forward(batch_tile_input)
if isinstance(output, OTXBatchLossEntity):
msg = "Loss output is not supported for tile merging"
raise TypeError(msg)
tile_preds.append(output)
tile_attrs.append(batch_tile_attrs)
pred_entities = merger.merge(tile_preds, tile_attrs)
tile_infos.append(batch_tile_infos)
pred_entities = merger.merge(tile_preds, tile_infos)

pred_entity = OTXPredBatch(
batch_size=inputs.batch_size,
Expand Down
39 changes: 27 additions & 12 deletions library/src/otx/backend/native/models/detection/ssd.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from typing import TYPE_CHECKING, Any, ClassVar, Literal

import numpy as np
from datumaro.components.annotation import Bbox
from datumaro.experimental.dataset import Dataset as DmDataset

from otx.backend.native.exporter.base import OTXModelExporter
from otx.backend.native.exporter.native import OTXNativeModelExporter
Expand All @@ -30,6 +30,7 @@
from otx.backend.native.models.utils.support_otx_v1 import OTXv1Helper
from otx.backend.native.models.utils.utils import load_checkpoint
from otx.config.data import TileConfig
from otx.data.entity.sample import DetectionSample
from otx.metrics.fmeasure import MeanAveragePrecisionFMeasureCallable

if TYPE_CHECKING:
Expand Down Expand Up @@ -231,7 +232,7 @@ def _get_new_anchors(self, dataset: OTXDataset, anchor_generator: SSDAnchorGener
return self._get_anchor_boxes(wh_stats, group_as)

@staticmethod
def _get_sizes_from_dataset_entity(dataset: OTXDataset, target_wh: list[int]) -> list[tuple[int, int]]:
def _get_sizes_from_dataset_entity(dataset: OTXDataset, target_wh: list[int]) -> np.ndarray:
"""Function to get width and height size of items in OTXDataset.

Args:
Expand All @@ -240,20 +241,34 @@ def _get_sizes_from_dataset_entity(dataset: OTXDataset, target_wh: list[int]) ->
Return
list[tuple[int, int]]: tuples with width and height of each instance
"""
wh_stats: list[tuple[int, int]] = []
wh_stats = np.empty((0, 2), dtype=np.float32)
if not isinstance(dataset.dm_subset, DmDataset):
exc_str = "The variable dataset.dm_subset must be an instance of DmDataset"
raise TypeError(exc_str)

for item in dataset.dm_subset:
for ann in item.annotations:
if isinstance(ann, Bbox):
x1, y1, x2, y2 = ann.points
x1 = x1 / item.media.size[1] * target_wh[0]
y1 = y1 / item.media.size[0] * target_wh[1]
x2 = x2 / item.media.size[1] * target_wh[0]
y2 = y2 / item.media.size[0] * target_wh[1]
wh_stats.append((x2 - x1, y2 - y1))
if not isinstance(item, DetectionSample):
exc_str = "The variable item must be an instance of DetectionSample"
raise TypeError(exc_str)

if item.img_info is None:
exc_str = "The image info must not be None"
raise RuntimeError(exc_str)

height, width = item.img_info.img_shape
x1 = item.bboxes[:, 0]
y1 = item.bboxes[:, 1]
x2 = item.bboxes[:, 2]
y2 = item.bboxes[:, 3]

w = (x2 - x1) / width * target_wh[0]
h = (y2 - y1) / height * target_wh[1]

wh_stats = np.concatenate((wh_stats, np.stack((w, h), axis=1)), axis=0)
return wh_stats

@staticmethod
def _get_anchor_boxes(wh_stats: list[tuple[int, int]], group_as: list[int]) -> tuple:
def _get_anchor_boxes(wh_stats: np.ndarray, group_as: list[int]) -> tuple:
"""Get new anchor box widths & heights using KMeans."""
from sklearn.cluster import KMeans

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from otx.types.task import OTXTaskType

if TYPE_CHECKING:
from datumaro.experimental.fields import TileInfo
from lightning.pytorch.cli import LRSchedulerCallable, OptimizerCallable
from torch import nn

Expand Down Expand Up @@ -208,21 +209,21 @@ def forward_tiles(self, inputs: OTXTileBatchDataEntity) -> OTXPredBatch:
TorchPredBatch: Merged instance segmentation prediction.
"""
tile_preds: list[OTXPredBatch] = []
tile_attrs: list[list[dict[str, int | str]]] = []
tile_infos: list[list[TileInfo]] = []
merger = InstanceSegTileMerge(
inputs.imgs_info,
self.num_classes,
self.tile_config,
self.explain_mode,
)
for batch_tile_attrs, batch_tile_input in inputs.unbind():
for batch_tile_infos, batch_tile_input in inputs.unbind():
output = self.forward_explain(batch_tile_input) if self.explain_mode else self.forward(batch_tile_input)
if isinstance(output, OTXBatchLossEntity):
msg = "Loss output is not supported for tile merging"
raise TypeError(msg)
tile_preds.append(output)
tile_attrs.append(batch_tile_attrs)
pred_entities = merger.merge(tile_preds, tile_attrs)
tile_infos.append(batch_tile_infos)
pred_entities = merger.merge(tile_preds, tile_infos)

pred_entity = OTXPredBatch(
batch_size=inputs.batch_size,
Expand Down Expand Up @@ -458,7 +459,7 @@ def _convert_pred_entity_to_compute_metric(

rles = (
[encode_rle(mask) for mask in masks.data]
if len(masks)
if masks is not None
else polygon_to_rle(polygons, *imgs_info.ori_shape) # type: ignore[union-attr,arg-type]
)
target_info.append(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
from otx.data.utils.structures.mask import mask_target

if TYPE_CHECKING:
from datumaro import Polygon
import numpy as np


def maskrcnn_loss(
mask_logits: Tensor,
proposals: list[Tensor],
gt_masks: list[list[Tensor]] | list[list[Polygon]],
gt_masks: list[list[Tensor]] | list[np.ndarray],
gt_labels: list[Tensor],
mask_matched_idxs: list[Tensor],
image_shapes: list[tuple[int, int]],
Expand All @@ -31,7 +31,7 @@ def maskrcnn_loss(
Args:
mask_logits (Tensor): the mask predictions.
proposals (list[Tensor]): the region proposals.
gt_masks (list[list[Tensor]] | list[list[Polygon]]): the ground truth masks.
gt_masks (list[list[Tensor]] | list[np.ndarray]): the ground truth masks as ragged arrays.
gt_labels (list[Tensor]): the ground truth labels.
mask_matched_idxs (list[Tensor]): the matched indices.
image_shapes (list[tuple[int, int]]): the image shapes.
Expand Down Expand Up @@ -142,7 +142,9 @@ def forward(
raise ValueError(msg)

gt_masks = (
[t["masks"] for t in targets] if len(targets[0]["masks"]) else [t["polygons"] for t in targets]
[t["masks"] for t in targets]
if targets[0]["masks"] is not None
else [t["polygons"] for t in targets]
)
gt_labels = [t["labels"] for t in targets]
rcnn_loss_mask = maskrcnn_loss(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import numpy as np
import torch
import torch.nn.functional
from datumaro import Polygon
from torch import Tensor, nn

from otx.backend.native.models.common.utils.nms import batched_nms, multiclass_nms
Expand Down Expand Up @@ -644,7 +643,7 @@ def prepare_loss_inputs(self, x: tuple[Tensor], entity: OTXDataBatch) -> dict:
)

# Convert polygon masks to bitmap masks
if isinstance(batch_gt_instances[0].masks[0], Polygon):
if isinstance(batch_gt_instances[0].masks, np.ndarray):
for gt_instances, img_meta in zip(batch_gt_instances, batch_img_metas):
ndarray_masks = polygon_to_bitmap(gt_instances.masks, *img_meta["img_shape"])
if len(ndarray_masks) == 0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,27 @@
"""Rotated Detection Prediction Mixin."""

import cv2
import numpy as np
import torch
from datumaro import Polygon
from torchvision import tv_tensors

from otx.data.entity.torch.torch import OTXPredBatch


def get_polygon_area(points: np.ndarray) -> float:
    """Calculate polygon area using the shoelace formula.

    The vertices are promoted to float64 and translated so that the first vertex
    lies at the origin before the cross products are summed. The area is
    translation-invariant, and this keeps the intermediate products small, so
    float32 inputs with large coordinates do not lose the result to rounding.

    Args:
        points: Array of polygon vertices with shape (N, 2)

    Returns:
        float: Area of the polygon (0.0 when fewer than three vertices are given)
    """
    vertices = np.asarray(points, dtype=np.float64)
    # Fewer than three vertices cannot enclose any area; this also guards the
    # `vertices[0]` access below against an empty (0, 2) input.
    if len(vertices) < 3:
        return 0.0

    vertices = vertices - vertices[0]
    x = vertices[:, 0]
    y = vertices[:, 1]
    signed_twice_area = np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))
    # Convert the NumPy scalar so the declared `float` return type holds.
    return 0.5 * float(np.abs(signed_twice_area))


def convert_masks_to_rotated_predictions(preds: OTXPredBatch) -> OTXPredBatch:
"""Convert masks to rotated bounding boxes and polygons.

Expand Down Expand Up @@ -58,8 +72,10 @@ def convert_masks_to_rotated_predictions(preds: OTXPredBatch) -> OTXPredBatch:
for contour, hierarchy in zip(contours, hierarchies[0]):
if hierarchy[3] != -1 or len(contour) <= 2:
continue
rbox_points = Polygon(cv2.boxPoints(cv2.minAreaRect(contour)).reshape(-1))
rbox_polygons.append((rbox_points, rbox_points.get_area()))
# Get rotated bounding box points and convert to ragged array format
box_points = cv2.boxPoints(cv2.minAreaRect(contour)).astype(np.float32)
area = get_polygon_area(box_points)
rbox_polygons.append((box_points, area))

if rbox_polygons:
rbox_polygons.sort(key=lambda x: x[1], reverse=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def unpack_inst_seg_entity(entity: OTXDataBatch) -> tuple:
}
batch_img_metas.append(metainfo)

gt_masks = mask if len(mask) else polygon
gt_masks = mask if mask is not None else polygon

batch_gt_instances.append(
InstanceData(
Expand Down
11 changes: 6 additions & 5 deletions library/src/otx/backend/native/models/segmentation/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from otx.types.task import OTXTaskType

if TYPE_CHECKING:
from datumaro.experimental.fields import TileInfo
from lightning.pytorch.cli import LRSchedulerCallable, OptimizerCallable
from torch import Tensor

Expand Down Expand Up @@ -223,15 +224,15 @@ def forward_tiles(self, inputs: OTXTileBatchDataEntity) -> OTXPredBatch:
raise NotImplementedError(msg)

tile_preds: list[OTXPredBatch] = []
tile_attrs: list[list[dict[str, int | str]]] = []
tile_infos: list[list[TileInfo]] = []
merger = SegmentationTileMerge(
inputs.imgs_info,
self.num_classes,
self.tile_config,
self.explain_mode,
)
for batch_tile_attrs, batch_tile_input in inputs.unbind():
tile_size = batch_tile_attrs[0]["tile_size"]
for batch_tile_infos, batch_tile_input in inputs.unbind():
tile_size = (batch_tile_infos[0].height, batch_tile_infos[0].width)
output = self.model(
inputs=batch_tile_input.images,
img_metas=batch_tile_input.imgs_info,
Expand All @@ -245,8 +246,8 @@ def forward_tiles(self, inputs: OTXTileBatchDataEntity) -> OTXPredBatch:
msg = "Loss output is not supported for tile merging"
raise TypeError(msg)
tile_preds.append(output)
tile_attrs.append(batch_tile_attrs)
pred_entities = merger.merge(tile_preds, tile_attrs)
tile_infos.append(batch_tile_infos)
pred_entities = merger.merge(tile_preds, tile_infos)

pred_entity = OTXPredBatch(
batch_size=inputs.batch_size,
Expand Down
Loading