diff --git a/src/datumaro/experimental/converter_registry.py b/src/datumaro/experimental/converter_registry.py index 840426078d..dda420fad6 100644 --- a/src/datumaro/experimental/converter_registry.py +++ b/src/datumaro/experimental/converter_registry.py @@ -48,7 +48,7 @@ class ConversionPaths(NamedTuple): """ Container for separated batch and lazy conversion paths. - The batch converters can be applied immediately to the entire DataFrame, + The converters can be applied immediately to the entire DataFrame, while lazy converters must be deferred and applied at sample access time. """ diff --git a/src/datumaro/experimental/converters.py b/src/datumaro/experimental/converters.py index 7a890194de..3369d3a498 100644 --- a/src/datumaro/experimental/converters.py +++ b/src/datumaro/experimental/converters.py @@ -22,6 +22,9 @@ from .converter_registry import AttributeSpec, Converter, converter from .fields import ( BBoxField, + BBoxFormat, + EllipseField, + EllipseFormat, ImageBytesField, ImageCallableField, ImageField, @@ -33,7 +36,9 @@ MaskCallableField, MaskField, PolygonField, + PolygonFormat, RotatedBBoxField, + RotatedBBoxFormat, ) from .type_registry import polars_to_numpy_dtype @@ -1011,10 +1016,10 @@ def convert(self, df: pl.DataFrame) -> pl.DataFrame: ) # Format according to output bbox format - if self.output_bbox.field.format == "x1y1x2y2": + if self.output_bbox.field.format == BBoxFormat.X1Y1X2Y2: # Already in this format pass - elif self.output_bbox.field.format == "xywh": + elif self.output_bbox.field.format == BBoxFormat.XYWH: df = df.with_columns( pl.col(output_column_name).list.eval( pl.concat_arr( @@ -1029,8 +1034,7 @@ def convert(self, df: pl.DataFrame) -> pl.DataFrame: ) else: raise NotImplementedError( - f"This conversion is not yet implemented " - f"for the format {self.output_bbox.field.format}." + f"This conversion is not yet implemented for the format {self.output_bbox.field.format}." 
) return df @@ -1361,3 +1365,400 @@ def rotate_corner(expr: pl.Expr): ) return df + + +@converter +class BBoxFormatConverter(Converter): + """ + Converter for switching between different bounding box coordinate formats. + + Supports conversion between: + - X1Y1X2Y2: (x1, y1, x2, y2) - top-left and bottom-right corners + - XYWH: (x, y, w, h) - top-left corner and dimensions + """ + + input_bbox: AttributeSpec[BBoxField] + output_bbox: AttributeSpec[BBoxField] + + def filter_output_spec(self) -> bool: + """ + Check if this converter should be applied for bbox format conversion. + + Returns True if input and output have different bbox formats. + """ + input_format = self.input_bbox.field.format + output_format = self.output_bbox.field.format + + # Only apply if formats are different + if input_format == output_format: + return False + + # Configure output specification + self.output_bbox = AttributeSpec( + name=self.output_bbox.name, + field=BBoxField( + semantic=self.input_bbox.field.semantic, + dtype=self.input_bbox.field.dtype, + format=output_format, + normalize=self.input_bbox.field.normalize, + ), + ) + + # Check if conversion is supported + supported_conversions = { + (BBoxFormat.X1Y1X2Y2, BBoxFormat.XYWH), + (BBoxFormat.XYWH, BBoxFormat.X1Y1X2Y2), + } + + return (input_format, output_format) in supported_conversions + + def convert(self, df: pl.DataFrame) -> pl.DataFrame: + """ + Convert between bbox coordinate formats. 
+ + Args: + df: DataFrame with bbox coordinates in input format + + Returns: + DataFrame with bbox coordinates in output format + """ + input_col = self.input_bbox.name + output_col = self.output_bbox.name + input_format = self.input_bbox.field.format + output_format = self.output_bbox.field.format + + if input_format == BBoxFormat.X1Y1X2Y2 and output_format == BBoxFormat.XYWH: + # Convert (x1, y1, x2, y2) to (x, y, w, h) + df = df.with_columns( + pl.col(input_col) + .list.eval( + pl.concat_arr( + [ + pl.element().arr.get(0), # x = x1 + pl.element().arr.get(1), # y = y1 + pl.element().arr.get(2) - pl.element().arr.get(0), # w = x2 - x1 + pl.element().arr.get(3) - pl.element().arr.get(1), # h = y2 - y1 + ] + ) + ) + .alias(output_col) + ) + elif input_format == BBoxFormat.XYWH and output_format == BBoxFormat.X1Y1X2Y2: + # Convert (x, y, w, h) to (x1, y1, x2, y2) + df = df.with_columns( + pl.col(input_col) + .list.eval( + pl.concat_arr( + [ + pl.element().arr.get(0), # x1 = x + pl.element().arr.get(1), # y1 = y + pl.element().arr.get(0) + pl.element().arr.get(2), # x2 = x + w + pl.element().arr.get(1) + pl.element().arr.get(3), # y2 = y + h + ] + ) + ) + .alias(output_col) + ) + else: + raise NotImplementedError( + f"Conversion from {input_format} to {output_format} is not yet implemented" + ) + + return df + + +@converter +class EllipseFormatConverter(Converter): + """ + Converter for switching between different ellipse coordinate formats. + + Supports conversion between: + - X1Y1X2Y2: (x1, y1, x2, y2) - top-left and bottom-right corners + - CXCYWH: (cx, cy, w, h) - center coordinate and dimensions + """ + + input_ellipse: AttributeSpec[EllipseField] + output_ellipse: AttributeSpec[EllipseField] + + def filter_output_spec(self) -> bool: + """ + Check if this converter should be applied for ellipse format conversion. + + Returns True if input and output have different ellipse formats. 
+ """ + input_format = self.input_ellipse.field.format + output_format = self.output_ellipse.field.format + + # Only apply if formats are different + if input_format == output_format: + return False + + # Configure output specification + self.output_ellipse = AttributeSpec( + name=self.output_ellipse.name, + field=EllipseField( + semantic=self.input_ellipse.field.semantic, + dtype=self.input_ellipse.field.dtype, + format=output_format, + normalize=self.input_ellipse.field.normalize, + ), + ) + + # Check if conversion is supported + supported_conversions = { + (EllipseFormat.X1Y1X2Y2, EllipseFormat.CXCYWH), + (EllipseFormat.CXCYWH, EllipseFormat.X1Y1X2Y2), + } + + return (input_format, output_format) in supported_conversions + + def convert(self, df: pl.DataFrame) -> pl.DataFrame: + """ + Convert between ellipse coordinate formats. + + Args: + df: DataFrame with ellipse coordinates in input format + + Returns: + DataFrame with ellipse coordinates in output format + """ + input_col = self.input_ellipse.name + output_col = self.output_ellipse.name + input_format = self.input_ellipse.field.format + output_format = self.output_ellipse.field.format + divider = pl.lit(2).cast(self.input_ellipse.field.dtype) + + if input_format == EllipseFormat.X1Y1X2Y2 and output_format == EllipseFormat.CXCYWH: + # Convert (x1, y1, x2, y2) to (cx, cy, w, h) + df = df.with_columns( + pl.col(input_col) + .list.eval( + pl.concat_arr( + [ + (pl.element().arr.get(0) + pl.element().arr.get(2)) + / divider, # cx = (x1 + x2) / 2 + (pl.element().arr.get(1) + pl.element().arr.get(3)) + / divider, # cy = (y1 + y2) / 2 + pl.element().arr.get(2) - pl.element().arr.get(0), # w = x2 - x1 + pl.element().arr.get(1) - pl.element().arr.get(3), # h = y1 - y2 (NOTE(review): non-negative only when y1 >= y2 - confirm intended y-axis direction) + ] + ) + ) + .alias(output_col) + ) + elif input_format == EllipseFormat.CXCYWH and output_format == EllipseFormat.X1Y1X2Y2: + # Convert (cx, cy, w, h) to (x1, y1, x2, y2) + df = df.with_columns( + pl.col(input_col) + .list.eval( + pl.concat_arr( + [ + 
pl.element().arr.get(0) + - pl.element().arr.get(2) / divider, # x1 = cx - w/2 + pl.element().arr.get(1) + + pl.element().arr.get(3) / divider, # y1 = cy + h/2 + pl.element().arr.get(0) + + pl.element().arr.get(2) / divider, # x2 = cx + w/2 + pl.element().arr.get(1) + - pl.element().arr.get(3) / divider, # y2 = cy - h/2 + ] + ) + ) + .alias(output_col) + ) + else: + raise NotImplementedError( + f"Conversion from {input_format} to {output_format} is not yet implemented" + ) + + return df + + +@converter +class RotatedBBoxFormatConverter(Converter): + """ + Converter for switching between different rotated bounding box formats. + + Supports conversion between: + - CXCYWHR: (cx, cy, w, h, r) - rotation in radians + - CXCYWHA: (cx, cy, w, h, a) - rotation in degrees + """ + + input_rotated_bbox: AttributeSpec[RotatedBBoxField] + output_rotated_bbox: AttributeSpec[RotatedBBoxField] + + def filter_output_spec(self) -> bool: + """ + Check if this converter should be applied for rotated bbox format conversion. + + Returns True if input and output have different rotated bbox formats. 
+ """ + input_format = self.input_rotated_bbox.field.format + output_format = self.output_rotated_bbox.field.format + + # Only apply if formats are different + if input_format == output_format: + return False + + # Configure output specification + self.output_rotated_bbox = AttributeSpec( + name=self.output_rotated_bbox.name, + field=RotatedBBoxField( + semantic=self.input_rotated_bbox.field.semantic, + dtype=self.input_rotated_bbox.field.dtype, + format=output_format, + normalize=self.input_rotated_bbox.field.normalize, + ), + ) + + # Check if conversion is supported + supported_conversions = { + (RotatedBBoxFormat.CXCYWHR, RotatedBBoxFormat.CXCYWHA), + (RotatedBBoxFormat.CXCYWHA, RotatedBBoxFormat.CXCYWHR), + } + + return (input_format, output_format) in supported_conversions + + def convert(self, df: pl.DataFrame) -> pl.DataFrame: + """ + Convert between rotated bbox formats (radians ↔ degrees). + + Args: + df: DataFrame with rotated bbox coordinates in input format + + Returns: + DataFrame with rotated bbox coordinates in output format + """ + input_col = self.input_rotated_bbox.name + output_col = self.output_rotated_bbox.name + input_format = self.input_rotated_bbox.field.format + output_format = self.output_rotated_bbox.field.format + + if input_format == RotatedBBoxFormat.CXCYWHR and output_format == RotatedBBoxFormat.CXCYWHA: + # Convert radians to degrees: multiply rotation by 180/π + df = df.with_columns( + pl.col(input_col) + .list.eval( + pl.concat_arr( + [ + pl.element().arr.get(0), # cx unchanged + pl.element().arr.get(1), # cy unchanged + pl.element().arr.get(2), # w unchanged + pl.element().arr.get(3), # h unchanged + pl.element().arr.get(4) * 180.0 / np.pi, # r (radians) -> a (degrees) + ] + ) + ) + .alias(output_col) + ) + elif ( + input_format == RotatedBBoxFormat.CXCYWHA and output_format == RotatedBBoxFormat.CXCYWHR + ): + # Convert degrees to radians: multiply rotation by π/180 + df = df.with_columns( + pl.col(input_col) + .list.eval( + 
pl.concat_arr( + [ + pl.element().arr.get(0), # cx unchanged + pl.element().arr.get(1), # cy unchanged + pl.element().arr.get(2), # w unchanged + pl.element().arr.get(3), # h unchanged + pl.element().arr.get(4) * np.pi / 180.0, # a (degrees) -> r (radians) + ] + ) + ) + .alias(output_col) + ) + else: + raise NotImplementedError( + f"Conversion from {input_format} to {output_format} is not yet implemented" + ) + + return df + + +@converter +class PolygonFormatConverter(Converter): + """ + Converter for switching between different polygon coordinate formats. + + Supports conversion between: + - XY: (x, y) coordinate pairs + - YX: (y, x) coordinate pairs (swaps x and y coordinates) + """ + + input_polygon: AttributeSpec[PolygonField] + output_polygon: AttributeSpec[PolygonField] + + def filter_output_spec(self) -> bool: + """ + Check if this converter should be applied for polygon format conversion. + + Returns True if input and output have different polygon formats. + """ + input_format = self.input_polygon.field.format + output_format = self.output_polygon.field.format + + # Only apply if formats are different + if input_format == output_format: + return False + + # Configure output specification + self.output_polygon = AttributeSpec( + name=self.output_polygon.name, + field=PolygonField( + semantic=self.input_polygon.field.semantic, + dtype=self.input_polygon.field.dtype, + format=output_format, + normalize=self.input_polygon.field.normalize, + ), + ) + + # Check if conversion is supported + supported_conversions = { + (PolygonFormat.XY, PolygonFormat.YX), + (PolygonFormat.YX, PolygonFormat.XY), + } + + return (input_format, output_format) in supported_conversions + + def convert(self, df: pl.DataFrame) -> pl.DataFrame: + """ + Convert between polygon coordinate formats by swapping x and y coordinates. 
+ + Args: + df: DataFrame with polygon coordinates in input format + + Returns: + DataFrame with polygon coordinates in output format + """ + input_col = self.input_polygon.name + output_col = self.output_polygon.name + input_format = self.input_polygon.field.format + output_format = self.output_polygon.field.format + + if (input_format == PolygonFormat.XY and output_format == PolygonFormat.YX) or ( + input_format == PolygonFormat.YX and output_format == PolygonFormat.XY + ): + # Swap x and y coordinates: [x, y] ↔ [y, x] + df = df.with_columns( + pl.col(input_col) + .list.eval( + pl.element().list.eval( + pl.concat_arr( + [ + pl.element().arr.get(1), # y becomes first + pl.element().arr.get(0), # x becomes second + ] + ) + ) + ) + .alias(output_col) + ) + else: + raise NotImplementedError( + f"Conversion from {input_format} to {output_format} is not yet implemented" + ) + + return df diff --git a/src/datumaro/experimental/fields.py b/src/datumaro/experimental/fields.py index 398da26197..26899ca183 100644 --- a/src/datumaro/experimental/fields.py +++ b/src/datumaro/experimental/fields.py @@ -21,6 +21,41 @@ from .type_registry import from_polars_data, to_numpy +class BBoxFormat(Enum): + """Enumeration of bounding box coordinate formats.""" + + X1Y1X2Y2 = "x1y1x2y2" # (x1, y1, x2, y2) - top-left and bottom-right corners + XYWH = "xywh" # (x, y, w, h) - top-left corner and dimensions + + +class RotatedBBoxFormat(Enum): + """Enumeration of rotated bounding box coordinate formats.""" + + CXCYWHR = "cxcywhr" # (cx, cy, w, h, r) - center point, dimensions, rotation in radians + CXCYWHA = "cxcywha" # (cx, cy, w, h, a) - center point, dimensions, rotation in degrees + + +class EllipseFormat(Enum): + """Enumeration of ellipse coordinate formats.""" + + X1Y1X2Y2 = "x1y1x2y2" # (x1, y1, x2, y2) - top-left and bottom-right corners of encapsulating bounding box + CXCYWH = "xywh" # (cx, cy, w, h) - center coordinates and dimensions; NOTE(review): value is "xywh", not "cxcywh" - confirm this is intended + + +class ImageFormat(Enum): + """Enumeration of 
image color formats.""" + + RGB = "RGB" # Red, Green, Blue + BGR = "BGR" # Blue, Green, Red + + +class PolygonFormat(Enum): + """Enumeration of polygon coordinate formats.""" + + XY = "xy" # (x, y) coordinate pairs + YX = "yx" # (y, x) coordinate pairs + + class Subset(Enum): """Standard dataset subset values.""" @@ -187,15 +222,15 @@ class ImageField(TensorField): color format (RGB, BGR, etc.). Attributes: - format: Image color format (e.g., "RGB", "BGR", "RGBA") + format: Image color format (e.g., ImageFormat.RGB, ImageFormat.BGR) """ - format: str = "RGB" + format: ImageFormat = ImageFormat.RGB def image_field( dtype: Any, - format: str = "RGB", + format: ImageFormat = ImageFormat.RGB, channels_first: bool = False, semantic: Semantic = Semantic.Default, ) -> Any: @@ -204,7 +239,7 @@ def image_field( Args: dtype: Polars data type for pixel values - format: Image color format (defaults to "RGB") + format: Image color format (defaults to ImageFormat.RGB) semantic: Semantic tags describing the image's purpose (optional) Returns: @@ -270,13 +305,13 @@ class BBoxField(Field): Attributes: semantic: Semantic tags describing the bounding box purpose dtype: Polars data type for coordinate values - format: Coordinate format (e.g., "x1y1x2y2", "xywh") + format: Coordinate format (e.g., BBoxFormat.X1Y1X2Y2, BBoxFormat.XYWH) normalize: Whether coordinates are normalized to [0,1] range """ semantic: Semantic dtype: PolarsDataType = pl.Float32() - format: str = "x1y1x2y2" + format: BBoxFormat = BBoxFormat.X1Y1X2Y2 normalize: bool = False def to_polars_schema(self, name: str) -> dict[str, pl.DataType]: @@ -308,7 +343,7 @@ def from_polars(self, name: str, row_index: int, df: pl.DataFrame, target_type: def bbox_field( dtype: Any, - format: str = "x1y1x2y2", + format: BBoxFormat = BBoxFormat.X1Y1X2Y2, normalize: bool = False, semantic: Semantic = Semantic.Default, ) -> Any: @@ -317,7 +352,7 @@ def bbox_field( Args: dtype: Polars data type for coordinate values - format: Coordinate 
format (defaults to "x1y1x2y2") + format: Coordinate format (defaults to BBoxFormat.X1Y1X2Y2) normalize: Whether coordinates are normalized (defaults to False) semantic: Semantic tags describing the bounding box purpose (optional) @@ -339,13 +374,13 @@ class RotatedBBoxField(Field): Attributes: semantic: Semantic tags describing the rotated bounding box purpose dtype: Polars data type for coordinate values - format: Coordinate format (e.g., "cxcywhr", "cxcywha" for angle in degrees) + format: Coordinate format (e.g., RotatedBBoxFormat.CXCYWHR, RotatedBBoxFormat.CXCYWHA) normalize: Whether coordinates are normalized to [0,1] range """ semantic: Semantic dtype: PolarsDataType = pl.Float32() - format: str = "cxcywhr" + format: RotatedBBoxFormat = RotatedBBoxFormat.CXCYWHR normalize: bool = False def to_polars_schema(self, name: str) -> dict[str, pl.DataType]: @@ -377,7 +412,7 @@ def from_polars(self, name: str, row_index: int, df: pl.DataFrame, target_type: def rotated_bbox_field( dtype: Any, - format: str = "cxcywhr", + format: RotatedBBoxFormat = RotatedBBoxFormat.CXCYWHR, normalize: bool = False, semantic: Semantic = Semantic.Default, ) -> Any: @@ -386,7 +421,7 @@ def rotated_bbox_field( Args: dtype: Polars data type for coordinate values - format: Coordinate format (defaults to "cxcywhr" for cx,cy,w,h,rotation_radians) + format: Coordinate format (defaults to RotatedBBoxFormat.CXCYWHR) normalize: Whether coordinates are normalized (defaults to False) semantic: Semantic tags describing the rotated bounding box purpose (optional) @@ -665,13 +700,13 @@ class PolygonField(Field): Attributes: semantic: Semantic tags describing the polygon purpose dtype: Polars data type for coordinate values - format: Coordinate format (e.g., "xy", "yx") + format: Coordinate format (e.g., PolygonFormat.XY, PolygonFormat.YX) normalize: Whether coordinates are normalized to [0,1] range """ semantic: Semantic dtype: PolarsDataType = pl.Float32() - format: str = "xy" + format: 
PolygonFormat = PolygonFormat.XY normalize: bool = False def to_polars_schema(self, name: str) -> dict[str, pl.DataType]: @@ -694,7 +729,7 @@ def from_polars(self, name: str, row_index: int, df: pl.DataFrame, target_type: def polygon_field( dtype: Any, - format: str = "xy", + format: PolygonFormat = PolygonFormat.XY, normalize: bool = False, semantic: Semantic = Semantic.Default, ) -> Any: @@ -703,7 +738,7 @@ def polygon_field( Args: dtype: Polars data type for coordinate values - format: Coordinate format (defaults to "xy") + format: Coordinate format (defaults to PolygonFormat.XY) normalize: Whether coordinates are normalized (defaults to False) semantic: Semantic tags describing the polygon purpose (optional) @@ -876,11 +911,11 @@ class ImageCallableField(Field): Attributes: semantic: Semantic tags describing the callable's purpose - format: Expected image color format (e.g., "RGB", "BGR", "RGBA") + format: Expected image color format (e.g., ImageFormat.RGB, ImageFormat.BGR) """ semantic: Semantic - format: str = "RGB" + format: ImageFormat = ImageFormat.RGB def to_polars_schema(self, name: str) -> dict[str, pl.DataType]: """Return schema with Object type to store callable.""" @@ -902,12 +937,14 @@ def from_polars( return value -def image_callable_field(format: str = "RGB", semantic: Semantic = Semantic.Default) -> Any: +def image_callable_field( + format: ImageFormat = ImageFormat.RGB, semantic: Semantic = Semantic.Default +) -> Any: """ Create an ImageCallableField instance for storing image-generating callables. 
Args: - format: Expected image color format (defaults to "RGB") + format: Expected image color format (defaults to ImageFormat.RGB) semantic: Semantic tags describing the callable's purpose (optional) Returns: @@ -1203,13 +1240,13 @@ class EllipseField(Field): Attributes: semantic: Semantic tags describing the ellipses purpose dtype: Polars data type for coordinate values - format: Coordinate format (e.g., "x1y1x2y2", "xywh") + format: Coordinate format (e.g., EllipseFormat.X1Y1X2Y2, EllipseFormat.CXCYWH) normalize: Whether coordinates are normalized to [0,1] range """ semantic: Semantic dtype: PolarsDataType = pl.Float32() - format: str = "x1y1x2y2" + format: EllipseFormat = EllipseFormat.X1Y1X2Y2 normalize: bool = False def to_polars_schema(self, name: str) -> dict[str, pl.DataType]: diff --git a/src/datumaro/experimental/tiling/tilers.py b/src/datumaro/experimental/tiling/tilers.py index 52e35e92a9..b684838eb9 100644 --- a/src/datumaro/experimental/tiling/tilers.py +++ b/src/datumaro/experimental/tiling/tilers.py @@ -17,6 +17,7 @@ from ..converter_registry import AttributeSpec from ..fields import ( BBoxField, + BBoxFormat, ImageField, ImageInfoField, InstanceMaskField, @@ -150,8 +151,10 @@ def tile(self, df: pl.DataFrame, tiles_df: pl.DataFrame, slice_offset: int = 0) """Process bounding boxes for each tile.""" column_name = self.field_spec.name - if self.field_spec.field.format != "x1y1x2y2": - raise RuntimeError(f"The format {self.field_spec.field.format} is not supported.") + if self.field_spec.field.format != BBoxFormat.X1Y1X2Y2: + raise RuntimeError( + f"The format {self.field_spec.field.format} is not supported for tiling." 
+ ) results = [] diff --git a/src/datumaro/experimental/type_registry.py b/src/datumaro/experimental/type_registry.py index 4ab8f37d67..3f88f2fe3b 100644 --- a/src/datumaro/experimental/type_registry.py +++ b/src/datumaro/experimental/type_registry.py @@ -88,6 +88,10 @@ def points_to_numpy(x: Points) -> np.ndarray: bytes: lambda x: np.array(x), types.NoneType: lambda _: None, list: lambda x: np.array(x), + int: lambda x: np.array(x), + float: lambda x: np.array(x), + bool: lambda x: np.array(x), + str: lambda x: np.array(x), Points: lambda x: points_to_numpy(x), } diff --git a/tests/unit/experimental/test_converters.py b/tests/unit/experimental/test_converters.py index 466f7f9d83..decb5ff6a8 100644 --- a/tests/unit/experimental/test_converters.py +++ b/tests/unit/experimental/test_converters.py @@ -21,25 +21,33 @@ ) from datumaro.experimental.converters import ( BBoxCoordinateConverter, + BBoxFormatConverter, + EllipseFormatConverter, ImageBytesToImageConverter, ImageCallableToImageConverter, ImagePathToImageConverter, InstanceMaskCallableToInstanceMaskConverter, LabelIndexConverter, MaskCallableToMaskConverter, + PolygonFormatConverter, PolygonToBBoxConverter, PolygonToInstanceMaskConverter, PolygonToMaskConverter, RGBToBGRConverter, + RotatedBBoxFormatConverter, RotatedBBoxToPolygonConverter, UInt8ToFloat32Converter, ) from datumaro.experimental.fields import ( BBoxField, + BBoxFormat, + EllipseField, + EllipseFormat, Field, ImageBytesField, ImageCallableField, ImageField, + ImageFormat, ImageInfoField, ImagePathField, InstanceMaskCallableField, @@ -48,13 +56,12 @@ MaskCallableField, MaskField, PolygonField, + PolygonFormat, RotatedBBoxField, + RotatedBBoxFormat, bbox_field, image_field, image_info_field, - mask_callable_field, - mask_field, - rotated_bbox_field, ) from datumaro.experimental.schema import AttributeInfo, Schema, Semantic @@ -136,8 +143,8 @@ def test_uint8_to_float32_converter(): ) # Set up converter attributes - input_field = 
ImageField(dtype=pl.UInt8, format="RGB", semantic=Semantic.Default) - output_field = ImageField(dtype=pl.Float32, format="RGB", semantic=Semantic.Default) + input_field = ImageField(dtype=pl.UInt8, format=ImageFormat.RGB, semantic=Semantic.Default) + output_field = ImageField(dtype=pl.Float32, format=ImageFormat.RGB, semantic=Semantic.Default) setattr( converter_instance, @@ -181,12 +188,14 @@ def test_bbox_coordinate_converter(): # Set up converter for absolute to normalized conversion input_bbox_field = BBoxField( - dtype=pl.Float32, format="x1y1x2y2", normalize=False, semantic=Semantic.Default + dtype=pl.Float32, format=BBoxFormat.X1Y1X2Y2, normalize=False, semantic=Semantic.Default ) output_bbox_field = BBoxField( - dtype=pl.Float32, format="x1y1x2y2", normalize=True, semantic=Semantic.Default + dtype=pl.Float32, format=BBoxFormat.X1Y1X2Y2, normalize=True, semantic=Semantic.Default + ) + input_image_field = ImageField( + dtype=pl.UInt8, format=ImageFormat.RGB, semantic=Semantic.Default ) - input_image_field = ImageField(dtype=pl.UInt8, format="RGB", semantic=Semantic.Default) setattr( converter_instance, @@ -238,7 +247,7 @@ def test_image_path_to_image_converter(): # Set up converter attributes input_field = ImagePathField(semantic=Semantic.Default) - output_field = ImageField(dtype=pl.UInt8, format="RGB", semantic=Semantic.Default) + output_field = ImageField(dtype=pl.UInt8, format=ImageFormat.RGB, semantic=Semantic.Default) output_info_field = ImageInfoField(semantic=Semantic.Default) setattr( @@ -287,7 +296,6 @@ def test_image_bytes_to_image_converter(): df = pl.DataFrame({"image_bytes": [image_bytes]}) # Set up converter attributes - from datumaro.experimental.fields import image_bytes_field input_field = ImageBytesField(semantic=Semantic.Default) output_field = ImageField(dtype=pl.UInt8, format="RGB", semantic=Semantic.Default) @@ -939,7 +947,7 @@ def test_attribute_renaming(): source_schema = Schema( { "input_image": AttributeInfo( - type=str, 
field=image_field(dtype=pl.UInt8(), format="RGB") + type=str, field=image_field(dtype=pl.UInt8(), format=ImageFormat.RGB) ), } ) @@ -948,7 +956,7 @@ def test_attribute_renaming(): target_schema = Schema( { "output_image": AttributeInfo( - type=str, field=image_field(dtype=pl.UInt8(), format="RGB") + type=str, field=image_field(dtype=pl.UInt8(), format=ImageFormat.RGB) ), } ) @@ -989,9 +997,11 @@ def test_combined_rename_and_delete(): # Create source schema with three fields source_schema = Schema( { - "old_image": AttributeInfo(type=str, field=image_field(dtype=pl.UInt8(), format="RGB")), + "old_image": AttributeInfo( + type=str, field=image_field(dtype=pl.UInt8(), format=ImageFormat.RGB) + ), "bbox": AttributeInfo( - type=str, field=bbox_field(dtype=pl.Float32(), format="x1y1x2y2") + type=str, field=bbox_field(dtype=pl.Float32(), format=BBoxFormat.X1Y1X2Y2) ), "extra_field": AttributeInfo(type=str, field=image_info_field()), } @@ -1000,7 +1010,9 @@ def test_combined_rename_and_delete(): # Create target schema with renamed image and no bbox or extra_field target_schema = Schema( { - "new_image": AttributeInfo(type=str, field=image_field(dtype=pl.UInt8(), format="RGB")), + "new_image": AttributeInfo( + type=str, field=image_field(dtype=pl.UInt8(), format=ImageFormat.RGB) + ), } ) @@ -1100,7 +1112,7 @@ def test_polygon_to_mask_converter(): # Set up converter attributes input_polygon_field = PolygonField( - dtype=pl.Float32, format="xy", normalize=False, semantic=Semantic.Default + dtype=pl.Float32, format=PolygonFormat.XY, normalize=False, semantic=Semantic.Default ) input_labels_field = LabelField(dtype=pl.Int32, semantic=Semantic.Default, multi_label=True) image_info_field = ImageInfoField(semantic=Semantic.Default) @@ -1191,7 +1203,7 @@ def test_polygon_to_mask_converter_normalized(): # Set up converter attributes with normalization enabled input_polygon_field = PolygonField( dtype=pl.Float32, - format="xy", + format=PolygonFormat.XY, normalize=True, # Enable 
normalization semantic=Semantic.Default, ) @@ -1313,7 +1325,7 @@ def test_polygon_to_instance_mask_converter(): # Set up field specs input_polygon_field = PolygonField( - dtype=pl.Float32, format="xy", normalize=False, semantic=Semantic.Default + dtype=pl.Float32, format=PolygonFormat.XY, normalize=False, semantic=Semantic.Default ) image_info_field = ImageInfoField(semantic=Semantic.Default) output_instance_mask_field = InstanceMaskField(dtype=pl.Boolean, semantic=Semantic.Default) @@ -1394,7 +1406,7 @@ def test_polygon_to_instance_mask_converter_normalized(): # Set up field specs input_polygon_field = PolygonField( - dtype=pl.Float32, format="xy", normalize=True, semantic=Semantic.Default + dtype=pl.Float32, format=PolygonFormat.XY, normalize=True, semantic=Semantic.Default ) image_info_field = ImageInfoField(semantic=Semantic.Default) output_instance_mask_field = InstanceMaskField(dtype=pl.Boolean, semantic=Semantic.Default) @@ -1647,10 +1659,10 @@ def test_polygon_to_bbox_converter(): # Set up field specs input_polygon_field = PolygonField( - dtype=pl.Float32, format="xy", normalize=False, semantic=Semantic.Default + dtype=pl.Float32, format=PolygonFormat.XY, normalize=False, semantic=Semantic.Default ) output_bbox_field = BBoxField( - dtype=pl.Float32, format="x1y1x2y2", normalize=False, semantic=Semantic.Default + dtype=pl.Float32, format=BBoxFormat.X1Y1X2Y2, normalize=False, semantic=Semantic.Default ) setattr( @@ -1712,10 +1724,10 @@ def test_polygon_to_bbox_converter_xywh(): # Set up field specs for xywh format input_polygon_field = PolygonField( - dtype=pl.Float32, format="xy", normalize=False, semantic=Semantic.Default + dtype=pl.Float32, format=PolygonFormat.XY, normalize=False, semantic=Semantic.Default ) output_bbox_field = BBoxField( - dtype=pl.Float32, format="xywh", normalize=False, semantic=Semantic.Default + dtype=pl.Float32, format=BBoxFormat.XYWH, normalize=False, semantic=Semantic.Default ) setattr( @@ -1761,10 +1773,10 @@ def 
test_polygon_to_bbox_converter_normalized(): # Set up field specs with normalized coordinates input_polygon_field = PolygonField( - dtype=pl.Float32, format="xy", normalize=True, semantic=Semantic.Default + dtype=pl.Float32, format=PolygonFormat.XY, normalize=True, semantic=Semantic.Default ) output_bbox_field = BBoxField( - dtype=pl.Float32, format="x1y1x2y2", normalize=True, semantic=Semantic.Default + dtype=pl.Float32, format=BBoxFormat.X1Y1X2Y2, normalize=True, semantic=Semantic.Default ) setattr( @@ -2243,3 +2255,347 @@ def test_rotated_bbox_to_polygon_converter(): # Check second polygon (45-degree rotation) polygon2 = polygons[1] assert len(polygon2) == 4 # Four corners + + +def test_bbox_format_converter_x1y1x2y2_to_xywh(): + """Test BBoxFormatConverter conversion from X1Y1X2Y2 to XYWH format.""" + # Create test data: (x1, y1, x2, y2) format + test_data = [[10.0, 20.0, 50.0, 70.0], [0.0, 0.0, 100.0, 200.0]] + + df = pl.DataFrame({"bboxes": [test_data]}, schema={"bboxes": pl.List(pl.Array(pl.Float32, 4))}) + + converter_instance = BBoxFormatConverter() + + # Set up converter attributes + input_bbox_field = BBoxField( + dtype=pl.Float32, format=BBoxFormat.X1Y1X2Y2, normalize=False, semantic=Semantic.Default + ) + output_bbox_field = BBoxField( + dtype=pl.Float32, format=BBoxFormat.XYWH, normalize=False, semantic=Semantic.Default + ) + + setattr(converter_instance, "input_bbox", AttributeSpec(name="bboxes", field=input_bbox_field)) + setattr( + converter_instance, + "output_bbox", + AttributeSpec(name="bboxes_xywh", field=output_bbox_field), + ) + + # Test filter + assert converter_instance.filter_output_spec() is True + + # Test conversion + result_df = converter_instance.convert(df) + + assert "bboxes_xywh" in result_df.columns + result_bboxes = result_df["bboxes_xywh"][0] + + # First bbox: (10, 20, 50, 70) -> (10, 20, 40, 50) + expected_bbox1 = [10.0, 20.0, 40.0, 50.0] # x, y, w=50-10, h=70-20 + # Second bbox: (0, 0, 100, 200) -> (0, 0, 100, 200) + 
expected_bbox2 = [0.0, 0.0, 100.0, 200.0] # x, y, w=100-0, h=200-0 + + assert np.allclose(result_bboxes[0], expected_bbox1) + assert np.allclose(result_bboxes[1], expected_bbox2) + + +def test_bbox_format_converter_xywh_to_x1y1x2y2(): + """Test BBoxFormatConverter conversion from XYWH to X1Y1X2Y2 format.""" + # Create test data: (x, y, w, h) format + test_data = [[10.0, 20.0, 40.0, 50.0], [0.0, 0.0, 100.0, 200.0]] + + df = pl.DataFrame({"bboxes": [test_data]}, schema={"bboxes": pl.List(pl.Array(pl.Float32, 4))}) + + converter_instance = BBoxFormatConverter() + + # Set up converter attributes + input_bbox_field = BBoxField( + dtype=pl.Float32, format=BBoxFormat.XYWH, normalize=False, semantic=Semantic.Default + ) + output_bbox_field = BBoxField( + dtype=pl.Float32, format=BBoxFormat.X1Y1X2Y2, normalize=False, semantic=Semantic.Default + ) + + setattr(converter_instance, "input_bbox", AttributeSpec(name="bboxes", field=input_bbox_field)) + setattr( + converter_instance, + "output_bbox", + AttributeSpec(name="bboxes_xyxy", field=output_bbox_field), + ) + + # Test filter + assert converter_instance.filter_output_spec() is True + + # Test conversion + result_df = converter_instance.convert(df) + + assert "bboxes_xyxy" in result_df.columns + result_bboxes = result_df["bboxes_xyxy"][0] + + # First bbox: (10, 20, 40, 50) -> (10, 20, 50, 70) + expected_bbox1 = [10.0, 20.0, 50.0, 70.0] # x1, y1, x2=x+w, y2=y+h + # Second bbox: (0, 0, 100, 200) -> (0, 0, 100, 200) + expected_bbox2 = [0.0, 0.0, 100.0, 200.0] # x1, y1, x2=x+w, y2=y+h + + assert np.allclose(result_bboxes[0], expected_bbox1) + assert np.allclose(result_bboxes[1], expected_bbox2) + + +def test_ellipse_format_converter_x1y1x2y2_to_cxcywh(): + """Test EllipseFormatConverter conversion from X1Y1X2Y2 to CXCYWH format.""" + # Create test data: (x1, y1, x2, y2) format + test_data = [[10.0, 70.0, 50.0, 20.0], [0.0, 200.0, 100.0, 0.0]] + + df = pl.DataFrame( + {"ellipses": [test_data]}, schema={"ellipses": 
pl.List(pl.Array(pl.Float32, 4))} + ) + + converter_instance = EllipseFormatConverter() + + # Set up converter attributes + input_ellipse_field = EllipseField( + dtype=pl.Float32, format=EllipseFormat.X1Y1X2Y2, normalize=False, semantic=Semantic.Default + ) + output_ellipse_field = EllipseField( + dtype=pl.Float32, format=EllipseFormat.CXCYWH, normalize=False, semantic=Semantic.Default + ) + + setattr( + converter_instance, + "input_ellipse", + AttributeSpec(name="ellipses", field=input_ellipse_field), + ) + setattr( + converter_instance, + "output_ellipse", + AttributeSpec(name="ellipses_cxcywh", field=output_ellipse_field), + ) + + # Test filter + assert converter_instance.filter_output_spec() is True + + # Test conversion + result_df = converter_instance.convert(df) + + assert "ellipses_cxcywh" in result_df.columns + result_ellipses = result_df["ellipses_cxcywh"][0] + + # First ellipse: (10, 70, 50, 20) -> (30, 45, 40, 50) + expected_ellipse1 = [30.0, 45.0, 40.0, 50.0] # cx, cy, w=50-10, h=70-20 + # Second ellipse: (0, 200, 100, 0) -> (50, 100, 100, 200) + expected_ellipse2 = [50.0, 100.0, 100.0, 200.0] # cx, cy, w=100-0, h=200-0 + + assert np.allclose(result_ellipses[0], expected_ellipse1) + assert np.allclose(result_ellipses[1], expected_ellipse2) + + +def test_ellipse_format_converter_cxcywh_to_x1y1x2y2(): + """Test EllipseFormatConverter conversion from CXCYWH to X1Y1X2Y2 format.""" + # Create test data: (cx, cy, w, h) format + test_data = [[20.0, 30.0, 40.0, 50.0], [50.0, 100.0, 100.0, 200.0]] + + df = pl.DataFrame( + {"ellipses": [test_data]}, schema={"ellipses": pl.List(pl.Array(pl.Float32, 4))} + ) + + converter_instance = EllipseFormatConverter() + + # Set up converter attributes + input_ellipse_field = EllipseField( + dtype=pl.Float32, format=EllipseFormat.CXCYWH, normalize=False, semantic=Semantic.Default + ) + output_ellipse_field = EllipseField( + dtype=pl.Float32, format=EllipseFormat.X1Y1X2Y2, normalize=False, semantic=Semantic.Default + ) + +
setattr( + converter_instance, + "input_ellipse", + AttributeSpec(name="ellipses", field=input_ellipse_field), + ) + setattr( + converter_instance, + "output_ellipse", + AttributeSpec(name="ellipses_x1y1x2y2", field=output_ellipse_field), + ) + + # Test filter + assert converter_instance.filter_output_spec() is True + + # Test conversion + result_df = converter_instance.convert(df) + + assert "ellipses_x1y1x2y2" in result_df.columns + result_ellipses = result_df["ellipses_x1y1x2y2"][0] + + # First ellipse: (20, 30, 40, 50) -> (0, 55, 40, 5) + expected_ellipse1 = [0.0, 55.0, 40.0, 5.0] # cx +/- w/2 and cy +/- h/2 + # Second ellipse: (50, 100, 100, 200) -> (0, 200, 100, 0) + expected_ellipse2 = [0.0, 200.0, 100.0, 0.0] # cx +/- w/2 and cy +/- h/2 + + assert np.allclose(result_ellipses[0], expected_ellipse1) + assert np.allclose(result_ellipses[1], expected_ellipse2) + + +def test_rotated_bbox_format_converter_radians_to_degrees(): + """Test RotatedBBoxFormatConverter conversion from radians to degrees.""" + # Create test data: (cx, cy, w, h, r) format with radians + test_data = [ + [50.0, 60.0, 30.0, 20.0, 0.0], # 0 radians + [100.0, 120.0, 40.0, 25.0, np.pi / 2], # π/2 radians = 90 degrees + [150.0, 180.0, 50.0, 30.0, np.pi], # π radians = 180 degrees + ] + + df = pl.DataFrame( + {"rotated_bboxes": [test_data]}, schema={"rotated_bboxes": pl.List(pl.Array(pl.Float32, 5))} + ) + + converter_instance = RotatedBBoxFormatConverter() + + # Set up converter attributes + input_field = RotatedBBoxField( + dtype=pl.Float32, + format=RotatedBBoxFormat.CXCYWHR, + normalize=False, + semantic=Semantic.Default, + ) + output_field = RotatedBBoxField( + dtype=pl.Float32, + format=RotatedBBoxFormat.CXCYWHA, + normalize=False, + semantic=Semantic.Default, + ) + + setattr( + converter_instance, + "input_rotated_bbox", + AttributeSpec(name="rotated_bboxes", field=input_field), + ) + setattr( + converter_instance, + "output_rotated_bbox", + AttributeSpec(name="rotated_bboxes_degrees",
field=output_field), + ) + + # Test filter + assert converter_instance.filter_output_spec() is True + + # Test conversion + result_df = converter_instance.convert(df) + + assert "rotated_bboxes_degrees" in result_df.columns + result_bboxes = result_df["rotated_bboxes_degrees"][0] + + # Check conversions + expected_results = [ + [50.0, 60.0, 30.0, 20.0, 0.0], # 0 radians -> 0 degrees + [100.0, 120.0, 40.0, 25.0, 90.0], # π/2 radians -> 90 degrees + [150.0, 180.0, 50.0, 30.0, 180.0], # π radians -> 180 degrees + ] + + for i, expected in enumerate(expected_results): + assert np.allclose(result_bboxes[i], expected, atol=1e-6) + + +def test_rotated_bbox_format_converter_degrees_to_radians(): + """Test RotatedBBoxFormatConverter conversion from degrees to radians.""" + # Create test data: (cx, cy, w, h, a) format with degrees + test_data = [ + [50.0, 60.0, 30.0, 20.0, 0.0], # 0 degrees + [100.0, 120.0, 40.0, 25.0, 90.0], # 90 degrees + [150.0, 180.0, 50.0, 30.0, 180.0], # 180 degrees + ] + + df = pl.DataFrame( + {"rotated_bboxes": [test_data]}, schema={"rotated_bboxes": pl.List(pl.Array(pl.Float32, 5))} + ) + + converter_instance = RotatedBBoxFormatConverter() + + # Set up converter attributes + input_field = RotatedBBoxField( + dtype=pl.Float32, + format=RotatedBBoxFormat.CXCYWHA, + normalize=False, + semantic=Semantic.Default, + ) + output_field = RotatedBBoxField( + dtype=pl.Float32, + format=RotatedBBoxFormat.CXCYWHR, + normalize=False, + semantic=Semantic.Default, + ) + + setattr( + converter_instance, + "input_rotated_bbox", + AttributeSpec(name="rotated_bboxes", field=input_field), + ) + setattr( + converter_instance, + "output_rotated_bbox", + AttributeSpec(name="rotated_bboxes_radians", field=output_field), + ) + + # Test filter + assert converter_instance.filter_output_spec() is True + + # Test conversion + result_df = converter_instance.convert(df) + + assert "rotated_bboxes_radians" in result_df.columns + result_bboxes = result_df["rotated_bboxes_radians"][0] 
+ + # Check conversions + expected_results = [ + [50.0, 60.0, 30.0, 20.0, 0.0], # 0 degrees -> 0 radians + [100.0, 120.0, 40.0, 25.0, np.pi / 2], # 90 degrees -> π/2 radians + [150.0, 180.0, 50.0, 30.0, np.pi], # 180 degrees -> π radians + ] + + for i, expected in enumerate(expected_results): + assert np.allclose(result_bboxes[i], expected, atol=1e-6) + + +def test_polygon_format_converter_xy_to_yx(): + """Test PolygonFormatConverter conversion from XY to YX format.""" + # Create test polygon data: [(x1, y1), (x2, y2), (x3, y3)] + polygon1 = [[10.0, 20.0], [30.0, 40.0], [50.0, 60.0]] + polygon2 = [[100.0, 200.0], [300.0, 400.0]] + + df = pl.DataFrame( + {"polygons": [[polygon1, polygon2]]}, + schema={"polygons": pl.List(pl.List(pl.Array(pl.Float32, 2)))}, + ) + + converter_instance = PolygonFormatConverter() + + # Set up converter attributes + input_field = PolygonField( + dtype=pl.Float32, format=PolygonFormat.XY, normalize=False, semantic=Semantic.Default + ) + output_field = PolygonField( + dtype=pl.Float32, format=PolygonFormat.YX, normalize=False, semantic=Semantic.Default + ) + + setattr(converter_instance, "input_polygon", AttributeSpec(name="polygons", field=input_field)) + setattr( + converter_instance, "output_polygon", AttributeSpec(name="polygons_yx", field=output_field) + ) + + # Test filter + assert converter_instance.filter_output_spec() is True + + # Test conversion + result_df = converter_instance.convert(df) + + assert "polygons_yx" in result_df.columns + result_polygons = result_df["polygons_yx"][0] + + # Check that coordinates are swapped: (x, y) -> (y, x) + expected_polygon1 = [[20.0, 10.0], [40.0, 30.0], [60.0, 50.0]] # Swapped coordinates + expected_polygon2 = [[200.0, 100.0], [400.0, 300.0]] # Swapped coordinates + + assert len(result_polygons) == 2 + assert np.allclose(result_polygons[0], expected_polygon1) + assert np.allclose(result_polygons[1], expected_polygon2) diff --git a/tests/unit/experimental/test_schema.py 
b/tests/unit/experimental/test_schema.py index 1ae51c1f34..60c0eae2ff 100644 --- a/tests/unit/experimental/test_schema.py +++ b/tests/unit/experimental/test_schema.py @@ -333,17 +333,6 @@ def test_rotated_bbox_field_creation(): assert field.semantic == Semantic.Default -def test_rotated_bbox_field_creation_defaults(): - """Test RotatedBBoxField creation with default values.""" - field = rotated_bbox_field(dtype=pl.Float32) - - assert isinstance(field, RotatedBBoxField) - assert field.dtype == pl.Float32 - assert field.format == "cxcywhr" # Default format - assert field.normalize is False # Default normalization - assert field.semantic == Semantic.Default # Default semantic - - def test_rotated_bbox_field_polars_schema(): """Test RotatedBBoxField Polars schema generation.""" field = rotated_bbox_field(dtype=pl.Float32) @@ -651,17 +640,6 @@ def test_polygon_field_creation(): assert field.semantic == Semantic.Default -def test_polygon_field_creation_defaults(): - """Test PolygonField creation with default values.""" - field = polygon_field(dtype=pl.Float32) - - assert isinstance(field, PolygonField) - assert field.dtype == pl.Float32 - assert field.format == "xy" # Default format - assert field.normalize is False # Default normalization - assert field.semantic == Semantic.Default # Default semantic - - def test_polygon_field_polars_schema(): """Test PolygonField Polars schema generation.""" field = polygon_field(dtype=pl.Float32)