Fix vertical flip orientation for rotated bounding boxes (XYXYXYXY, XYWHR, CXCYWHR)

AntoineSimoulin · AntoineSimoulin · commit 3b4100c072f3 · 2025-05-27T15:17:15.000-07:00
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -588,14 +588,14 @@ def affine_rotated_bounding_boxes(bounding_boxes):
         transformed_points = np.matmul(points, affine_matrix.astype(points.dtype).T)
         output = torch.Tensor(
             [
+                float(transformed_points[1, 0]),
+                float(transformed_points[1, 1]),
                 float(transformed_points[0, 0]),
                 float(transformed_points[0, 1]),
                 float(transformed_points[3, 0]),
                 float(transformed_points[3, 1]),
                 float(transformed_points[2, 0]),
                 float(transformed_points[2, 1]),
-                float(transformed_points[1, 0]),
-                float(transformed_points[1, 1]),
             ]
         )
 
@@ -618,13 +618,20 @@ def affine_rotated_bounding_boxes(bounding_boxes):
         return output.to(dtype=dtype, device=device)
 
     return tv_tensors.BoundingBoxes(
-        torch.cat([affine_rotated_bounding_boxes(b) for b in bounding_boxes.reshape(-1, 5 if format != tv_tensors.BoundingBoxFormat.XYXYXYXY else 8).unbind()], dim=0).reshape(
-            bounding_boxes.shape
-        ),
+        torch.cat(
+            [
+                affine_rotated_bounding_boxes(b)
+                for b in bounding_boxes.reshape(
+                    -1, 5 if format != tv_tensors.BoundingBoxFormat.XYXYXYXY else 8
+                ).unbind()
+            ],
+            dim=0,
+        ).reshape(bounding_boxes.shape),
         format=format,
         canvas_size=canvas_size,
     )
 
+
 class TestResize:
     INPUT_SIZE = (17, 11)
     OUTPUT_SIZES = [17, [17], (17,), None, [12, 13], (12, 13)]
diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py
@@ -163,15 +163,20 @@ def vertical_flip_bounding_boxes(
         bounding_boxes[:, 1].sub_(canvas_size[0]).neg_()
     elif format == tv_tensors.BoundingBoxFormat.XYXYXYXY:
         bounding_boxes[:, 1::2].sub_(canvas_size[0]).neg_()
-        bounding_boxes = bounding_boxes[:, [0, 1, 6, 7, 4, 5, 2, 3]]
+        bounding_boxes = bounding_boxes[:, [2, 3, 0, 1, 6, 7, 4, 5]]
     elif format == tv_tensors.BoundingBoxFormat.XYWHR:
-        bounding_boxes[:, 1].sub_(canvas_size[0]).neg_()
-        bounding_boxes = bounding_boxes[:, [0, 1, 3, 2, 4]]
-        bounding_boxes[:, -1].sub_(90).neg_()
+        dtype = bounding_boxes.dtype
+        if not torch.is_floating_point(bounding_boxes):
+            # Casting to float to support cos and sin computations.
+            bounding_boxes = bounding_boxes.to(torch.float64)
+        angle_rad = bounding_boxes[:, 4].mul(torch.pi).div(180)
+        bounding_boxes[:, 1].sub_(bounding_boxes[:, 2].mul(angle_rad.sin())).sub_(canvas_size[0]).neg_()
+        bounding_boxes[:, 0].add_(bounding_boxes[:, 2].mul(angle_rad.cos()))
+        bounding_boxes[:, 4].neg_().add_(180)
+        bounding_boxes = bounding_boxes.to(dtype)
     else:  # format == tv_tensors.BoundingBoxFormat.CXCYWHR:
         bounding_boxes[:, 1].sub_(canvas_size[0]).neg_()
-        bounding_boxes = bounding_boxes[:, [0, 1, 3, 2, 4]]
-        bounding_boxes[:, -1].sub_(90).neg_()
+        bounding_boxes[:, 4].neg_().add_(180)
 
     return bounding_boxes.reshape(shape)