Implement OBB (oriented bounding box) support

james-kwong · james-kwong · commit 6195d2d7cb6f · 2025-09-28T20:58:31.000-07:00
diff --git a/docs/source/docs/objectDetection/opi.md b/docs/source/docs/objectDetection/opi.md
@@ -6,7 +6,7 @@ PhotonVision runs object detection on the Orange Pi 5 by use of the RKNN model a
 
 ## Supported models
 
-PhotonVision currently ONLY supports 640x640 Ultralytics YOLOv5, YOLOv8, and YOLOv11 models trained and converted to `.rknn` format for RK3588 SOCs! Other models require different post-processing code and will NOT work.
+PhotonVision currently ONLY supports 640x640 Ultralytics YOLOv5, YOLOv8, YOLOv11, and YOLOv11 OBB models trained and converted to `.rknn` format for RK3588 SOCs! Other models require different post-processing code and will NOT work.
 
 ## Converting Custom Models
 
diff --git a/photon-client/src/components/settings/ObjectDetectionCard.vue b/photon-client/src/components/settings/ObjectDetectionCard.vue
@@ -304,8 +304,8 @@ const handleBulkImport = () => {
               <v-card-text>
                 <span v-if="useSettingsStore().general.supportedBackends?.includes('RKNN')"
                   >Upload a new object detection model to this device that can be used in a pipeline. Note that ONLY
-                  640x640 YOLOv5, YOLOv8, and YOLOv11 models trained and converted to `.rknn` format for RK3588 SOCs are
-                  currently supporter!</span
+                  640x640 YOLOv5, YOLOv8, YOLOv11, and YOLOv11 OBB models trained and converted to `.rknn` format for RK3588 SOCs are
+                  currently supported!</span
                 >
                 <span v-else-if="useSettingsStore().general.supportedBackends?.includes('RUBIK')"
                   >Upload a new object detection model to this device that can be used in a pipeline. Note that ONLY
@@ -344,7 +344,7 @@ const handleBulkImport = () => {
                     label="Model Version"
                     :items="
                       useSettingsStore().general.supportedBackends?.includes('RKNN')
-                        ? ['YOLOv5', 'YOLOv8', 'YOLO11']
+                        ? ['YOLOv5', 'YOLOv8', 'YOLO11', 'YOLO11OBB']
                         : ['YOLOv8', 'YOLO11']
                     "
                   />
diff --git a/photon-client/src/types/SettingTypes.ts b/photon-client/src/types/SettingTypes.ts
@@ -21,7 +21,7 @@ export interface ObjectDetectionModelProperties {
   resolutionWidth: number;
   resolutionHeight: number;
   family: "RKNN" | "RUBIK";
-  version: "YOLOV5" | "YOLOV8" | "YOLOV11";
+  version: "YOLOV5" | "YOLOV8" | "YOLOV11" | "YOLOV11OBB";
 }
 
 export interface MetricData {
diff --git a/photon-core/src/main/java/org/photonvision/common/configuration/NeuralNetworkModelManager.java b/photon-core/src/main/java/org/photonvision/common/configuration/NeuralNetworkModelManager.java
@@ -244,7 +244,8 @@ public String extension() {
     public enum Version {
         YOLOV5,
         YOLOV8,
-        YOLOV11
+        YOLOV11,
+        YOLOV11OBB
     }
 
     /**
diff --git a/photon-core/src/main/java/org/photonvision/vision/objects/Letterbox.java b/photon-core/src/main/java/org/photonvision/vision/objects/Letterbox.java
@@ -21,7 +21,9 @@
 import java.util.List;
 import org.opencv.core.Core;
 import org.opencv.core.Mat;
+import org.opencv.core.Point;
 import org.opencv.core.Rect2d;
+import org.opencv.core.RotatedRect;
 import org.opencv.core.Scalar;
 import org.opencv.core.Size;
 import org.opencv.imgproc.Imgproc;
@@ -86,19 +88,25 @@ public static Letterbox letterbox(Mat frame, Mat letterboxed, Size newShape, Sca
      * @return The resized detections
      */
     public List<NeuralNetworkPipeResult> resizeDetections(List<NeuralNetworkPipeResult> unscaled) {
-        var ret = new ArrayList<NeuralNetworkPipeResult>();
+        var ret = new ArrayList<NeuralNetworkPipeResult>(unscaled.size());
 
         for (var t : unscaled) {
             var scale = 1.0 / this.scale;
             var boundingBox = t.bbox();
-            double x = (boundingBox.x - this.dx) * scale;
-            double y = (boundingBox.y - this.dy) * scale;
-            double width = boundingBox.width * scale;
-            double height = boundingBox.height * scale;
+
+            double cx = (boundingBox.center.x - this.dx) * scale;
+            double cy = (boundingBox.center.y - this.dy) * scale;
+            double width = boundingBox.size.width * scale;
+            double height = boundingBox.size.height * scale;
+
+            Point center = new Point(cx, cy);
+            Size size = new Size(width, height);
+
+            // angle is unchanged from letterbox transformation
 
             ret.add(
                     new NeuralNetworkPipeResult(
-                            new Rect2d(x, y, width, height), t.classIdx(), t.confidence()));
+                            new RotatedRect(center, size, boundingBox.angle), t.classIdx(), t.confidence()));
         }
 
         return ret;
diff --git a/photon-core/src/main/java/org/photonvision/vision/objects/RknnModel.java b/photon-core/src/main/java/org/photonvision/vision/objects/RknnModel.java
@@ -53,8 +53,9 @@ public RknnModel(ModelProperties properties) throws IllegalArgumentException {
 
         if (properties.version() != Version.YOLOV5
                 && properties.version() != Version.YOLOV8
-                && properties.version() != Version.YOLOV11) {
-            throw new IllegalArgumentException("Model version must be YOLOV5, YOLOV8, or YOLOV11");
+                && properties.version() != Version.YOLOV11
+                && properties.version() != Version.YOLOV11OBB) {
+            throw new IllegalArgumentException("Model version must be YOLOV5, YOLOV8, YOLOV11, or YOLOV11OBB");
         }
 
         this.properties = properties;
diff --git a/photon-core/src/main/java/org/photonvision/vision/opencv/Contour.java b/photon-core/src/main/java/org/photonvision/vision/opencv/Contour.java
@@ -57,6 +57,17 @@ public Contour(Rect2d box) {
                         new Point(box.x, box.y + box.height));
     }
 
+    public Contour(RotatedRect obb) { // builds this contour's mat from the four corners of a rotated rect
+        Point[] pts = new Point[4];
+        for (int i = 0; i < 4; ++i) pts[i] = new Point(); // give every slot a Point before calling points()
+
+        obb.points(pts); // presumably writes the rect's four corner points into pts; confirm against OpenCV docs
+
+        // Target order for the stored mat: top-left, top-right, bottom-right, bottom-left.
+        // Quoted order of the pts array: "The order is bottomLeft, topLeft, topRight, bottomRight." NOTE(review): confirm these labels still hold for non-zero angles.
+        this.mat = new MatOfPoint(pts[1], pts[2], pts[3], pts[0]); // indices 1,2,3,0 rotate the quoted order into the target order
+    }
+
     public MatOfPoint2f getMat2f() {
         if (mat2f == null) {
             mat2f = new MatOfPoint2f(mat.toArray());
diff --git a/photon-core/src/main/java/org/photonvision/vision/pipe/impl/FilterObjectDetectionsPipe.java b/photon-core/src/main/java/org/photonvision/vision/pipe/impl/FilterObjectDetectionsPipe.java
@@ -22,6 +22,7 @@
 import org.photonvision.common.util.numbers.DoubleCouple;
 import org.photonvision.vision.frame.FrameStaticProperties;
 import org.photonvision.vision.pipe.CVPipe;
+import org.photonvision.vision.target.TargetCalculations;
 
 public class FilterObjectDetectionsPipe
         extends CVPipe<
@@ -42,15 +43,16 @@ protected List<NeuralNetworkPipeResult> process(List<NeuralNetworkPipeResult> in
 
     private void filterContour(NeuralNetworkPipeResult contour) {
         var boc = contour.bbox();
-
+        
         // Area filtering
-        double areaPercentage = boc.area() / params.frameStaticProperties().imageArea * 100.0;
+        double areaPercentage = boc.size.area() / params.frameStaticProperties().imageArea * 100.0;
         double minAreaPercentage = params.area().getFirst();
         double maxAreaPercentage = params.area().getSecond();
         if (areaPercentage < minAreaPercentage || areaPercentage > maxAreaPercentage) return;
 
-        // Aspect ratio filtering; much simpler since always axis-aligned
-        double aspectRatio = boc.width / boc.height;
+        // Aspect Ratio Filtering.
+        double aspectRatio =
+                TargetCalculations.getAspectRatio(boc, params.isLandscape());
         if (aspectRatio < params.ratio().getFirst() || aspectRatio > params.ratio().getSecond()) return;
 
         m_filteredContours.add(contour);
diff --git a/photon-core/src/main/java/org/photonvision/vision/pipe/impl/NeuralNetworkPipeResult.java b/photon-core/src/main/java/org/photonvision/vision/pipe/impl/NeuralNetworkPipeResult.java
@@ -17,6 +17,22 @@
 
 package org.photonvision.vision.pipe.impl;
 
+import org.opencv.core.Point;
 import org.opencv.core.Rect2d;
+import org.opencv.core.RotatedRect;
+import org.opencv.core.Size;
 
-public record NeuralNetworkPipeResult(Rect2d bbox, int classIdx, double confidence) {}
+public record NeuralNetworkPipeResult(RotatedRect bbox, int classIdx, double confidence) { // one detection: rotated box, class index, confidence
+    public NeuralNetworkPipeResult(Rect2d rect, int classIdx, double confidence) {        // overload taking an axis-aligned Rect2d
+        // Wrap the axis-aligned rect as a RotatedRect with an angle of 0 degrees and delegate to the canonical constructor.
+        this(
+            new RotatedRect(
+                new Point(rect.x + (rect.width) / 2, rect.y + (rect.height) / 2), // center = top-left corner plus half the extents
+                new Size(rect.width, rect.height), // width and height carried over unchanged
+                0.0 // angle argument: no rotation
+            ),
+            classIdx,
+            confidence
+        );
+    }
+}
diff --git a/photon-server/src/main/java/org/photonvision/server/RequestHandler.java b/photon-server/src/main/java/org/photonvision/server/RequestHandler.java
@@ -570,6 +570,7 @@ public static void onImportObjectDetectionModelRequest(Context ctx) {
                         case "YOLOv5" -> NeuralNetworkModelManager.Version.YOLOV5;
                         case "YOLOv8" -> NeuralNetworkModelManager.Version.YOLOV8;
                         case "YOLO11" -> NeuralNetworkModelManager.Version.YOLOV11;
+                        case "YOLO11OBB" -> NeuralNetworkModelManager.Version.YOLOV11OBB;
                             // Add more versions as necessary for new models
                         default -> {
                             ctx.status(400);

Original file line number	Diff line number	Diff line change
`@@ -21,7 +21,7 @@ export interface ObjectDetectionModelProperties {`
`21`	`21`	`resolutionWidth: number;`
`22`	`22`	`resolutionHeight: number;`
`23`	`23`	`family: "RKNN" \| "RUBIK";`
`24`		`- version: "YOLOV5" \| "YOLOV8" \| "YOLOV11";`
	`24`	`+ version: "YOLOV5" \| "YOLOV8" \| "YOLOV11" \| "YOLOV11OBB";`
`25`	`25`	`}`
`26`	`26`
`27`	`27`	`export interface MetricData {`
Original file line number	Diff line number	Diff line change
`@@ -244,7 +244,8 @@ public String extension() {`
`244`	`244`	`public enum Version {`
`245`	`245`	`YOLOV5,`
`246`	`246`	`YOLOV8,`
`247`		`- YOLOV11`
	`247`	`+ YOLOV11,`
	`248`	`+ YOLOV11OBB`
`248`	`249`	`}`
`249`	`250`
`250`	`251`	`/**`