Add custom models (#1687)

samfreund · web-flow · commit 27684eef6006 · 2025-01-08T11:44:06.000-07:00
diff --git a/docs/source/docs/objectDetection/about-object-detection.md b/docs/source/docs/objectDetection/about-object-detection.md
@@ -4,16 +4,16 @@
 
 PhotonVision supports object detection using neural network accelerator hardware built into Orange Pi 5/5+ coprocessors. The Neural Processing Unit, or NPU, is [used by PhotonVision](https://github.com/PhotonVision/rknn_jni/tree/main) to massively accelerate certain math operations like those needed for running ML-based object detection.
 
-For the 2025 season, PhotonVision does not currently ship with a pre-trained detector.  If teams are interested in using object detection, they can follow the custom process outlined {ref}`below <docs/objectDetection/about-object-detection:Uploading Custom Models>`.
+For the 2025 season, PhotonVision does not currently ship with a pre-trained detector. If teams are interested in using object detection, they can follow the custom process outlined {ref}`below <docs/objectDetection/about-object-detection:Uploading Custom Models>`.
 
 ## Tracking Objects
 
 Before you get started with object detection, ensure that you have followed the previous sections on installation, wiring, and networking. Next, open the Web UI, go to the top right card, and switch to the “Object Detection” type. You should see a screen similar to the image above.
 
-PhotonVision currently ships with a NOTE detector based on a [YOLOv5 model](https://docs.ultralytics.com/yolov5/). This model is trained to detect one or more object "classes" (such as cars, stoplights, or in our case, NOTES) in an input image. For each detected object, the model outputs a bounding box around where in the image the object is located, what class the object belongs to, and a unitless confidence between 0 and 1.
+PhotonVision does not currently ship with a pretrained model. Models are trained to detect one or more object "classes" (such as cars or stoplights) in an input image. For each detected object, the model outputs a bounding box around where in the image the object is located, what class the object belongs to, and a unitless confidence between 0 and 1.
 
 :::{note}
-This model output means that while its fairly easy to say that "this rectangle probably contains a NOTE", we don't have any information about the NOTE's orientation or location. Further math in user code would be required to make estimates about where an object is physically located relative to the camera.
+This model output means that while it's fairly easy to say that "this rectangle probably contains an object", we don't have any information about the object's orientation or location. Further math in user code would be required to make estimates about where an object is physically located relative to the camera.
 :::
 
 ## Tuning and Filtering
@@ -40,7 +40,11 @@ Coming soon!
 ## Uploading Custom Models
 
 :::{warning}
-PhotonVision currently ONLY supports YOLOv5 models trained and converted to `.rknn` format for RK3588 CPUs! Other models require different post-processing code and will NOT work. The model conversion process is also highly particular. Proceed with care.
+PhotonVision currently ONLY supports 640x640 YOLOv5 & YOLOv8 models trained and converted to `.rknn` format for RK3588 CPUs! Other models require different post-processing code and will NOT work. The model conversion process is also highly particular. Proceed with care.
 :::
 
-Use a program like WinSCP or FileZilla to access your coprocessor's filesystem, and copy the new `.rknn` model file into /home/pi. Next, SSH into the coprocessor and `sudo mv /path/to/new/model.rknn /opt/photonvision/photonvision_config/models/NEW-MODEL-NAME.rknn`. Repeat this process with the labels file, which should contain one line per label the model outputs with no training newline. Next, restart PhotonVision via the web UI.
+On the settings page, the `Object Detection` card has an option to import a new object detection model. The RKNN file
+should be named `name-verticalResolution-horizontalResolution-modelType.rknn`. Additionally, the labels
+file should have the same name as the RKNN file, with `-labels` appended to the end and a `.txt` extension. For example, if the
+RKNN file is named `note-640-640-yolov5s.rknn`, the labels file should be named
+`note-640-640-yolov5s-labels.txt`.
diff --git a/photon-client/src/components/settings/ObjectDetectionCard.vue b/photon-client/src/components/settings/ObjectDetectionCard.vue
@@ -0,0 +1,188 @@
+<script setup lang="ts">
+import { ref, computed } from "vue";
+import axios from "axios";
+import { useStateStore } from "@/stores/StateStore";
+import { useSettingsStore } from "@/stores/settings/GeneralSettingsStore";
+
+// Whether the "Import New Object Detection Model" dialog is open (bound to the dialog's v-model)
+const showObjectDetectionImportDialog = ref(false);
+// Files currently selected in the import dialog's file inputs; null while nothing is selected
+const importRKNNFile = ref<File | null>(null);
+const importLabelsFile = ref<File | null>(null);
+
+/**
+ * Uploads the selected RKNN model and labels files to the backend.
+ *
+ * Does nothing unless both files are selected. The outcome is reported through snackbar messages.
+ * The dialog is closed and the selection cleared as soon as the request has been started, without
+ * waiting for the backend to answer.
+ */
+const handleObjectDetectionImport = () => {
+  const rknnFile = importRKNNFile.value;
+  const labelsFile = importLabelsFile.value;
+  if (rknnFile === null || labelsFile === null) return;
+
+  const stateStore = useStateStore();
+
+  const payload = new FormData();
+  payload.append("rknn", rknnFile);
+  payload.append("labels", labelsFile);
+
+  // NOTE(review): timeout -1 presumably keeps this message up until a result message replaces it — confirm
+  stateStore.showSnackbarMessage({
+    message: "Importing Object Detection Model...",
+    color: "secondary",
+    timeout: -1
+  });
+
+  axios
+    .post("/utils/importObjectDetectionModel", payload, {
+      headers: { "Content-Type": "multipart/form-data" }
+    })
+    .then((response) => {
+      stateStore.showSnackbarMessage({
+        message: response.data.text || response.data,
+        color: "success"
+      });
+    })
+    .catch((error) => {
+      // Prefer the backend's own message; otherwise distinguish "no response" from "request never sent"
+      const message = error.response
+        ? error.response.data.text || error.response.data
+        : error.request
+          ? "Error while trying to process the request! The backend didn't respond."
+          : "An error occurred while trying to process the request.";
+
+      stateStore.showSnackbarMessage({ color: "error", message });
+    });
+
+  // The form data already holds the files, so the dialog state can be reset right away
+  showObjectDetectionImportDialog.value = false;
+  importRKNNFile.value = null;
+  importLabelsFile.value = null;
+};
+
+// Flattened list of the models available for each backend this device supports; models that belong
+// only to other backends are left out.
+const supportedModels = computed(() => {
+  const { availableModels: modelsByBackend, supportedBackends: backends } = useSettingsStore().general;
+  return backends.map((backend) => modelsByBackend[backend] || []).flat();
+});
+</script>
+
+<template>
+  <v-card dark class="mb-3" style="background-color: #006492">
+    <v-card-title class="pa-6">Object Detection</v-card-title>
+    <div class="pa-6 pt-0">
+      <v-row>
+        <v-col cols="12 ">
+          <v-btn color="secondary" @click="() => (showObjectDetectionImportDialog = true)" class="justify-center">
+            <v-icon left class="open-icon"> mdi-import </v-icon>
+            <span class="open-label">Import New Model</span>
+          </v-btn>
+          <v-dialog
+            v-model="showObjectDetectionImportDialog"
+            width="600"
+            @input="
+              () => {
+                importRKNNFile = null;
+                importLabelsFile = null;
+              }
+            "
+          >
+            <v-card color="primary" dark>
+              <v-card-title>Import New Object Detection Model</v-card-title>
+              <v-card-text>
+                Upload a new object detection model to this device that can be used in a pipeline. Naming convention
+                should be <code>name-verticalResolution-horizontalResolution-modelType</code>. Additionally, the labels
+                file ought to have the same name as the RKNN file, with <code>-labels</code> appended to the end. For
+                example, if the RKNN file is named <code>note-640-640-yolov5s.rknn</code>, the labels file should be
+                named <code>note-640-640-yolov5s-labels.txt</code>. Note that ONLY 640x640 YOLOv5 & YOLOv8 models
+                trained and converted to <code>.rknn</code> format for RK3588 CPUs are currently supported!
+                <v-row class="mt-6 ml-4 mr-8">
+                  <v-file-input label="RKNN File" v-model="importRKNNFile" accept=".rknn" />
+                </v-row>
+                <v-row class="mt-6 ml-4 mr-8">
+                  <v-file-input label="Labels File" v-model="importLabelsFile" accept=".txt" />
+                </v-row>
+                <v-row
+                  class="mt-12 ml-8 mr-8 mb-1"
+                  style="display: flex; align-items: center; justify-content: center"
+                  align="center"
+                >
+                  <v-btn
+                    color="secondary"
+                    :disabled="importRKNNFile === null || importLabelsFile === null"
+                    @click="handleObjectDetectionImport"
+                  >
+                    <v-icon left class="open-icon"> mdi-import </v-icon>
+                    <span class="open-label">Import Object Detection Model</span>
+                  </v-btn>
+                </v-row>
+              </v-card-text>
+            </v-card>
+          </v-dialog>
+        </v-col>
+      </v-row>
+      <v-row>
+        <v-col cols="12">
+          <v-simple-table fixed-header height="100%" dense dark>
+            <thead style="font-size: 1.25rem">
+              <tr>
+                <th class="text-left">Available Models</th>
+              </tr>
+            </thead>
+            <tbody>
+              <tr v-for="model in supportedModels" :key="model">
+                <td>{{ model }}</td>
+              </tr>
+            </tbody>
+          </v-simple-table>
+        </v-col>
+      </v-row>
+    </div>
+  </v-card>
+</template>
+
+<style scoped lang="scss">
+.v-btn {
+  width: 100%;
+}
+@media only screen and (max-width: 351px) {
+  .open-icon {
+    margin: 0 !important;
+  }
+  .open-label {
+    display: none;
+  }
+}
+.v-data-table {
+  width: 100%;
+  height: 100%;
+  text-align: center;
+  background-color: #006492 !important;
+
+  th,
+  td {
+    background-color: #006492 !important;
+    font-size: 1rem !important;
+    color: white !important;
+  }
+
+  td {
+    font-family: monospace !important;
+  }
+
+  tbody :hover td {
+    background-color: #005281 !important;
+  }
+
+  ::-webkit-scrollbar {
+    width: 0;
+    height: 0.55em;
+    border-radius: 5px;
+  }
+
+  ::-webkit-scrollbar-track {
+    -webkit-box-shadow: inset 0 0 6px rgba(0, 0, 0, 0.3);
+    border-radius: 10px;
+  }
+
+  ::-webkit-scrollbar-thumb {
+    background-color: #ffd843;
+    border-radius: 10px;
+  }
+}
+</style>
diff --git a/photon-client/src/views/GeneralSettingsView.vue b/photon-client/src/views/GeneralSettingsView.vue
@@ -1,6 +1,7 @@
 <script setup lang="ts">
 import MetricsCard from "@/components/settings/MetricsCard.vue";
 import DeviceControlCard from "@/components/settings/DeviceControlCard.vue";
+import ObjectDetectionCard from "@/components/settings/ObjectDetectionCard.vue";
 import NetworkingCard from "@/components/settings/NetworkingCard.vue";
 import LightingControlCard from "@/components/settings/LEDControlCard.vue";
 import { useSettingsStore } from "@/stores/settings/GeneralSettingsStore";
@@ -12,6 +13,7 @@ import ApriltagControlCard from "@/components/settings/ApriltagControlCard.vue";
     <MetricsCard />
     <DeviceControlCard />
     <NetworkingCard />
+    <ObjectDetectionCard v-if="useSettingsStore().general.supportedBackends.length > 0" />
     <LightingControlCard v-if="useSettingsStore().lighting.supported" />
     <ApriltagControlCard />
   </div>
diff --git a/photon-server/src/main/java/org/photonvision/server/RequestHandler.java b/photon-server/src/main/java/org/photonvision/server/RequestHandler.java
@@ -29,6 +29,7 @@
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Optional;
+import java.util.regex.Pattern;
 import javax.imageio.ImageIO;
 import org.apache.commons.io.FileUtils;
 import org.opencv.core.Mat;
@@ -37,6 +38,7 @@
 import org.opencv.imgcodecs.Imgcodecs;
 import org.photonvision.common.configuration.ConfigManager;
 import org.photonvision.common.configuration.NetworkConfig;
+import org.photonvision.common.configuration.NeuralNetworkModelManager;
 import org.photonvision.common.dataflow.DataChangeDestination;
 import org.photonvision.common.dataflow.DataChangeService;
 import org.photonvision.common.dataflow.events.IncomingWebSocketEvent;
@@ -98,7 +100,8 @@ public static void onSettingsImportRequest(Context ctx) {
 
         ConfigManager.getInstance().setWriteTaskEnabled(false);
         ConfigManager.getInstance().disableFlushOnShutdown();
-        // We want to delete the -whole- zip file, so we need to teardown loggers for now
+        // We want to delete the -whole- zip file, so we need to teardown loggers for
+        // now
         logger.info("Writing new settings zip (logs may be truncated)...");
         Logger.closeAllLoggers();
         if (ConfigManager.saveUploadedSettingsZip(tempFilePath.get())) {
@@ -543,6 +546,72 @@ public static void onProgramRestartRequest(Context ctx) {
         restartProgram();
     }
 
+    public static void onObjectDetectionModelImportRequest(Context ctx) {
+        try {
+            // Retrieve the uploaded files
+            var modelFile = ctx.uploadedFile("rknn");
+            var labelsFile = ctx.uploadedFile("labels");
+
+            if (modelFile == null || labelsFile == null) {
+                ctx.status(400);
+                ctx.result(
+                        "No File was sent with the request. Make sure that the model and labels files are sent at the keys 'rknn' and 'labels'");
+                logger.error(
+                        "No File was sent with the request. Make sure that the model and labels files are sent at the keys 'rknn' and 'labels'");
+                return;
+            }
+
+            if (!modelFile.extension().contains("rknn") || !labelsFile.extension().contains("txt")) {
+                ctx.status(400);
+                ctx.result(
+                        "The uploaded files were not of type 'rknn' and 'txt'. The uploaded files should be a .rknn and .txt file.");
+                logger.error(
+                        "The uploaded files were not of type 'rknn' and 'txt'. The uploaded files should be a .rknn and .txt file.");
+                return;
+            }
+
+            // verify naming convention
+            // this check will need to be modified if different model types are added
+
+            Pattern modelPattern = Pattern.compile("^[a-zA-Z0-9]+-\\d+-\\d+-yolov[58][a-z]*\\.rknn$");
+
+            Pattern labelsPattern =
+                    Pattern.compile("^[a-zA-Z0-9]+-\\d+-\\d+-yolov[58][a-z]*-labels\\.txt$");
+
+            if (!modelPattern.matcher(modelFile.filename()).matches()
+                    || !labelsPattern.matcher(labelsFile.filename()).matches()) {
+                ctx.status(400);
+                ctx.result("The uploaded files were not named correctly.");
+                logger.error("The uploaded object detection model files were not named correctly.");
+                return;
+            }
+
+            // TODO move into neural network manager
+
+            var modelPath =
+                    Paths.get(
+                            ConfigManager.getInstance().getModelsDirectory().toString(), modelFile.filename());
+            var labelsPath =
+                    Paths.get(
+                            ConfigManager.getInstance().getModelsDirectory().toString(), labelsFile.filename());
+
+            try (FileOutputStream out = new FileOutputStream(modelPath.toFile())) {
+                modelFile.content().transferTo(out);
+            }
+
+            try (FileOutputStream out = new FileOutputStream(labelsPath.toFile())) {
+                labelsFile.content().transferTo(out);
+            }
+
+            NeuralNetworkModelManager.getInstance()
+                    .discoverModels(ConfigManager.getInstance().getModelsDirectory());
+
+            ctx.status(200).result("Successfully uploaded object detection model");
+        } catch (Exception e) {
+            ctx.status(500).result("Error processing files: " + e.getMessage());
+        }
+    }
+
     public static void onDeviceRestartRequest(Context ctx) {
         ctx.status(HardwareManager.getInstance().restartDevice() ? 204 : 500);
     }
@@ -602,7 +671,8 @@ public static void onCalibrationSnapshotRequest(Context ctx) {
             return;
         }
 
-        // encode as jpeg to save even more space. reduces size of a 1280p image from 300k to 25k
+        // encode as jpeg to save even more space. reduces size of a 1280p image from
+        // 300k to 25k
         var jpegBytes = new MatOfByte();
         Mat img = null;
         try {
diff --git a/photon-server/src/main/java/org/photonvision/server/Server.java b/photon-server/src/main/java/org/photonvision/server/Server.java
@@ -127,6 +127,9 @@ private static void start(int port) {
 
         // Utilities
         app.post("/api/utils/offlineUpdate", RequestHandler::onOfflineUpdateRequest);
+        app.post(
+                "/api/utils/importObjectDetectionModel",
+                RequestHandler::onObjectDetectionModelImportRequest);
         app.get("/api/utils/photonvision-journalctl.txt", RequestHandler::onLogExportRequest);
         app.post("/api/utils/restartProgram", RequestHandler::onProgramRestartRequest);
         app.post("/api/utils/restartDevice", RequestHandler::onDeviceRestartRequest);