Fix memory leak and input sizing for SelfieSegmentation (#264)

nasif-co · web-flow · commit 34969ad87852 · 2025-07-31T10:14:01.000-04:00
* Dispose SelfieSegmentation tensors

Adds tensor disposal code taken from the official TensorFlow examples. Without it, ml5.tf.memory().numTensors showed that the number of tensors in memory kept increasing indefinitely while running the SelfieSegmentation model. This wasn't an issue with the BodyPix model.

* Control selfieSegmentation input size

Added a helper function in the imageUtilities that resizes an image from an HTML img/video element to the given dimensions, and returns it as a tensor. Used it in SelfieSegmentation to make sure the input is the right size and therefore the output received by the user is not of an unexpected size.

* Return resized tensor immediately

* Adapt to suggested comment format

* Add more descriptive comment to the fix
diff --git a/src/BodySegmentation/index.js b/src/BodySegmentation/index.js
@@ -16,6 +16,7 @@ import BODYPIX_PALETTE from "./BODYPIX_PALETTE";
 import { mediaReady } from "../utils/imageUtilities";
 import handleOptions from "../utils/handleOptions";
 import { handleModelName } from "../utils/handleOptions";
+import { resizeImageAsTensor } from "../utils/imageUtilities";
 
 class BodySegmentation {
   /**
@@ -209,11 +210,27 @@ class BodySegmentation {
 
     await mediaReady(image, false);
 
+    let inputForSegmenter = image;
+
+    // If using SelfieSegmentation, make sure the input is actually the size the user expects
+    // this addresses a sizing bug not present in BodyPix.
+    if (
+      this.modelName == "SelfieSegmentation" &&
+      (inputForSegmenter instanceof HTMLVideoElement ||
+        inputForSegmenter instanceof HTMLImageElement)
+    ) {
+      inputForSegmenter = resizeImageAsTensor(image, image.width, image.height);
+    }
+
     const segmentation = await this.model.segmentPeople(
-      image,
+      inputForSegmenter,
       this.runtimeConfig
     );
 
+    if (inputForSegmenter.dispose) {
+      inputForSegmenter.dispose();
+    }
+
     const result = {};
 
     // add array of raw values to output
@@ -259,6 +276,11 @@ class BodySegmentation {
     }
     result.mask = this.generateP5Image(result.maskImageData);
 
+    // Dispose segmentation tensors
+    segmentation.map((singleSegmentation) =>
+      singleSegmentation.mask.toTensor().then((tensor) => tensor.dispose())
+    );
+
     if (callback) callback(result);
     return result;
   }
@@ -310,11 +332,31 @@ class BodySegmentation {
   async detectLoop() {
     await mediaReady(this.detectMedia, false);
     while (!this.signalStop) {
+      let inputForSegmenter = this.detectMedia;
+
+      // If using SelfieSegmentation, make sure the input is actually the size the user expects.
+      // this addresses a sizing bug not present in BodyPix.
+      if (
+        this.modelName == "SelfieSegmentation" &&
+        (inputForSegmenter instanceof HTMLVideoElement ||
+          inputForSegmenter instanceof HTMLImageElement)
+      ) {
+        inputForSegmenter = resizeImageAsTensor(
+          this.detectMedia,
+          this.detectMedia.width,
+          this.detectMedia.height
+        );
+      }
+
       const segmentation = await this.model.segmentPeople(
-        this.detectMedia,
+        inputForSegmenter,
         this.runtimeConfig
       );
 
+      if (inputForSegmenter.dispose) {
+        inputForSegmenter.dispose();
+      }
+
       const result = {};
 
       // add array of raw values to output
@@ -361,6 +403,11 @@ class BodySegmentation {
       }
       result.mask = this.generateP5Image(result.maskImageData);
 
+      // Dispose segmentation tensors
+      segmentation.map((singleSegmentation) =>
+        singleSegmentation.mask.toTensor().then((tensor) => tensor.dispose())
+      );
+
       this.detectCallback(result);
       await tf.nextFrame();
     }
@@ -408,7 +455,7 @@ class BodySegmentation {
 }
 
 /**
- * Factory function that returns a Facemesh instance
+ * Factory function that returns a bodySegmentation instance
  * @returns {Object} A new bodySegmentation instance
  */
 const bodySegmentation = (...inputs) => {
diff --git a/src/utils/imageUtilities.js b/src/utils/imageUtilities.js
@@ -200,6 +200,22 @@ async function mediaReady(input, nextFrame) {
   }
 }
 
+/**
+ * Reads the pixels of an image source into a tensor and resizes it (bilinear)
+ * to the given dimensions. Useful when models ignore the display size of the
+ * input HTML element and use its intrinsic dimensions instead.
+ * @param {HTMLImageElement | HTMLCanvasElement | HTMLVideoElement} input - Source passed to tf.browser.fromPixels.
+ * @param {number} width - Target width (second entry of the resize size).
+ * @param {number} height - Target height (first entry of the resize size).
+ * @return {tf.Tensor3D} Resized tensor clipped to [0, 255]; it is returned out of tf.tidy, so the caller must dispose it.
+ */
+function resizeImageAsTensor(input, width, height) {
+  return tf.tidy(() => {
+    const sourcePixelsTensor = tf.browser.fromPixels(input);
+    return tf.image.resizeBilinear(sourcePixelsTensor, [height, width]).clipByValue(0, 255);
+  });
+}
+
 export {
   array3DToImage,
   processVideo,
@@ -209,4 +225,5 @@ export {
   flipImage,
   imgToPixelArray,
   mediaReady,
+  resizeImageAsTensor
 };