// Copyright (c) 2018 ml5
//
// This software is released under the MIT License.
// https://opensource.org/licenses/MIT

/*
PoseDetection
Ported from PoseNet (@tensorflow-models/posenet) at TensorFlow.js
*/

import EventEmitter from "events";
import * as tf from "@tensorflow/tfjs";
import * as posenet from "@tensorflow-models/posenet";
import callCallback from "../utils/callcallback";
import handleArguments from "../utils/handleArguments";

const DEFAULTS = {
  architecture: "MobileNetV1", // 'MobileNetV1', 'ResNet50'
  outputStride: 16, // 8, 16, 32
  flipHorizontal: false, // true, false
  minConfidence: 0.5,
  maxPoseDetections: 5, // any number > 1
  scoreThreshold: 0.5,
  nmsRadius: 20, // any number > 0
  detectionType: "multiple", // 'single'
  inputResolution: 256, // or { width: 257, height: 200 }
  multiplier: 0.75, // 1.01, 1.0, 0.75, or 0.50 -- only for MobileNet
  quantBytes: 2, // 4, 2, 1
  modelUrl: null, // url path to model
};

class PoseNet extends EventEmitter {
  /**
   * @typedef {Object} options
   * @property {string} architecture - default 'MobileNetV1'
   * @property {number} inputResolution - default 256
   * @property {number} outputStride - default 16
   * @property {boolean} flipHorizontal - default false
   * @property {number} minConfidence - default 0.5
   * @property {number} maxPoseDetections - default 5
   * @property {number} scoreThreshold - default 0.5
   * @property {number} nmsRadius - default 20
   * @property {string} detectionType - default 'multiple'
   * @property {number} multiplier - default 0.75
   * @property {number} quantBytes - default 2
   * @property {string} modelUrl - default null
   */
  /**
   * Create a PoseNet model.
   * @param {HTMLVideoElement | p5.Video} video - Optional. An HTML video element or a p5 video element.
   * @param {options} options - Optional. An object describing model accuracy and performance options.
   * @param {string} detectionType - Optional. A string to run 'single' or 'multiple' pose estimation.
   * @param {function} callback - Optional. A function to run once the model has been loaded.
   *    If no callback is provided, it will return a promise that will be resolved once the
   *    model has loaded.
   */
  constructor(video, options = {}, detectionType, callback) {
    super();
    this.video = video;
    // ?? (rather than ||) keeps falsy-but-valid option values such as 0 or false.
    this.modelUrl = options.modelUrl ?? null;
    this.architecture = options.architecture ?? DEFAULTS.architecture;
    /**
     * The type of detection. 'single' or 'multiple'
     * @type {string}
     * @public
     */
    this.detectionType =
      detectionType ?? options.detectionType ?? DEFAULTS.detectionType;
    this.outputStride = options.outputStride ?? DEFAULTS.outputStride;
    this.flipHorizontal = options.flipHorizontal ?? DEFAULTS.flipHorizontal;
    this.scoreThreshold = options.scoreThreshold ?? DEFAULTS.scoreThreshold;
    this.minConfidence = options.minConfidence ?? DEFAULTS.minConfidence;
    this.maxPoseDetections =
      options.maxPoseDetections ?? DEFAULTS.maxPoseDetections;
    this.multiplier = options.multiplier ?? DEFAULTS.multiplier;
    this.inputResolution = options.inputResolution ?? DEFAULTS.inputResolution;
    this.quantBytes = options.quantBytes ?? DEFAULTS.quantBytes;
    this.nmsRadius = options.nmsRadius ?? DEFAULTS.nmsRadius;
    this.ready = callCallback(this.load(), callback);
  }

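  /**
   * Loads the underlying posenet model with the configured options and, if a
   * video was supplied, starts the frame-by-frame detection loop once the
   * video has data.
   */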
  async load() {
    let modelConfig;
    if (this.architecture.toLowerCase() === "mobilenetv1") {
      modelConfig = {
        architecture: this.architecture,
        outputStride: this.outputStride,
        inputResolution: this.inputResolution,
        multiplier: this.multiplier,
        quantBytes: this.quantBytes,
        modelUrl: this.modelUrl,
      };
    } else {
      // `multiplier` only applies to MobileNetV1, so it is omitted for ResNet50.
      modelConfig = {
        architecture: this.architecture,
        outputStride: this.outputStride,
        inputResolution: this.inputResolution,
        quantBytes: this.quantBytes,
        modelUrl: this.modelUrl, // also honor a custom modelUrl for ResNet50
      };
    }

    this.net = await posenet.load(modelConfig);

    if (this.video) {
      // readyState 0 means the video has no data yet; wait for it to load.
      if (this.video.readyState === 0) {
        await new Promise((resolve) => {
          this.video.onloadeddata = () => resolve();
        });
      }
      if (this.detectionType === "single") {
        this.singlePose();
      } else {
        this.multiPose();
      }
    }
    return this;
  }

  /**
   * Returns pairs of adjacent, connected keypoints (the skeleton) whose
   * scores are at or above the given confidence.
   */
  skeleton(keypoints, confidence = this.minConfidence) {
    return posenet.getAdjacentKeyPoints(keypoints, confidence);
  }

  /**
   * Returns a deep copy of a pose with each keypoint also exposed as a named
   * property (e.g. pose.nose, pose.leftWrist) holding { x, y, confidence }.
   */
  // eslint-disable-next-line class-methods-use-this
  mapParts(pose) {
    const newPose = JSON.parse(JSON.stringify(pose));
    newPose.keypoints.forEach((keypoint) => {
      newPose[keypoint.part] = {
        x: keypoint.position.x,
        y: keypoint.position.y,
        confidence: keypoint.score,
      };
    });
    return newPose;
  }

  /**
   * Given an image or video, returns an array of objects containing pose
   * estimations using single-pose detection.
   * When a video was supplied to the constructor, this loops on every frame
   * and results are delivered through the 'pose' event rather than the callback.
   * @param {HTMLVideoElement | p5.Video | function} inputOr
   * @param {function} cb
   */
  async singlePose(inputOr, cb) {
    const { image: input, callback } = handleArguments(this.video, inputOr, cb);

    const pose = await this.net.estimateSinglePose(input, {
      flipHorizontal: this.flipHorizontal,
    });
    const poseWithParts = this.mapParts(pose);
    const result = [
      { pose: poseWithParts, skeleton: this.skeleton(pose.keypoints) },
    ];
    this.emit("pose", result);

    if (this.video) {
      // Schedule the next detection on the next animation frame.
      return tf.nextFrame().then(() => this.singlePose());
    }

    if (typeof callback === "function") {
      callback(result);
    }

    return result;
  }

  /**
   * Given an image or video, returns an array of objects containing pose
   * estimations using multi-pose detection.
   * When a video was supplied to the constructor, this loops on every frame
   * and results are delivered through the 'pose' event rather than the callback.
   * @param {HTMLVideoElement | p5.Video | function} inputOr
   * @param {function} cb
   */
  async multiPose(inputOr, cb) {
    const { image: input, callback } = handleArguments(this.video, inputOr, cb);

    const poses = await this.net.estimateMultiplePoses(input, {
      flipHorizontal: this.flipHorizontal,
      maxDetections: this.maxPoseDetections,
      scoreThreshold: this.scoreThreshold,
      nmsRadius: this.nmsRadius,
    });

    const posesWithParts = poses.map((pose) => this.mapParts(pose));
    const result = posesWithParts.map((pose) => ({
      pose,
      skeleton: this.skeleton(pose.keypoints),
    }));
    this.emit("pose", result);

    if (this.video) {
      // Schedule the next detection on the next animation frame.
      return tf.nextFrame().then(() => this.multiPose());
    }

    if (typeof callback === "function") {
      callback(result);
    }

    return result;
  }
}

// Accepts arguments in any order: a video element, an options object, a
// detection type string ('single' or 'multiple'), and a callback.
const poseDetection = (...inputs) => {
  const {
    video,
    options = {},
    callback,
    string: detectionType,
  } = handleArguments(...inputs);
  return new PoseNet(video, options, detectionType, callback);
};

export default poseDetection;
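
/*
Example usage (a minimal sketch, not part of the module). It assumes an
HTMLVideoElement `video` that is already playing; the import path is
illustrative:

  import poseDetection from "./PoseNet";

  const poseNet = poseDetection(video, { maxPoseDetections: 3 }, "multiple", () =>
    console.log("model ready")
  );
  poseNet.on("pose", (results) => {
    // Each entry is { pose, skeleton }; named parts such as pose.nose
    // hold { x, y, confidence }.
    results.forEach(({ pose }) => console.log(pose.score));
  });
*/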