Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions examples/objectDetection/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta http-equiv="X-UA-Compatible" content="IE=edge" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>ml5.js objectDetector Webcam Example</title>
    <!-- p5.js provides the canvas/webcam helpers; ml5.js is the local build -->
    <script src="https://cdn.jsdelivr.net/npm/[email protected]/lib/p5.js"></script>
    <script src="../../dist/ml5.js"></script>
  </head>
  <body>
    <main></main>
    <script src="sketch.js"></script>
  </body>
</html>
52 changes: 52 additions & 0 deletions examples/objectDetection/sketch.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
 * 👋 Hello! This is an ml5.js example made and shared with ❤️.
 * Learn more about the ml5.js project: https://ml5js.org/
 * ml5.js license and Code of Conduct: https://github.com/ml5js/ml5-next-gen/blob/main/LICENSE.md
 *
 * This example demonstrates object detection on live webcam video
 * through ml5.objectDetector (COCO-SSD), using a callback pattern.
 */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a tiny thing, but sometime this year we adopted a "friendlier" pattern for comments at the top of an example sketch. I don't think we need the copyright and technically the license should be the ml5.js one. Here is what it looks like from a hand pose example:

/*
 * 👋 Hello! This is an ml5.js example made and shared with ❤️.
 * Learn more about the ml5.js project: https://ml5js.org/
 * ml5.js license and Code of Conduct: https://github.com/ml5js/ml5-next-gen/blob/main/LICENSE.md
 *
 * This example demonstrates face tracking on live video through ml5.faceMesh.
 */

(This also reminds me that we discussed moving the Code of Conduct to the website, I forgot where we are with that though maybe @MOQN remembers?)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll include it for sure!


// Webcam feed (p5 capture element), drawn to the canvas each frame.
let video;
// ml5 object detector instance, created in preload().
let detector;
// Most recent detection results, updated by gotDetections().
let detections = [];

// Load the COCO-SSD object detector before setup() runs,
// so the model is ready when detection starts.
function preload() {
  const modelName = "cocossd";
  detector = ml5.objectDetector(modelName);
}

// Create the canvas and webcam feed, then begin continuous detection.
function setup() {
  createCanvas(640, 480);

  // The webcam element is hidden; draw() paints its frames onto the canvas.
  video = createCapture(VIDEO);
  video.size(width, height);
  video.hide();

  // gotDetections() runs every time the model produces new results.
  detector.detectStart(video, gotDetections);
}

// Callback for detectStart(): cache the newest results so draw() can render them.
function gotDetections(newDetections) {
  detections = newDetections;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wouldn't over do it, but a few concise explanatory comments might be good to add.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@shiffman thank you for the comments! I will update it for review.


// Draw the current video frame, then overlay a bounding box and label
// for every detection from the most recent gotDetections() callback.
function draw() {
  image(video, 0, 0);

  for (let i = 0; i < detections.length; i += 1) {
    // p5.js style convention: use `let` even where `const` would be typical JS.
    let detection = detections[i];

    // draw bounding box
    stroke(0, 255, 0);
    strokeWeight(4);
    noFill();
    rect(detection.x, detection.y, detection.width, detection.height);

    // draw label just inside the top-left corner of the box
    noStroke();
    fill(255);
    textSize(24);
    text(detection.label, detection.x + 10, detection.y + 24);
  }
}
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
"@mediapipe/pose": "^0.5.1675469404",
"@mediapipe/selfie_segmentation": "~0.1.0",
"@tensorflow-models/body-segmentation": "^1.0.1",
"@tensorflow-models/coco-ssd": "^2.2.3",
"@tensorflow-models/face-landmarks-detection": "1.0.5",
"@tensorflow-models/hand-pose-detection": "^2.0.0",
"@tensorflow-models/mobilenet": "^2.1.0",
Expand Down
71 changes: 71 additions & 0 deletions src/ObjectDetector/cocossd.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Copyright (c) 2019 ml5
//
// This software is released under the MIT License.
// https://opensource.org/licenses/MIT

/*
COCO-SSD Object detection model
Wraps the coco-ssd model in tfjs to be used in ml5
*/
import * as tf from "@tensorflow/tfjs";
import * as cocoSsd from "@tensorflow-models/coco-ssd";
import { mediaReady } from "../utils/imageUtilities";

// Default model configuration. `base` selects the backbone CNN used by
// the coco-ssd package; `modelUrl` optionally points at a custom model.
const DEFAULTS = {
  base: "lite_mobilenet_v2",
  modelUrl: undefined,
};

export class CocoSsd {
  /**
   * Create a COCO-SSD wrapper. Does not load the model — call load().
   * @param {Object} options - Optional model configuration.
   * @param {string} [options.base] - Backbone to use; defaults to "lite_mobilenet_v2".
   * @param {string} [options.modelUrl] - URL of a custom model to load instead.
   */
  constructor(options = {}) {
    this.model = null;
    this.config = {
      base: options.base || DEFAULTS.base,
      modelUrl: options.modelUrl || DEFAULTS.modelUrl,
    };
  }

  /**
   * Load the underlying tfjs coco-ssd model.
   * @returns {Promise<CocoSsd>} this, once the model is ready.
   */
  async load() {
    await tf.setBackend("webgl"); // this line resolves warning : performance is poor on webgpu backend
    await tf.ready();

    this.model = await cocoSsd.load(this.config);
    return this;
  }

  /**
   * Detect objects that are in the image/video/canvas
   * @param {HTMLVideoElement|HTMLImageElement|HTMLCanvasElement|ImageData} imgToPredict - Subject of the detection.
   * @returns {Promise<Array>} Array of detection objects.
   * @throws {Error} if called before load() has completed.
   */
  async detect(imgToPredict) {
    if (!this.model) {
      throw new Error("CocoSsd: model is not loaded yet — call load() before detect().");
    }
    // Bug fix: mediaReady returns a promise and must be awaited (as it is in
    // ObjectDetector.detectStart), otherwise detection may run on a media
    // element that has no valid dimensions yet.
    await mediaReady(imgToPredict, true);
    await tf.nextFrame();

    const detections = await this.model.detect(imgToPredict);
    return detections.map((prediction) => ({
      label: prediction.class,
      confidence: prediction.score,
      x: prediction.bbox[0],
      y: prediction.bbox[1],
      width: prediction.bbox[2],
      height: prediction.bbox[3],
      // Box coordinates scaled to the 0..1 range of the source dimensions.
      normalized: {
        x: prediction.bbox[0] / imgToPredict.width,
        y: prediction.bbox[1] / imgToPredict.height,
        width: prediction.bbox[2] / imgToPredict.width,
        height: prediction.bbox[3] / imgToPredict.height,
      },
    }));
  }
}

/**
 * Convenience factory: construct a CocoSsd wrapper and load its model.
 * @param {Object} modelConfig - Optional configuration forwarded to the constructor.
 * @returns {Promise<CocoSsd>} a ready-to-use CocoSsd instance.
 */
export async function load(modelConfig = {}) {
  const instance = new CocoSsd(modelConfig);
  await instance.load();
  return instance;
}
156 changes: 156 additions & 0 deletions src/ObjectDetector/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// Copyright (c) 2019 ml5
//
// This software is released under the MIT License.
// https://opensource.org/licenses/MIT

/*
ObjectDetection
*/

import * as cocoSsd from "./cocossd.js";
import { handleModelName } from "../utils/handleOptions";
import handleArguments from "../utils/handleArguments";
import callCallback from "../utils/callcallback";
import { mediaReady } from "../utils/imageUtilities";

const MODEL_OPTIONS = ["cocossd"]; // Expandable for other models like YOLO

class ObjectDetector {
  /**
   * @typedef {Object} options
   * @property {number} filterBoxesThreshold - Optional. default 0.01
   * @property {number} IOUThreshold - Optional. default 0.4
   * @property {number} classProbThreshold - Optional. default 0.4
   */
  /**
   * Create ObjectDetector model. Works on video and images.
   * @param {string} modelNameOrUrl - The name or the URL of the model to use.
   *   Currently only "cocossd" is supported.
   * @param {Object} options - Optional. A set of options.
   * @param {function} callback - Optional. A callback function that is called once the model has loaded.
   */
  constructor(modelNameOrUrl, options = {}, callback) {
    this.model = null;
    this.modelToUse = null;

    // flags for detectStart() and detectStop()
    this.isDetecting = false;
    this.signalStop = false;
    this.prevCall = "";

    // Normalize the requested model name (falls back to "cocossd").
    this.modelName = handleModelName(
      modelNameOrUrl,
      MODEL_OPTIONS,
      "cocossd",
      "objectDetector"
    );

    // Map the model name to its loader module. Only COCO-SSD is wired up;
    // other models (e.g. YOLO) can be added here once implemented.
    // Bug fix: the previous `case "yolo": this.modelToUse = yolo;` referenced
    // an undefined variable `yolo` and would throw a ReferenceError. Unknown
    // names now fall through to COCO-SSD with a warning.
    switch (this.modelName) {
      case "cocossd":
        this.modelToUse = cocoSsd;
        break;
      default:
        console.warn(`Unknown model: ${this.modelName}, defaulting to CocoSsd`);
        this.modelToUse = cocoSsd;
    }

    // load model and assign ready promise
    this.ready = callCallback(this.loadModel(options), callback);
  }

  /**
   * Load the selected model module.
   * @param {Object} options - Model configuration forwarded to the loader.
   * @returns {Promise<ObjectDetector>} this, once the underlying model has loaded.
   * @throws {Error} if the selected module does not expose a load() function.
   */
  async loadModel(options) {
    if (!this.modelToUse || !this.modelToUse.load) {
      throw new Error(`Model loader is missing or invalid for: ${this.modelName}`);
    }

    this.model = await this.modelToUse.load(options);

    return this;
  }

  /**
   * @typedef {Object} ObjectDetectorPrediction
   * @property {number} x - top left x coordinate of the prediction box in pixels.
   * @property {number} y - top left y coordinate of the prediction box in pixels.
   * @property {number} width - width of the prediction box in pixels.
   * @property {number} height - height of the prediction box in pixels.
   * @property {string} label - the label given.
   * @property {number} confidence - the confidence score (0 to 1).
   * @property {ObjectDetectorPredictionNormalized} normalized - a normalized object of the prediction
   */

  /**
   * @typedef {Object} ObjectDetectorPredictionNormalized
   * @property {number} x - top left x coordinate of the prediction box (0 to 1).
   * @property {number} y - top left y coordinate of the prediction box (0 to 1).
   * @property {number} width - width of the prediction box (0 to 1).
   * @property {number} height - height of the prediction box (0 to 1).
   */

  /**
   * Detect objects once from the input image/video/canvas.
   * @param {HTMLVideoElement|HTMLImageElement|HTMLCanvasElement|ImageData} input - Target element.
   * @param {function} cb - Optional callback.
   * @returns {Promise<ObjectDetectorPrediction[]>} the detections.
   */
  async detect(input, cb) {
    const args = handleArguments(input, cb).require("image", "No valid image input.");
    // Wait for the model to finish loading before running detection.
    await this.ready;
    return callCallback(this.model.detect(args.image), args.callback);
  }

  /**
   * Start continuous detection on video/canvas input
   * @param {HTMLVideoElement|HTMLImageElement|HTMLCanvasElement|ImageData} input - Target element.
   * @param {function} callback - Callback function called with each detection result.
   */
  async detectStart(input, callback) {
    const args = handleArguments(input, callback).require("image", "No input provided.");

    // One detection pass per animation frame until detectStop() is called.
    const detectFrame = async () => {
      await mediaReady(args.image, true);
      await callCallback(this.model.detect(args.image), args.callback);

      if (!this.signalStop) {
        requestAnimationFrame(detectFrame);
      } else {
        this.isDetecting = false;
      }
    };

    this.signalStop = false;
    if (!this.isDetecting) {
      this.isDetecting = true;
      // Deliberately not awaited: the loop runs in the background and reports
      // results through the callback.
      detectFrame();
    }

    if (this.prevCall === "start") {
      console.warn(
        "detectStart() called again without detectStop(). Only the latest call is running."
      );
    }

    this.prevCall = "start";
  }

  /**
   * Request the continuous-detection loop to stop after its current frame.
   */
  detectStop() {
    if (this.isDetecting) {
      this.signalStop = true;
    }
    this.prevCall = "stop";
  }
}

/**
 * Factory exposed as ml5.objectDetector(). Accepts a flexible argument list
 * (model name/URL, options object, callback in any sensible order) and
 * returns an ObjectDetector instance.
 */
const objectDetector = (modelNameOrUrl, optionsOrCallback, cb) => {
  const { string: modelName, options = {}, callback } = handleArguments(
    modelNameOrUrl,
    optionsOrCallback,
    cb
  );
  return new ObjectDetector(modelName, options, callback);
};
4 changes: 3 additions & 1 deletion src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import faceMesh from "./FaceMesh";
import bodyPose from "./BodyPose";
import imageClassifier from "./ImageClassifier";
import soundClassifier from "./SoundClassifier";
import objectDetector from "./ObjectDetector";
import setBackend from "./utils/setBackend";
import bodySegmentation from "./BodySegmentation";
import communityStatement from "./utils/communityStatement";
Expand All @@ -22,6 +23,7 @@ const withPreload = {
neuralNetwork,
sentiment,
soundClassifier,
objectDetector
};

const ml5 = Object.assign({ p5Utils }, withPreload, {
Expand All @@ -36,4 +38,4 @@ p5Utils.shouldPreload(ml5, Object.keys(withPreload));

communityStatement();

export default ml5;
export default ml5;
11 changes: 11 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1731,6 +1731,16 @@ __metadata:
languageName: node
linkType: hard

"@tensorflow-models/coco-ssd@npm:^2.2.3":
version: 2.2.3
resolution: "@tensorflow-models/coco-ssd@npm:2.2.3"
peerDependencies:
"@tensorflow/tfjs-converter": ^4.10.0
"@tensorflow/tfjs-core": ^4.10.0
checksum: 10c0/3d0e54d433e388439a461f9e7b4995bcfab7825d49eaf587818800549c54adc8192dab1b5a82e0ef48d87065ce155261a3ce934000477ac0da65de6939568e2e
languageName: node
linkType: hard

"@tensorflow-models/face-landmarks-detection@npm:1.0.5":
version: 1.0.5
resolution: "@tensorflow-models/face-landmarks-detection@npm:1.0.5"
Expand Down Expand Up @@ -6537,6 +6547,7 @@ __metadata:
"@mediapipe/pose": "npm:^0.5.1675469404"
"@mediapipe/selfie_segmentation": "npm:~0.1.0"
"@tensorflow-models/body-segmentation": "npm:^1.0.1"
"@tensorflow-models/coco-ssd": "npm:^2.2.3"
"@tensorflow-models/face-landmarks-detection": "npm:1.0.5"
"@tensorflow-models/hand-pose-detection": "npm:^2.0.0"
"@tensorflow-models/mobilenet": "npm:^2.1.0"
Expand Down