- update pitch detection to #158

cvalenzuela · cvalenzuela · commit e8bb7ea23073 · 2018-07-10T03:13:43.000-04:00
- fix event emitter in posenet
- fix constructor in yolo
- async video util
diff --git a/src/PitchDetection/index.js b/src/PitchDetection/index.js
@@ -17,29 +17,24 @@ class PitchDetection {
     this.model = model;
     this.audioContext = audioContext;
     this.stream = stream;
+    this.frequency = null;
     this.ready = callCallback(this.loadModel(model), callback);
   }
 
   async loadModel(model) {
     this.model = await tf.loadModel(`${model}/model.json`);
-    await this.initAudio();
-    return this;
-  }
-
-  initAudio() {
     if (this.audioContext) {
-      try {
-        this.processStream(this.stream);
-      } catch (e) {
-        throw new Error(`Error: Could not access microphone - ${e}`);
-      }
+      await this.processStream();
     } else {
       throw new Error('Could not access microphone - getUserMedia not available');
     }
+    return this;
   }
 
-  processStream(stream) {
-    const mic = this.audioContext.createMediaStreamSource(stream);
+  async processStream() {
+    await tf.nextFrame();
+
+    const mic = this.audioContext.createMediaStreamSource(this.stream);
     const minBufferSize = (this.audioContext.sampleRate / 16000) * 1024;
     let bufferSize = 4;
     while (bufferSize < minBufferSize) bufferSize *= 2;
@@ -58,25 +53,8 @@ class PitchDetection {
     }
   }
 
-  static resample(audioBuffer, onComplete) {
-    const interpolate = (audioBuffer.sampleRate % 16000 !== 0);
-    const multiplier = audioBuffer.sampleRate / 16000;
-    const original = audioBuffer.getChannelData(0);
-    const subsamples = new Float32Array(1024);
-    for (let i = 0; i < 1024; i += 1) {
-      if (!interpolate) {
-        subsamples[i] = original[i * multiplier];
-      } else {
-        const left = Math.floor(i * multiplier);
-        const right = left + 1;
-        const p = (i * multiplier) - left;
-        subsamples[i] = (((1 - p) * original[left]) + (p * original[right]));
-      }
-    }
-    onComplete(subsamples);
-  }
-
-  processMicrophoneBuffer(event) {
+  async processMicrophoneBuffer(event) {
+    await tf.nextFrame();
     this.results = {};
     const centMapping = tf.add(tf.linspace(0, 7180, 360), tf.tensor(1997.3794084376191));
     PitchDetection.resample(event.inputBuffer, (resampled) => {
@@ -103,14 +81,38 @@ class PitchDetection {
         const predictedCent = productSum / weightSum;
         const predictedHz = 10 * ((predictedCent / 1200.0) ** 2);
 
-        const result = (confidence > 0.5) ? `${predictedHz.toFixed(3)} +  Hz` : 'no voice';
-        this.results.result = result;
+        const frequency = (confidence > 0.5) ? predictedHz : null;
+        this.frequency = frequency;
       });
     });
   }
 
-  getResults() {
-    return this.results;
+  async getPitch(callback) {
+    await this.ready;
+    await tf.nextFrame();
+    const { frequency } = this;
+    if (callback) {
+      callback(undefined, frequency);
+    }
+    return frequency;
+  }
+
+  static resample(audioBuffer, onComplete) {
+    const interpolate = (audioBuffer.sampleRate % 16000 !== 0);
+    const multiplier = audioBuffer.sampleRate / 16000;
+    const original = audioBuffer.getChannelData(0);
+    const subsamples = new Float32Array(1024);
+    for (let i = 0; i < 1024; i += 1) {
+      if (!interpolate) {
+        subsamples[i] = original[i * multiplier];
+      } else {
+        const left = Math.floor(i * multiplier);
+        const right = left + 1;
+        const p = (i * multiplier) - left;
+        subsamples[i] = (((1 - p) * original[left]) + (p * original[right]));
+      }
+    }
+    onComplete(subsamples);
   }
 }
 
diff --git a/src/PoseNet/index.js b/src/PoseNet/index.js
@@ -39,16 +39,18 @@ class PoseNet extends EventEmitter {
   }
 
   async load() {
-    const net = await posenet.load(this.multiplier);
-    this.net = net;
-    if (this.video) {
+    this.net = await posenet.load(this.multiplier);
+
+    if (this.video && this.video.readyState === 0) {
       await new Promise((resolve) => {
-        this.video.onplay = resolve;
+        this.video.onloadeddata = () => resolve();
       });
+
       if (this.detectionType === 'single') {
-        return this.singlePose();
+        this.singlePose();
       }
-      return this.multiPose();
+
+      this.multiPose();
     }
     return this;
   }
@@ -65,7 +67,7 @@ class PoseNet extends EventEmitter {
       input = inputOr;
     } else if (typeof inputOr === 'object' && (inputOr.elt instanceof HTMLImageElement || inputOr.elt instanceof HTMLVideoElement)) {
       input = inputOr.elt; // Handle p5.js image and video
-    } else if (typeof inputOr === 'function' && this.video) {
+    } else {
       input = this.video;
     }
 
@@ -85,7 +87,7 @@ class PoseNet extends EventEmitter {
       input = inputOr;
     } else if (typeof inputOr === 'object' && (inputOr.elt instanceof HTMLImageElement || inputOr.elt instanceof HTMLVideoElement)) {
       input = inputOr.elt; // Handle p5.js image and video
-    } else if (typeof inputOr === 'function' && this.video) {
+    } else {
       input = this.video;
     }
 
diff --git a/src/Word2vec/index.js b/src/Word2vec/index.js
@@ -49,7 +49,7 @@ class Word2Vec {
       const sum = Word2Vec.addOrSubtract(this.model, inputs, 'ADD');
       const result = Word2Vec.nearest(this.model, sum, inputs.length, inputs.length + max);
       if (callback) {
-        callback(result);
+        callback(undefined, result);
       }
       return result;
     });
@@ -63,7 +63,7 @@ class Word2Vec {
       const subtraction = Word2Vec.addOrSubtract(this.model, inputs, 'SUBTRACT');
       const result = Word2Vec.nearest(this.model, subtraction, inputs.length, inputs.length + max);
       if (callback) {
-        callback(result);
+        callback(undefined, result);
       }
       return result;
     });
@@ -78,7 +78,7 @@ class Word2Vec {
       const avg = tf.div(sum, tf.tensor(inputs.length));
       const result = Word2Vec.nearest(this.model, avg, inputs.length, inputs.length + max);
       if (callback) {
-        callback(result);
+        callback(undefined, result);
       }
       return result;
     });
@@ -97,7 +97,7 @@ class Word2Vec {
     }
 
     if (callback) {
-      callback(result);
+      callback(undefined, result);
     }
     return result;
   }
@@ -107,7 +107,7 @@ class Word2Vec {
     const words = Object.keys(this.model);
     const result = words[Math.floor(Math.random() * words.length)];
     if (callback) {
-      callback(result);
+      callback(undefined, result);
     }
     return result;
   }
diff --git a/src/YOLO/index.js b/src/YOLO/index.js
@@ -48,17 +48,19 @@ class YOLOBase extends Video {
   }
 
   async loadModel() {
-    if (this.video) {
-      await this.loadVideo();
+    if (!this.video) {
+      this.video = await this.loadVideo();
     }
     this.model = await tf.loadModel(URL);
     this.modelReady = true;
     return this;
   }
 
   async detect(inputOrCallback, cb) {
+    await this.ready;
     let imgToPredict;
     let callback = cb;
+
     if (inputOrCallback instanceof HTMLImageElement || inputOrCallback instanceof HTMLVideoElement) {
       imgToPredict = inputOrCallback;
     } else if (typeof inputOrCallback === 'object' && (inputOrCallback.elt instanceof HTMLImageElement || inputOrCallback.elt instanceof HTMLVideoElement)) {
@@ -67,12 +69,14 @@ class YOLOBase extends Video {
       imgToPredict = this.video;
       callback = inputOrCallback;
     }
+
     return callCallback(this.detectInternal(imgToPredict), callback);
   }
 
   async detectInternal(imgToPredict) {
     await this.ready;
     await tf.nextFrame();
+
     this.isPredicting = true;
     const [allBoxes, boxConfidence, boxClassProbs] = tf.tidy(() => {
       const input = imgToTensor(imgToPredict, [imageSize, imageSize]);
@@ -139,21 +143,25 @@ class YOLOBase extends Video {
   }
 }
 
-const YOLO = (videoOrOptionsOrCallback, optionsOrCallback, cb) => {
-  let callback = cb;
+const YOLO = (videoOr, optionsOr, cb) => {
+  let video = null;
   let options = {};
-  const video = videoOrOptionsOrCallback;
+  let callback = cb;
 
-  if (typeof videoOrOptionsOrCallback === 'object') {
-    options = videoOrOptionsOrCallback;
-  } else if (typeof videoOrOptionsOrCallback === 'function') {
-    callback = videoOrOptionsOrCallback;
+  if (videoOr instanceof HTMLVideoElement) {
+    video = videoOr;
+  } else if (typeof videoOr === 'object' && videoOr.elt instanceof HTMLVideoElement) {
+    video = videoOr.elt; // Handle p5.js image
+  } else if (typeof videoOr === 'function') {
+    callback = videoOr;
+  } else if (typeof videoOr === 'object') {
+    options = videoOr;
   }
 
-  if (typeof optionsOrCallback === 'object') {
-    options = optionsOrCallback;
-  } else if (typeof optionsOrCallback === 'function') {
-    callback = optionsOrCallback;
+  if (typeof optionsOr === 'object') {
+    options = optionsOr;
+  } else if (typeof optionsOr === 'function') {
+    callback = optionsOr;
   }
 
   return new YOLOBase(video, options, callback);
diff --git a/src/utils/Video.js b/src/utils/Video.js
@@ -21,7 +21,7 @@ class Video {
     }
   }
 
-  loadVideo() {
+  async loadVideo() {
     return new Promise((resolve) => {
       this.video = document.createElement('video');
       const stream = this.videoElt.captureStream();

Original file line number	Diff line number	Diff line change
`@@ -49,7 +49,7 @@ class Word2Vec {`
`49`	`49`	`const sum = Word2Vec.addOrSubtract(this.model, inputs, 'ADD');`
`50`	`50`	`const result = Word2Vec.nearest(this.model, sum, inputs.length, inputs.length + max);`
`51`	`51`	`if (callback) {`
`52`		`- callback(result);`
	`52`	`+ callback(undefined, result);`
`53`	`53`	`}`
`54`	`54`	`return result;`
`55`	`55`	`});`
`@@ -63,7 +63,7 @@ class Word2Vec {`
`63`	`63`	`const subtraction = Word2Vec.addOrSubtract(this.model, inputs, 'SUBTRACT');`
`64`	`64`	`const result = Word2Vec.nearest(this.model, subtraction, inputs.length, inputs.length + max);`
`65`	`65`	`if (callback) {`
`66`		`- callback(result);`
	`66`	`+ callback(undefined, result);`
`67`	`67`	`}`
`68`	`68`	`return result;`
`69`	`69`	`});`
`@@ -78,7 +78,7 @@ class Word2Vec {`
`78`	`78`	`const avg = tf.div(sum, tf.tensor(inputs.length));`
`79`	`79`	`const result = Word2Vec.nearest(this.model, avg, inputs.length, inputs.length + max);`
`80`	`80`	`if (callback) {`
`81`		`- callback(result);`
	`81`	`+ callback(undefined, result);`
`82`	`82`	`}`
`83`	`83`	`return result;`
`84`	`84`	`});`
`@@ -97,7 +97,7 @@ class Word2Vec {`
`97`	`97`	`}`
`98`	`98`
`99`	`99`	`if (callback) {`
`100`		`- callback(result);`
	`100`	`+ callback(undefined, result);`
`101`	`101`	`}`
`102`	`102`	`return result;`
`103`	`103`	`}`
`@@ -107,7 +107,7 @@ class Word2Vec {`
`107`	`107`	`const words = Object.keys(this.model);`
`108`	`108`	`const result = words[Math.floor(Math.random() * words.length)];`
`109`	`109`	`if (callback) {`
`110`		`- callback(result);`
	`110`	`+ callback(undefined, result);`
`111`	`111`	`}`
`112`	`112`	`return result;`
`113`	`113`	`}`
Original file line number	Diff line number	Diff line change
`@@ -21,7 +21,7 @@ class Video {`
`21`	`21`	`}`
`22`	`22`	`}`
`23`	`23`
`24`		`- loadVideo() {`
	`24`	`+ async loadVideo() {`
`25`	`25`	`return new Promise((resolve) => {`
`26`	`26`	`this.video = document.createElement('video');`
`27`	`27`	`const stream = this.videoElt.captureStream();`