Skip to content

Commit 51bed70

Browse files
committed
- update pitch detection to #158
- fix event emitter in posenet; fix constructor in yolo; async video util
1 parent c834335 commit 51bed70

File tree

7 files changed

+76
-64
lines changed

7 files changed

+76
-64
lines changed

dist/ml5.min.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dist/ml5.min.js.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/PitchDetection/index.js

Lines changed: 37 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -17,29 +17,24 @@ class PitchDetection {
1717
this.model = model;
1818
this.audioContext = audioContext;
1919
this.stream = stream;
20+
this.frequency = null;
2021
this.ready = callCallback(this.loadModel(model), callback);
2122
}
2223

2324
async loadModel(model) {
2425
this.model = await tf.loadModel(`${model}/model.json`);
25-
await this.initAudio();
26-
return this;
27-
}
28-
29-
initAudio() {
3026
if (this.audioContext) {
31-
try {
32-
this.processStream(this.stream);
33-
} catch (e) {
34-
throw new Error(`Error: Could not access microphone - ${e}`);
35-
}
27+
await this.processStream();
3628
} else {
3729
throw new Error('Could not access microphone - getUserMedia not available');
3830
}
31+
return this;
3932
}
4033

41-
processStream(stream) {
42-
const mic = this.audioContext.createMediaStreamSource(stream);
34+
async processStream() {
35+
await tf.nextFrame();
36+
37+
const mic = this.audioContext.createMediaStreamSource(this.stream);
4338
const minBufferSize = (this.audioContext.sampleRate / 16000) * 1024;
4439
let bufferSize = 4;
4540
while (bufferSize < minBufferSize) bufferSize *= 2;
@@ -58,25 +53,8 @@ class PitchDetection {
5853
}
5954
}
6055

61-
static resample(audioBuffer, onComplete) {
62-
const interpolate = (audioBuffer.sampleRate % 16000 !== 0);
63-
const multiplier = audioBuffer.sampleRate / 16000;
64-
const original = audioBuffer.getChannelData(0);
65-
const subsamples = new Float32Array(1024);
66-
for (let i = 0; i < 1024; i += 1) {
67-
if (!interpolate) {
68-
subsamples[i] = original[i * multiplier];
69-
} else {
70-
const left = Math.floor(i * multiplier);
71-
const right = left + 1;
72-
const p = (i * multiplier) - left;
73-
subsamples[i] = (((1 - p) * original[left]) + (p * original[right]));
74-
}
75-
}
76-
onComplete(subsamples);
77-
}
78-
79-
processMicrophoneBuffer(event) {
56+
async processMicrophoneBuffer(event) {
57+
await tf.nextFrame();
8058
this.results = {};
8159
const centMapping = tf.add(tf.linspace(0, 7180, 360), tf.tensor(1997.3794084376191));
8260
PitchDetection.resample(event.inputBuffer, (resampled) => {
@@ -103,14 +81,38 @@ class PitchDetection {
10381
const predictedCent = productSum / weightSum;
10482
const predictedHz = 10 * ((predictedCent / 1200.0) ** 2);
10583

106-
const result = (confidence > 0.5) ? `${predictedHz.toFixed(3)} + Hz` : 'no voice';
107-
this.results.result = result;
84+
const frequency = (confidence > 0.5) ? predictedHz : null;
85+
this.frequency = frequency;
10886
});
10987
});
11088
}
11189

112-
getResults() {
113-
return this.results;
90+
async getPitch(callback) {
91+
await this.ready;
92+
await tf.nextFrame();
93+
const { frequency } = this;
94+
if (callback) {
95+
callback(undefined, frequency);
96+
}
97+
return frequency;
98+
}
99+
100+
static resample(audioBuffer, onComplete) {
101+
const interpolate = (audioBuffer.sampleRate % 16000 !== 0);
102+
const multiplier = audioBuffer.sampleRate / 16000;
103+
const original = audioBuffer.getChannelData(0);
104+
const subsamples = new Float32Array(1024);
105+
for (let i = 0; i < 1024; i += 1) {
106+
if (!interpolate) {
107+
subsamples[i] = original[i * multiplier];
108+
} else {
109+
const left = Math.floor(i * multiplier);
110+
const right = left + 1;
111+
const p = (i * multiplier) - left;
112+
subsamples[i] = (((1 - p) * original[left]) + (p * original[right]));
113+
}
114+
}
115+
onComplete(subsamples);
114116
}
115117
}
116118

src/PoseNet/index.js

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,18 @@ class PoseNet extends EventEmitter {
3939
}
4040

4141
async load() {
42-
const net = await posenet.load(this.multiplier);
43-
this.net = net;
44-
if (this.video) {
42+
this.net = await posenet.load(this.multiplier);
43+
44+
if (this.video && this.video.readyState === 0) {
4545
await new Promise((resolve) => {
46-
this.video.onplay = resolve;
46+
this.video.onloadeddata = () => resolve();
4747
});
48+
4849
if (this.detectionType === 'single') {
49-
return this.singlePose();
50+
this.singlePose();
5051
}
51-
return this.multiPose();
52+
53+
this.multiPose();
5254
}
5355
return this;
5456
}
@@ -65,7 +67,7 @@ class PoseNet extends EventEmitter {
6567
input = inputOr;
6668
} else if (typeof inputOr === 'object' && (inputOr.elt instanceof HTMLImageElement || inputOr.elt instanceof HTMLVideoElement)) {
6769
input = inputOr.elt; // Handle p5.js image and video
68-
} else if (typeof inputOr === 'function' && this.video) {
70+
} else {
6971
input = this.video;
7072
}
7173

@@ -85,7 +87,7 @@ class PoseNet extends EventEmitter {
8587
input = inputOr;
8688
} else if (typeof inputOr === 'object' && (inputOr.elt instanceof HTMLImageElement || inputOr.elt instanceof HTMLVideoElement)) {
8789
input = inputOr.elt; // Handle p5.js image and video
88-
} else if (typeof inputOr === 'function' && this.video) {
90+
} else {
8991
input = this.video;
9092
}
9193

src/Word2vec/index.js

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class Word2Vec {
4949
const sum = Word2Vec.addOrSubtract(this.model, inputs, 'ADD');
5050
const result = Word2Vec.nearest(this.model, sum, inputs.length, inputs.length + max);
5151
if (callback) {
52-
callback(result);
52+
callback(undefined, result);
5353
}
5454
return result;
5555
});
@@ -63,7 +63,7 @@ class Word2Vec {
6363
const subtraction = Word2Vec.addOrSubtract(this.model, inputs, 'SUBTRACT');
6464
const result = Word2Vec.nearest(this.model, subtraction, inputs.length, inputs.length + max);
6565
if (callback) {
66-
callback(result);
66+
callback(undefined, result);
6767
}
6868
return result;
6969
});
@@ -78,7 +78,7 @@ class Word2Vec {
7878
const avg = tf.div(sum, tf.tensor(inputs.length));
7979
const result = Word2Vec.nearest(this.model, avg, inputs.length, inputs.length + max);
8080
if (callback) {
81-
callback(result);
81+
callback(undefined, result);
8282
}
8383
return result;
8484
});
@@ -97,7 +97,7 @@ class Word2Vec {
9797
}
9898

9999
if (callback) {
100-
callback(result);
100+
callback(undefined, result);
101101
}
102102
return result;
103103
}
@@ -107,7 +107,7 @@ class Word2Vec {
107107
const words = Object.keys(this.model);
108108
const result = words[Math.floor(Math.random() * words.length)];
109109
if (callback) {
110-
callback(result);
110+
callback(undefined, result);
111111
}
112112
return result;
113113
}

src/YOLO/index.js

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -48,17 +48,19 @@ class YOLOBase extends Video {
4848
}
4949

5050
async loadModel() {
51-
if (this.video) {
52-
await this.loadVideo();
51+
if (!this.video) {
52+
this.video = await this.loadVideo();
5353
}
5454
this.model = await tf.loadModel(URL);
5555
this.modelReady = true;
5656
return this;
5757
}
5858

5959
async detect(inputOrCallback, cb) {
60+
await this.ready;
6061
let imgToPredict;
6162
let callback = cb;
63+
6264
if (inputOrCallback instanceof HTMLImageElement || inputOrCallback instanceof HTMLVideoElement) {
6365
imgToPredict = inputOrCallback;
6466
} else if (typeof inputOrCallback === 'object' && (inputOrCallback.elt instanceof HTMLImageElement || inputOrCallback.elt instanceof HTMLVideoElement)) {
@@ -67,12 +69,14 @@ class YOLOBase extends Video {
6769
imgToPredict = this.video;
6870
callback = inputOrCallback;
6971
}
72+
7073
return callCallback(this.detectInternal(imgToPredict), callback);
7174
}
7275

7376
async detectInternal(imgToPredict) {
7477
await this.ready;
7578
await tf.nextFrame();
79+
7680
this.isPredicting = true;
7781
const [allBoxes, boxConfidence, boxClassProbs] = tf.tidy(() => {
7882
const input = imgToTensor(imgToPredict, [imageSize, imageSize]);
@@ -139,21 +143,25 @@ class YOLOBase extends Video {
139143
}
140144
}
141145

142-
const YOLO = (videoOrOptionsOrCallback, optionsOrCallback, cb) => {
143-
let callback = cb;
146+
const YOLO = (videoOr, optionsOr, cb) => {
147+
let video = null;
144148
let options = {};
145-
const video = videoOrOptionsOrCallback;
149+
let callback = cb;
146150

147-
if (typeof videoOrOptionsOrCallback === 'object') {
148-
options = videoOrOptionsOrCallback;
149-
} else if (typeof videoOrOptionsOrCallback === 'function') {
150-
callback = videoOrOptionsOrCallback;
151+
if (videoOr instanceof HTMLVideoElement) {
152+
video = videoOr;
153+
} else if (typeof videoOr === 'object' && videoOr.elt instanceof HTMLVideoElement) {
154+
video = videoOr.elt; // Handle p5.js image
155+
} else if (typeof videoOr === 'function') {
156+
callback = videoOr;
157+
} else if (typeof videoOr === 'object') {
158+
options = videoOr;
151159
}
152160

153-
if (typeof optionsOrCallback === 'object') {
154-
options = optionsOrCallback;
155-
} else if (typeof optionsOrCallback === 'function') {
156-
callback = optionsOrCallback;
161+
if (typeof optionsOr === 'object') {
162+
options = optionsOr;
163+
} else if (typeof optionsOr === 'function') {
164+
callback = optionsOr;
157165
}
158166

159167
return new YOLOBase(video, options, callback);

src/utils/Video.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class Video {
2121
}
2222
}
2323

24-
loadVideo() {
24+
async loadVideo() {
2525
return new Promise((resolve) => {
2626
this.video = document.createElement('video');
2727
const stream = this.videoElt.captureStream();

0 commit comments

Comments (0)