Skip to content

Commit 03fc3e1

Browse files
committed
Cleanup and new API update to Crepe
1 parent 22822a3 commit 03fc3e1

File tree

1 file changed

+43
-76
lines changed

1 file changed

+43
-76
lines changed

src/Crepe/index.js

Lines changed: 43 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -3,33 +3,58 @@
33
// This software is released under the MIT License.
44
// https://opensource.org/licenses/MIT
55

6-
// Crepe Pitch Detection model
7-
// https://github.com/marl/crepe/tree/gh-pages
8-
// https://marl.github.io/crepe/crepe.js
6+
/*
7+
Crepe Pitch Detection model
8+
https://github.com/marl/crepe/tree/gh-pages
9+
https://marl.github.io/crepe/crepe.js
10+
*/
911

1012
import * as tf from '@tensorflow/tfjs';
1113

1214
class Crepe {
13-
// in here are the functions to make exposed
1415
constructor(audioContext, stream) {
1516
this.audioContext = audioContext;
1617
this.stream = stream;
17-
this.initTF();
18+
this.loadModel();
1819
}
1920

20-
async initTF() {
21-
try {
22-
console.log('Loading Keras model...');
23-
this.model = await tf.loadModel('model/model.json');
24-
console.log('Model loading complete');
25-
} catch (e) {
26-
console.error(e);
27-
}
21+
async loadModel() {
22+
this.model = await tf.loadModel('model/model.json');
2823
this.initAudio();
2924
}
3025

31-
// perform resampling the audio to 16000 Hz, on which the model is trained.
32-
// setting a sample rate in AudioContext is not supported by most browsers at the moment.
26+
initAudio() {
27+
if (this.audioContext) {
28+
try {
29+
this.processStream(this.stream);
30+
} catch (e) {
31+
throw new Error(`Error: Could not access microphone - ${e}`);
32+
}
33+
} else {
34+
throw new Error('Could not access microphone - getUserMedia not available');
35+
}
36+
}
37+
38+
processStream(stream) {
39+
const mic = this.audioContext.createMediaStreamSource(stream);
40+
const minBufferSize = (this.audioContext.sampleRate / 16000) * 1024;
41+
let bufferSize = 4;
42+
while (bufferSize < minBufferSize) bufferSize *= 2;
43+
44+
const scriptNode = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
45+
scriptNode.onaudioprocess = this.processMicrophoneBuffer.bind(this);
46+
const gain = this.audioContext.createGain();
47+
gain.gain.setValueAtTime(0, this.audioContext.currentTime);
48+
49+
mic.connect(scriptNode);
50+
scriptNode.connect(gain);
51+
gain.connect(this.audioContext.destination);
52+
53+
if (this.audioContext.state !== 'running') {
54+
console.warn('User gesture needed to start AudioContext, please click');
55+
}
56+
}
57+
3358
static resample(audioBuffer, onComplete) {
3459
const interpolate = (audioBuffer.sampleRate % 16000 !== 0);
3560
const multiplier = audioBuffer.sampleRate / 16000;
@@ -39,7 +64,6 @@ class Crepe {
3964
if (!interpolate) {
4065
subsamples[i] = original[i * multiplier];
4166
} else {
42-
// simplistic, linear resampling
4367
const left = Math.floor(i * multiplier);
4468
const right = left + 1;
4569
const p = (i * multiplier) - left;
@@ -51,42 +75,32 @@ class Crepe {
5175

5276
processMicrophoneBuffer(event) {
5377
this.results = {};
54-
// bin number -> cent value mapping
5578
const centMapping = tf.add(tf.linspace(0, 7180, 360), tf.tensor(1997.3794084376191));
5679
Crepe.resample(event.inputBuffer, (resampled) => {
5780
tf.tidy(() => {
5881
this.running = true;
59-
60-
// run the prediction on the model
6182
const frame = tf.tensor(resampled.slice(0, 1024));
6283
const zeromean = tf.sub(frame, tf.mean(frame));
6384
const framestd = tf.tensor(tf.norm(zeromean).dataSync() / Math.sqrt(1024));
6485
const normalized = tf.div(zeromean, framestd);
6586
const input = normalized.reshape([1, 1024]);
6687
const activation = this.model.predict([input]).reshape([360]);
67-
68-
// the confidence of voicing activity and the argmax bin
6988
const confidence = activation.max().dataSync()[0];
7089
const center = activation.argMax().dataSync()[0];
7190
this.results.confidence = confidence.toFixed(3);
7291

73-
// slice the local neighborhood around the argmax bin
7492
const start = Math.max(0, center - 4);
7593
const end = Math.min(360, center + 5);
7694
const weights = activation.slice([start], [end - start]);
7795
const cents = centMapping.slice([start], [end - start]);
7896

79-
// take the local weighted average to get the predicted pitch
8097
const products = tf.mul(weights, cents);
8198
const productSum = products.dataSync().reduce((a, b) => a + b, 0);
8299
const weightSum = weights.dataSync().reduce((a, b) => a + b, 0);
83100
const predictedCent = productSum / weightSum;
84101
const predictedHz = 10 * ((predictedCent / 1200.0) ** 2);
85102

86-
// update
87103
const result = (confidence > 0.5) ? `${predictedHz.toFixed(3)} + Hz` : 'no voice';
88-
// const strlen = result.length;
89-
// for (let i = 0; i < 11 - strlen; i += 1) result = result;
90104
this.results.result = result;
91105
});
92106
});
@@ -95,55 +109,8 @@ class Crepe {
95109
// Latest pitch-detection output, e.g. { confidence, result }.
// Undefined until the first audio buffer has been processed.
getResults() {
return this.results;
}
98-
99-
processStream(stream) {
100-
console.log('Setting up AudioContext ...');
101-
console.log(`Audio context sample rate = + ${this.audioContext.sampleRate}`);
102-
const mic = this.audioContext.createMediaStreamSource(stream);
103-
104-
// We need the buffer size that is a power of two
105-
// and is longer than 1024 samples when resampled to 16000 Hz.
106-
// In most platforms where the sample rate is 44.1 kHz or 48 kHz,
107-
// this will be 4096, giving 10-12 updates/sec.
108-
const minBufferSize = (this.audioContext.sampleRate / 16000) * 1024;
109-
let bufferSize = 4;
110-
while (bufferSize < minBufferSize) bufferSize *= 2;
111-
console.log(`Buffer size = ${bufferSize}`);
112-
const scriptNode = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
113-
scriptNode.onaudioprocess = this.processMicrophoneBuffer.bind(this);
114-
// It seems necessary to connect the stream to a sink
115-
// for the pipeline to work, contrary to documentataions.
116-
// As a workaround, here we create a gain node with zero gain,
117-
// and connect temp to the system audio output.
118-
const gain = this.audioContext.createGain();
119-
gain.gain.setValueAtTime(0, this.audioContext.currentTime);
120-
121-
mic.connect(scriptNode);
122-
scriptNode.connect(gain);
123-
gain.connect(this.audioContext.destination);
124-
125-
if (this.audioContext.state === 'running') {
126-
console.log('Running ...');
127-
} else {
128-
console.error('User gesture needed to start AudioContext, please click');
129-
// user gesture (like click) is required to start AudioContext, in some browser versions
130-
// status('<a href="javascript:crepe.resume();" style="color:red;">*
131-
// Click here to start the demo *</a>')
132-
}
133-
}
134-
135-
initAudio() {
136-
if (this.audioContext) {
137-
console.log('Initializing audio');
138-
try {
139-
this.processStream(this.stream);
140-
} catch (e) {
141-
console.error('Error: Could not access microphone - ', e);
142-
}
143-
} else {
144-
console.error('Could not access microphone - getUserMedia not available');
145-
}
146-
}
147112
}
148113

149-
export default Crepe;
114+
// Public factory: expose the detector as a simple callable rather
// than the class itself, matching the library's function-style API.
//
// @param {AudioContext} context - audio context for the graph.
// @param {MediaStream} stream - microphone stream.
// @returns {Crepe} a new pitch-detector instance.
function crepe(context, stream) {
  return new Crepe(context, stream);
}

export default crepe;

0 commit comments

Comments
 (0)