// This software is released under the MIT License.
// https://opensource.org/licenses/MIT

- // Crepe Pitch Detection model
- // https://github.com/marl/crepe/tree/gh-pages
- // https://marl.github.io/crepe/crepe.js
+ /*
+ Crepe Pitch Detection model
+ https://github.com/marl/crepe/tree/gh-pages
+ https://marl.github.io/crepe/crepe.js
+ */

import * as tf from '@tensorflow/tfjs';

- class Crepe {
-   // in here are the functions to make exposed
-   constructor(audioContext, stream) {
+ class PitchDetection {
+   constructor(modelName, audioContext, stream) {
+     this.modelName = modelName;
    this.audioContext = audioContext;
    this.stream = stream;
-     this.initTF();
+     this.loadModel();
  }

-   async initTF() {
-     try {
-       console.log('Loading Keras model...');
-       this.model = await tf.loadModel('model/model.json');
-       console.log('Model loading complete');
-     } catch (e) {
-       console.error(e);
-     }
+   async loadModel() {
+     this.model = await tf.loadModel('model/model.json');
    this.initAudio();
  }

-   // perform resampling the audio to 16000 Hz, on which the model is trained.
-   // setting a sample rate in AudioContext is not supported by most browsers at the moment.
+   initAudio() {
+     if (this.audioContext) {
+       try {
+         this.processStream(this.stream);
+       } catch (e) {
+         throw new Error(`Error: Could not access microphone - ${e}`);
+       }
+     } else {
+       throw new Error('Could not access microphone - getUserMedia not available');
+     }
+   }
+
+   processStream(stream) {
+     const mic = this.audioContext.createMediaStreamSource(stream);
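+     // The buffer must be a power of two and still hold at least 1024 samples
+     // after resampling to 16 kHz; at 44.1/48 kHz this works out to 4096.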
+     const minBufferSize = (this.audioContext.sampleRate / 16000) * 1024;
+     let bufferSize = 4;
+     while (bufferSize < minBufferSize) bufferSize *= 2;
+
+     const scriptNode = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
+     scriptNode.onaudioprocess = this.processMicrophoneBuffer.bind(this);
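+     // The script processor seems to need a sink for audio to flow, so route it
+     // through a zero-gain node to keep the monitored output silent.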
+     const gain = this.audioContext.createGain();
+     gain.gain.setValueAtTime(0, this.audioContext.currentTime);
+
+     mic.connect(scriptNode);
+     scriptNode.connect(gain);
+     gain.connect(this.audioContext.destination);
+
+     if (this.audioContext.state !== 'running') {
+       console.warn('User gesture needed to start AudioContext, please click');
+     }
+   }
+
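+   // The model is trained on 16 kHz audio, and most browsers do not support
+   // picking a sample rate for AudioContext, so buffers are resampled below.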
  static resample(audioBuffer, onComplete) {
    const interpolate = (audioBuffer.sampleRate % 16000 !== 0);
    const multiplier = audioBuffer.sampleRate / 16000;
@@ -39,7 +65,6 @@ class Crepe {
      if (!interpolate) {
        subsamples[i] = original[i * multiplier];
      } else {
-         // simplistic, linear resampling
        const left = Math.floor(i * multiplier);
        const right = left + 1;
        const p = (i * multiplier) - left;
@@ -51,42 +76,32 @@ class Crepe {

  processMicrophoneBuffer(event) {
    this.results = {};
-     // bin number -> cent value mapping
    const centMapping = tf.add(tf.linspace(0, 7180, 360), tf.tensor(1997.3794084376191));
-     Crepe.resample(event.inputBuffer, (resampled) => {
+     PitchDetection.resample(event.inputBuffer, (resampled) => {
      tf.tidy(() => {
        this.running = true;
-
-         // run the prediction on the model
        const frame = tf.tensor(resampled.slice(0, 1024));
        const zeromean = tf.sub(frame, tf.mean(frame));
        const framestd = tf.tensor(tf.norm(zeromean).dataSync() / Math.sqrt(1024));
        const normalized = tf.div(zeromean, framestd);
        const input = normalized.reshape([1, 1024]);
        const activation = this.model.predict([input]).reshape([360]);
-
-         // the confidence of voicing activity and the argmax bin
        const confidence = activation.max().dataSync()[0];
        const center = activation.argMax().dataSync()[0];
        this.results.confidence = confidence.toFixed(3);

-         // slice the local neighborhood around the argmax bin
        const start = Math.max(0, center - 4);
        const end = Math.min(360, center + 5);
        const weights = activation.slice([start], [end - start]);
        const cents = centMapping.slice([start], [end - start]);

-         // take the local weighted average to get the predicted pitch
        const products = tf.mul(weights, cents);
        const productSum = products.dataSync().reduce((a, b) => a + b, 0);
        const weightSum = weights.dataSync().reduce((a, b) => a + b, 0);
        const predictedCent = productSum / weightSum;
        const predictedHz = 10 * (2 ** (predictedCent / 1200.0));

-         // update
        const result = (confidence > 0.5) ? `${predictedHz.toFixed(3)} Hz` : 'no voice';
-         // const strlen = result.length;
-         // for (let i = 0; i < 11 - strlen; i += 1) result = result;
        this.results.result = result;
      });
    });
@@ -95,55 +110,20 @@ class Crepe {
  getResults() {
    return this.results;
  }
+ }

-   processStream(stream) {
-     console.log('Setting up AudioContext ...');
-     console.log(`Audio context sample rate = + ${this.audioContext.sampleRate}`);
-     const mic = this.audioContext.createMediaStreamSource(stream);
-
-     // We need the buffer size that is a power of two
-     // and is longer than 1024 samples when resampled to 16000 Hz.
-     // In most platforms where the sample rate is 44.1 kHz or 48 kHz,
-     // this will be 4096, giving 10-12 updates/sec.
-     const minBufferSize = (this.audioContext.sampleRate / 16000) * 1024;
-     let bufferSize = 4;
-     while (bufferSize < minBufferSize) bufferSize *= 2;
-     console.log(`Buffer size = ${bufferSize}`);
-     const scriptNode = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
-     scriptNode.onaudioprocess = this.processMicrophoneBuffer.bind(this);
-     // It seems necessary to connect the stream to a sink
-     // for the pipeline to work, contrary to documentataions.
-     // As a workaround, here we create a gain node with zero gain,
-     // and connect temp to the system audio output.
-     const gain = this.audioContext.createGain();
-     gain.gain.setValueAtTime(0, this.audioContext.currentTime);
-
-     mic.connect(scriptNode);
-     scriptNode.connect(gain);
-     gain.connect(this.audioContext.destination);
-
-     if (this.audioContext.state === 'running') {
-       console.log('Running ...');
-     } else {
-       console.error('User gesture needed to start AudioContext, please click');
-       // user gesture (like click) is required to start AudioContext, in some browser versions
-       // status('<a href="javascript:crepe.resume();" style="color:red;">*
-       // Click here to start the demo *</a>')
-     }
+ const pitchDetection = (modelName, context, stream) => {
+   let model;
+   if (typeof modelName === 'string') {
+     model = modelName.toLowerCase();
+   } else {
+     throw new Error('Please specify a model to use. E.g: "Crepe"');
  }

-   initAudio() {
-     if (this.audioContext) {
-       console.log('Initializing audio');
-       try {
-         this.processStream(this.stream);
-       } catch (e) {
-         console.error('Error: Could not access microphone - ', e);
-       }
-     } else {
-       console.error('Could not access microphone - getUserMedia not available');
-     }
+   if (model === 'crepe') {
+     return new PitchDetection(model, context, stream);
  }
- }
+   throw new Error(`${model} is not a valid model to use in pitchDetection()`);
+ };

- export default Crepe;
+ export default pitchDetection;
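
A minimal usage sketch of the new factory export, assuming a browser AudioContext and a microphone stream obtained via getUserMedia (neither is created in this module), and a hypothetical './PitchDetection' import path:

import pitchDetection from './PitchDetection';

const audioContext = new AudioContext();

navigator.mediaDevices.getUserMedia({ audio: true }).then((micStream) => {
  // 'Crepe' is the only model name the factory currently accepts.
  const pitch = pitchDetection('Crepe', audioContext, micStream);

  // getResults() returns { confidence, result } once audio frames are processed.
  setInterval(() => {
    const results = pitch.getResults();
    if (results) console.log(results.result, results.confidence);
  }, 250);
});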