// This software is released under the MIT License.
// https://opensource.org/licenses/MIT

/*
Crepe Pitch Detection model
https://github.com/marl/crepe/tree/gh-pages
https://marl.github.io/crepe/crepe.js
*/
10
12
import * as tf from '@tensorflow/tfjs' ;
11
13
12
14
class Crepe {
13
- // in here are the functions to make exposed
14
15
constructor ( audioContext , stream ) {
15
16
this . audioContext = audioContext ;
16
17
this . stream = stream ;
17
- this . initTF ( ) ;
18
+ this . loadModel ( ) ;
18
19
}
19
20
20
- async initTF ( ) {
21
- try {
22
- console . log ( 'Loading Keras model...' ) ;
23
- this . model = await tf . loadModel ( 'model/model.json' ) ;
24
- console . log ( 'Model loading complete' ) ;
25
- } catch ( e ) {
26
- console . error ( e ) ;
27
- }
21
+ async loadModel ( ) {
22
+ this . model = await tf . loadModel ( 'model/model.json' ) ;
28
23
this . initAudio ( ) ;
29
24
}
30
25
31
- // perform resampling the audio to 16000 Hz, on which the model is trained.
32
- // setting a sample rate in AudioContext is not supported by most browsers at the moment.
26
+ initAudio ( ) {
27
+ if ( this . audioContext ) {
28
+ try {
29
+ this . processStream ( this . stream ) ;
30
+ } catch ( e ) {
31
+ throw new Error ( `Error: Could not access microphone - ${ e } ` ) ;
32
+ }
33
+ } else {
34
+ throw new Error ( 'Could not access microphone - getUserMedia not available' ) ;
35
+ }
36
+ }
37
+
38
+ processStream ( stream ) {
39
+ const mic = this . audioContext . createMediaStreamSource ( stream ) ;
40
+ const minBufferSize = ( this . audioContext . sampleRate / 16000 ) * 1024 ;
41
+ let bufferSize = 4 ;
42
+ while ( bufferSize < minBufferSize ) bufferSize *= 2 ;
43
+
44
+ const scriptNode = this . audioContext . createScriptProcessor ( bufferSize , 1 , 1 ) ;
45
+ scriptNode . onaudioprocess = this . processMicrophoneBuffer . bind ( this ) ;
46
+ const gain = this . audioContext . createGain ( ) ;
47
+ gain . gain . setValueAtTime ( 0 , this . audioContext . currentTime ) ;
48
+
49
+ mic . connect ( scriptNode ) ;
50
+ scriptNode . connect ( gain ) ;
51
+ gain . connect ( this . audioContext . destination ) ;
52
+
53
+ if ( this . audioContext . state !== 'running' ) {
54
+ console . warn ( 'User gesture needed to start AudioContext, please click' ) ;
55
+ }
56
+ }
57
+
33
58
static resample ( audioBuffer , onComplete ) {
34
59
const interpolate = ( audioBuffer . sampleRate % 16000 !== 0 ) ;
35
60
const multiplier = audioBuffer . sampleRate / 16000 ;
@@ -39,7 +64,6 @@ class Crepe {
39
64
if ( ! interpolate ) {
40
65
subsamples [ i ] = original [ i * multiplier ] ;
41
66
} else {
42
- // simplistic, linear resampling
43
67
const left = Math . floor ( i * multiplier ) ;
44
68
const right = left + 1 ;
45
69
const p = ( i * multiplier ) - left ;
@@ -51,42 +75,32 @@ class Crepe {
51
75
52
76
processMicrophoneBuffer ( event ) {
53
77
this . results = { } ;
54
- // bin number -> cent value mapping
55
78
const centMapping = tf . add ( tf . linspace ( 0 , 7180 , 360 ) , tf . tensor ( 1997.3794084376191 ) ) ;
56
79
Crepe . resample ( event . inputBuffer , ( resampled ) => {
57
80
tf . tidy ( ( ) => {
58
81
this . running = true ;
59
-
60
- // run the prediction on the model
61
82
const frame = tf . tensor ( resampled . slice ( 0 , 1024 ) ) ;
62
83
const zeromean = tf . sub ( frame , tf . mean ( frame ) ) ;
63
84
const framestd = tf . tensor ( tf . norm ( zeromean ) . dataSync ( ) / Math . sqrt ( 1024 ) ) ;
64
85
const normalized = tf . div ( zeromean , framestd ) ;
65
86
const input = normalized . reshape ( [ 1 , 1024 ] ) ;
66
87
const activation = this . model . predict ( [ input ] ) . reshape ( [ 360 ] ) ;
67
-
68
- // the confidence of voicing activity and the argmax bin
69
88
const confidence = activation . max ( ) . dataSync ( ) [ 0 ] ;
70
89
const center = activation . argMax ( ) . dataSync ( ) [ 0 ] ;
71
90
this . results . confidence = confidence . toFixed ( 3 ) ;
72
91
73
- // slice the local neighborhood around the argmax bin
74
92
const start = Math . max ( 0 , center - 4 ) ;
75
93
const end = Math . min ( 360 , center + 5 ) ;
76
94
const weights = activation . slice ( [ start ] , [ end - start ] ) ;
77
95
const cents = centMapping . slice ( [ start ] , [ end - start ] ) ;
78
96
79
- // take the local weighted average to get the predicted pitch
80
97
const products = tf . mul ( weights , cents ) ;
81
98
const productSum = products . dataSync ( ) . reduce ( ( a , b ) => a + b , 0 ) ;
82
99
const weightSum = weights . dataSync ( ) . reduce ( ( a , b ) => a + b , 0 ) ;
83
100
const predictedCent = productSum / weightSum ;
84
101
const predictedHz = 10 * ( ( predictedCent / 1200.0 ) ** 2 ) ;
85
102
86
- // update
87
103
const result = ( confidence > 0.5 ) ? `${ predictedHz . toFixed ( 3 ) } + Hz` : 'no voice' ;
88
- // const strlen = result.length;
89
- // for (let i = 0; i < 11 - strlen; i += 1) result = result;
90
104
this . results . result = result ;
91
105
} ) ;
92
106
} ) ;
@@ -95,55 +109,8 @@ class Crepe {
95
109
getResults ( ) {
96
110
return this . results ;
97
111
}
98
-
99
- processStream ( stream ) {
100
- console . log ( 'Setting up AudioContext ...' ) ;
101
- console . log ( `Audio context sample rate = + ${ this . audioContext . sampleRate } ` ) ;
102
- const mic = this . audioContext . createMediaStreamSource ( stream ) ;
103
-
104
- // We need the buffer size that is a power of two
105
- // and is longer than 1024 samples when resampled to 16000 Hz.
106
- // In most platforms where the sample rate is 44.1 kHz or 48 kHz,
107
- // this will be 4096, giving 10-12 updates/sec.
108
- const minBufferSize = ( this . audioContext . sampleRate / 16000 ) * 1024 ;
109
- let bufferSize = 4 ;
110
- while ( bufferSize < minBufferSize ) bufferSize *= 2 ;
111
- console . log ( `Buffer size = ${ bufferSize } ` ) ;
112
- const scriptNode = this . audioContext . createScriptProcessor ( bufferSize , 1 , 1 ) ;
113
- scriptNode . onaudioprocess = this . processMicrophoneBuffer . bind ( this ) ;
114
- // It seems necessary to connect the stream to a sink
115
- // for the pipeline to work, contrary to documentataions.
116
- // As a workaround, here we create a gain node with zero gain,
117
- // and connect temp to the system audio output.
118
- const gain = this . audioContext . createGain ( ) ;
119
- gain . gain . setValueAtTime ( 0 , this . audioContext . currentTime ) ;
120
-
121
- mic . connect ( scriptNode ) ;
122
- scriptNode . connect ( gain ) ;
123
- gain . connect ( this . audioContext . destination ) ;
124
-
125
- if ( this . audioContext . state === 'running' ) {
126
- console . log ( 'Running ...' ) ;
127
- } else {
128
- console . error ( 'User gesture needed to start AudioContext, please click' ) ;
129
- // user gesture (like click) is required to start AudioContext, in some browser versions
130
- // status('<a href="javascript:crepe.resume();" style="color:red;">*
131
- // Click here to start the demo *</a>')
132
- }
133
- }
134
-
135
- initAudio ( ) {
136
- if ( this . audioContext ) {
137
- console . log ( 'Initializing audio' ) ;
138
- try {
139
- this . processStream ( this . stream ) ;
140
- } catch ( e ) {
141
- console . error ( 'Error: Could not access microphone - ' , e ) ;
142
- }
143
- } else {
144
- console . error ( 'Could not access microphone - getUserMedia not available' ) ;
145
- }
146
- }
147
112
}
148
113
149
/**
 * Factory entry point hiding the Crepe class behind a plain callable.
 *
 * @param {AudioContext} audioContext - Web Audio context used for capture.
 * @param {MediaStream} stream - Microphone input stream.
 * @returns {Crepe} A detector that begins loading its model immediately.
 */
function crepe(audioContext, stream) {
  return new Crepe(audioContext, stream);
}

export default crepe;