@@ -23,12 +23,10 @@ import {SimpleObservable} from "matrix-widget-api";
 const CHANNELS = 1; // stereo isn't important
 const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality.
 const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus.
-const FREQ_SAMPLE_RATE = 10; // Target rate of frequency data (samples / sec). We don't need this super often.
 
 export interface IRecordingUpdate {
     waveform: number[]; // floating points between 0 (low) and 1 (high).
-
-    // TODO: @@ TravisR: Generalize this for a timing package?
+    timeSeconds: number; // float
 }
 
 export class VoiceRecorder {
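The new timeSeconds field reports elapsed time as a float, so a display layer can derive a recording clock straight from each update. A minimal sketch of that conversion, using a hypothetical formatSeconds helper that is not part of this change:

// Illustrative only: turn the float seconds from IRecordingUpdate into an m:ss label.
function formatSeconds(timeSeconds: number): string {
    const whole = Math.floor(timeSeconds);
    const minutes = Math.floor(whole / 60);
    const seconds = whole % 60;
    return `${minutes}:${seconds.toString().padStart(2, "0")}`;
}
// e.g. formatSeconds(83.6) -> "1:23"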
@@ -37,11 +35,11 @@ export class VoiceRecorder {
     private recorderSource: MediaStreamAudioSourceNode;
     private recorderStream: MediaStream;
     private recorderFFT: AnalyserNode;
+    private recorderProcessor: ScriptProcessorNode;
     private buffer = new Uint8Array(0);
     private mxc: string;
     private recording = false;
     private observable: SimpleObservable<IRecordingUpdate>;
-    private freqTimerId: number;
 
     public constructor(private client: MatrixClient) {
     }
@@ -71,7 +69,20 @@ export class VoiceRecorder {
         // it makes the time domain less than helpful.
         this.recorderFFT.fftSize = 64;
 
+        // We use an audio processor to get accurate timing information.
+        // The size of the audio buffer largely decides how quickly we push timing/waveform data
+        // out of this class. Smaller buffers mean we update more frequently as we can't hold as
+        // many bytes. Larger buffers mean slower updates. For scale, 1024 gives us about 30Hz of
+        // updates and 2048 gives us about 20Hz. We use 2048 because it updates frequently enough
+        // to feel realtime (~20fps, which is what humans perceive as "realtime"). Must be a power
+        // of 2.
+        this.recorderProcessor = this.recorderContext.createScriptProcessor(2048, CHANNELS, CHANNELS);
+
+        // Connect our inputs and outputs
         this.recorderSource.connect(this.recorderFFT);
+        this.recorderSource.connect(this.recorderProcessor);
+        this.recorderProcessor.connect(this.recorderContext.destination);
+
         this.recorder = new Recorder({
             encoderPath, // magic from webpack
             encoderSampleRate: SAMPLE_RATE,
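The update-rate figures in the comment above follow from the buffer size and the sample rate. A rough sketch of that arithmetic; these are nominal values, since the browser decides the actual callback cadence:

// Nominal rate at which "audioprocess" fires for a given ScriptProcessorNode buffer size.
const bufferSize = 2048;                           // samples per callback; must be a power of 2
const updatesPerSecond = SAMPLE_RATE / bufferSize; // 48000 / 2048 ≈ 23 updates per second
const secondsPerUpdate = bufferSize / SAMPLE_RATE; // ≈ 0.043s between waveform/timing pushes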
@@ -117,6 +128,37 @@ export class VoiceRecorder {
         return this.mxc;
     }
 
+    private tryUpdateLiveData = (ev: AudioProcessingEvent) => {
+        if (!this.recording) return;
+
+        // The time domain is the input to the FFT, which means we use an array of the same
+        // size. The time domain is also known as the audio waveform. We're ignoring the
+        // output of the FFT here (frequency data) because we're not interested in it.
+        //
+        // We use bytes out of the analyser because floats have weird precision problems
+        // and are slightly more difficult to work with. The bytes are easy to work with,
+        // which is why we pick them (they're also more precise, but we care less about that).
+        const data = new Uint8Array(this.recorderFFT.fftSize);
+        this.recorderFFT.getByteTimeDomainData(data);
+
+        // Because we're dealing with a uint array we need to do math a bit differently.
+        // If we just `Array.from()` the uint array, we end up with 1s and 0s, which aren't
+        // what we're after. Instead, we have to use a bit of manual looping to correctly end
+        // up with the right values
+        const translatedData: number[] = [];
+        for (let i = 0; i < data.length; i++) {
+            // All we're doing here is inverting the amplitude and putting the metric somewhere
+            // between zero and one. Without the inversion, lower values are "louder", which is
+            // not super helpful.
+            translatedData.push(1 - (data[i] / 128.0));
+        }
+
+        this.observable.update({
+            waveform: translatedData,
+            timeSeconds: ev.playbackTime,
+        });
+    };
+
     public async start(): Promise<void> {
         if (this.mxc || this.hasRecording) {
             throw new Error("Recording already prepared");
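For context, consumers read these updates through the SimpleObservable they are pushed into. A minimal subscriber sketch, assuming the recorder exposes its observable via a hypothetical liveData accessor; liveData, renderWaveform and setTimerLabel are illustrative names, none of which are defined in this diff:

// Hypothetical subscriber; liveData, renderWaveform and setTimerLabel are illustrative names.
recorder.liveData.onUpdate((update: IRecordingUpdate) => {
    renderWaveform(update.waveform);   // amplitude samples taken from the analyser
    setTimerLabel(update.timeSeconds); // elapsed time, sourced from ev.playbackTime
});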
@@ -129,35 +171,7 @@ export class VoiceRecorder {
         }
         this.observable = new SimpleObservable<IRecordingUpdate>();
         await this.makeRecorder();
-        this.freqTimerId = setInterval(() => {
-            if (!this.recording) return;
-
-            // The time domain is the input to the FFT, which means we use an array of the same
-            // size. The time domain is also known as the audio waveform. We're ignoring the
-            // output of the FFT here (frequency data) because we're not interested in it.
-            //
-            // We use bytes out of the analyser because floats have weird precision problems
-            // and are slightly more difficult to work with. The bytes are easy to work with,
-            // which is why we pick them (they're also more precise, but we care less about that).
-            const data = new Uint8Array(this.recorderFFT.fftSize);
-            this.recorderFFT.getByteTimeDomainData(data);
-
-            // Because we're dealing with a uint array we need to do math a bit differently.
-            // If we just `Array.from()` the uint array, we end up with 1s and 0s, which aren't
-            // what we're after. Instead, we have to use a bit of manual looping to correctly end
-            // up with the right values
-            const translatedData: number[] = [];
-            for (let i = 0; i < data.length; i++) {
-                // All we're doing here is inverting the amplitude and putting the metric somewhere
-                // between zero and one. Without the inversion, lower values are "louder", which is
-                // not super helpful.
-                translatedData.push(1 - (data[i] / 128.0));
-            }
-
-            this.observable.update({
-                waveform: translatedData,
-            });
-        }, 1000 / FREQ_SAMPLE_RATE) as any as number; // XXX: Linter doesn't understand timer environment
+        this.recorderProcessor.addEventListener("audioprocess", this.tryUpdateLiveData);
         await this.recorder.start();
         this.recording = true;
     }
@@ -179,8 +193,8 @@ export class VoiceRecorder {
         this.recorderStream.getTracks().forEach(t => t.stop());
 
         // Finally do our post-processing and clean up
-        clearInterval(this.freqTimerId);
         this.recording = false;
+        this.recorderProcessor.removeEventListener("audioprocess", this.tryUpdateLiveData);
         await this.recorder.close();
 
         return this.buffer;